Tweaked auto-tuning for Intel CPUs
Alder Lake and newer CPUs have exclusive L3 cache and benefit from more threads until L3+L2 is filled.
This commit is contained in:
parent
c4e1363148
commit
7f7fc363e1
1 changed files with 7 additions and 2 deletions
|
@ -298,8 +298,10 @@ void xmrig::HwlocCpuInfo::processTopLevelCache(hwloc_obj_t cache, const Algorith
|
||||||
cores.reserve(m_cores);
|
cores.reserve(m_cores);
|
||||||
findByType(cache, HWLOC_OBJ_CORE, [&cores](hwloc_obj_t found) { cores.emplace_back(found); });
|
findByType(cache, HWLOC_OBJ_CORE, [&cores](hwloc_obj_t found) { cores.emplace_back(found); });
|
||||||
|
|
||||||
|
const bool L3_exclusive = isCacheExclusive(cache);
|
||||||
|
|
||||||
# ifdef XMRIG_ALGO_GHOSTRIDER
|
# ifdef XMRIG_ALGO_GHOSTRIDER
|
||||||
if ((algorithm == Algorithm::GHOSTRIDER_RTM) && (PUs > cores.size()) && (PUs < cores.size() * 2)) {
|
if ((algorithm == Algorithm::GHOSTRIDER_RTM) && L3_exclusive && (PUs > cores.size()) && (PUs < cores.size() * 2)) {
|
||||||
// Don't use E-cores on Alder Lake
|
// Don't use E-cores on Alder Lake
|
||||||
cores.erase(std::remove_if(cores.begin(), cores.end(), [](hwloc_obj_t c) { return hwloc_bitmap_weight(c->cpuset) == 1; }), cores.end());
|
cores.erase(std::remove_if(cores.begin(), cores.end(), [](hwloc_obj_t c) { return hwloc_bitmap_weight(c->cpuset) == 1; }), cores.end());
|
||||||
|
|
||||||
|
@ -311,7 +313,6 @@ void xmrig::HwlocCpuInfo::processTopLevelCache(hwloc_obj_t cache, const Algorith
|
||||||
# endif
|
# endif
|
||||||
|
|
||||||
size_t L3 = cache->attr->cache.size;
|
size_t L3 = cache->attr->cache.size;
|
||||||
const bool L3_exclusive = isCacheExclusive(cache);
|
|
||||||
size_t L2 = 0;
|
size_t L2 = 0;
|
||||||
int L2_associativity = 0;
|
int L2_associativity = 0;
|
||||||
size_t extra = 0;
|
size_t extra = 0;
|
||||||
|
@ -349,6 +350,10 @@ void xmrig::HwlocCpuInfo::processTopLevelCache(hwloc_obj_t cache, const Algorith
|
||||||
}
|
}
|
||||||
|
|
||||||
# ifdef XMRIG_ALGO_RANDOMX
|
# ifdef XMRIG_ALGO_RANDOMX
|
||||||
|
if ((algorithm.family() == Algorithm::RANDOM_X) && L3_exclusive && (PUs > cores.size()) && (PUs < cores.size() * 2)) {
|
||||||
|
// Use all L3+L2 on latest Intel CPUs with P-cores, E-cores and exclusive L3 cache
|
||||||
|
cacheHashes = (L3 + L2) / scratchpad;
|
||||||
|
}
|
||||||
if (extra == 0 && algorithm.l2() > 0) {
|
if (extra == 0 && algorithm.l2() > 0) {
|
||||||
cacheHashes = std::min<size_t>(std::max<size_t>(L2 / algorithm.l2(), cores.size()), cacheHashes);
|
cacheHashes = std::min<size_t>(std::max<size_t>(L2 / algorithm.l2(), cores.size()), cacheHashes);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue