Auto-detect the fastest code for dataset init

This commit is contained in:
SChernykh 2020-12-19 13:59:28 +01:00
parent 7aba194d3b
commit 410313d933
5 changed files with 67 additions and 3 deletions

View file

@ -40,6 +40,14 @@ public:
VENDOR_AMD
};
// CPU microarchitecture identifier returned by arch().
// Callers use it to select architecture-specific code paths
// (e.g. which dataset-init routine the RandomX JIT generates).
enum Arch : uint32_t {
// Default value; kept when detection matches no known family/model.
ARCH_UNKNOWN,
// Zen: selected by model number within CPU family 0x17.
ARCH_ZEN,
// Zen+: selected by model number within CPU family 0x17.
ARCH_ZEN_PLUS,
// Zen2: selected by model number within CPU family 0x17.
ARCH_ZEN2,
// Zen3: assigned for CPU family 0x19.
ARCH_ZEN3
};
enum MsrMod : uint32_t {
MSR_MOD_NONE,
MSR_MOD_RYZEN_17H,
@ -100,6 +108,7 @@ public:
virtual size_t packages() const = 0;
virtual size_t threads() const = 0;
virtual Vendor vendor() const = 0;
// Returns the detected CPU microarchitecture (see Arch).
virtual Arch arch() const = 0;
virtual bool jccErratum() const = 0;
};

View file

@ -217,9 +217,27 @@ xmrig::BasicCpuInfo::BasicCpuInfo() :
switch (m_family) {
case 0x17:
m_msrMod = MSR_MOD_RYZEN_17H;
switch (m_model) {
case 1:
case 17:
case 32:
m_arch = ARCH_ZEN;
break;
case 8:
case 24:
m_arch = ARCH_ZEN_PLUS;
break;
case 49:
case 96:
case 113:
case 144:
m_arch = ARCH_ZEN2;
break;
}
break;
case 0x19:
m_arch = ARCH_ZEN3;
m_msrMod = MSR_MOD_RYZEN_19H;
break;

View file

@ -64,12 +64,14 @@ protected:
// This implementation always reports exactly one package.
inline size_t packages() const override { return 1; }
// Returns the thread count stored in m_threads.
inline size_t threads() const override { return m_threads; }
// Returns the stored CPU vendor; m_vendor is initialized to VENDOR_UNKNOWN.
inline Vendor vendor() const override { return m_vendor; }
// Returns the stored CPU microarchitecture; m_arch is initialized to ARCH_UNKNOWN.
inline Arch arch() const override { return m_arch; }
// Returns the stored JCC-erratum flag; m_jccErratum is initialized to false.
inline bool jccErratum() const override { return m_jccErratum; }
protected:
char m_brand[64 + 6]{};
size_t m_threads;
Vendor m_vendor = VENDOR_UNKNOWN;
Arch m_arch = ARCH_UNKNOWN;
bool m_jccErratum = false;
private:

View file

@ -214,9 +214,43 @@ namespace randomx {
hasAVX = xmrig::Cpu::info()->hasAVX();
hasAVX2 = xmrig::Cpu::info()->hasAVX2();
// Set to false by default
initDatasetAVX2 = false;
xmrig::ICpuInfo::Vendor vendor = xmrig::Cpu::info()->vendor();
xmrig::ICpuInfo::Arch arch = xmrig::Cpu::info()->arch();
if (vendor == xmrig::ICpuInfo::VENDOR_INTEL) {
// AVX2 init is faster on Intel CPUs without HT
initDatasetAVX2 = xmrig::Cpu::info()->cores() == xmrig::Cpu::info()->threads();
}
else if (vendor == xmrig::ICpuInfo::VENDOR_AMD) {
switch (arch) {
case xmrig::ICpuInfo::ARCH_ZEN:
case xmrig::ICpuInfo::ARCH_ZEN_PLUS:
// AVX2 init is slow on Zen/Zen+
initDatasetAVX2 = false;
break;
case xmrig::ICpuInfo::ARCH_ZEN2:
// AVX2 init is faster on Zen2 without SMT (mobile CPUs)
initDatasetAVX2 = xmrig::Cpu::info()->cores() == xmrig::Cpu::info()->threads();
break;
case xmrig::ICpuInfo::ARCH_ZEN3:
// AVX2 init is faster on Zen3
initDatasetAVX2 = true;
break;
}
}
// Never use the AVX2 dataset init on CPUs without AVX2 support (sorry, low-end Intel CPUs)
if (!hasAVX2) {
initDatasetAVX2 = false;
}
hasXOP = xmrig::Cpu::info()->hasXOP();
allocatedSize = hasAVX2 ? (CodeSize * 4) : (CodeSize * 2);
allocatedSize = initDatasetAVX2 ? (CodeSize * 4) : (CodeSize * 2);
allocatedCode = static_cast<uint8_t*>(allocExecutableMemory(allocatedSize,
# ifdef XMRIG_SECURE_JIT
false
@ -299,7 +333,7 @@ namespace randomx {
template<size_t N>
void JitCompilerX86::generateSuperscalarHash(SuperscalarProgram(&programs)[N]) {
uint8_t* p = code;
if (hasAVX2) {
if (initDatasetAVX2) {
codePos = 0;
emit(codeDatasetInitAVX2_prologue, datasetInitAVX2_prologue_size, code, codePos);
@ -356,7 +390,7 @@ namespace randomx {
void JitCompilerX86::generateDatasetInitCode() {
// AVX2 code is generated in generateSuperscalarHash()
if (!hasAVX2) {
if (!initDatasetAVX2) {
memcpy(code, codeDatasetInit, datasetInitSize);
}
}

View file

@ -97,6 +97,7 @@ namespace randomx {
bool BranchesWithin32B = false;
bool hasAVX;
bool hasAVX2;
bool initDatasetAVX2;
bool hasXOP;
uint8_t* allocatedCode = nullptr;