Auto-detect the fastest code for dataset init

2020-12-19 13:59:28 +01:00 · 2020-12-19 13:59:28 +01:00 · 410313d933
commit 410313d933
parent 7aba194d3b
5 changed files with 67 additions and 3 deletions
--- a/src/backend/cpu/interfaces/ICpuInfo.h
+++ b/src/backend/cpu/interfaces/ICpuInfo.h
@@ -40,6 +40,14 @@ public:
        VENDOR_AMD
    };

+    enum Arch : uint32_t {
+        ARCH_UNKNOWN,
+        ARCH_ZEN,
+        ARCH_ZEN_PLUS,
+        ARCH_ZEN2,
+        ARCH_ZEN3
+    };
+
    enum MsrMod : uint32_t {
        MSR_MOD_NONE,
        MSR_MOD_RYZEN_17H,
@@ -100,6 +108,7 @@ public:
    virtual size_t packages() const                                                 = 0;
    virtual size_t threads() const                                                  = 0;
    virtual Vendor vendor() const                                                   = 0;
+    virtual Arch arch() const                                                       = 0;
    virtual bool jccErratum() const                                                 = 0;
 };

--- a/src/backend/cpu/platform/BasicCpuInfo.cpp
+++ b/src/backend/cpu/platform/BasicCpuInfo.cpp
@@ -217,9 +217,27 @@ xmrig::BasicCpuInfo::BasicCpuInfo() :
                switch (m_family) {
                case 0x17:
                    m_msrMod = MSR_MOD_RYZEN_17H;
+                    switch (m_model) {
+                    case 1:
+                    case 17:
+                    case 32:
+                        m_arch = ARCH_ZEN;
+                        break;
+                    case 8:
+                    case 24:
+                        m_arch = ARCH_ZEN_PLUS;
+                        break;
+                    case 49:
+                    case 96:
+                    case 113:
+                    case 144:
+                        m_arch = ARCH_ZEN2;
+                        break;
+                    }
                    break;

                case 0x19:
+                    m_arch = ARCH_ZEN3;
                    m_msrMod = MSR_MOD_RYZEN_19H;
                    break;

--- a/src/backend/cpu/platform/BasicCpuInfo.h
+++ b/src/backend/cpu/platform/BasicCpuInfo.h
@@ -64,12 +64,14 @@ protected:
    inline size_t packages() const override         { return 1; }
    inline size_t threads() const override          { return m_threads; }
    inline Vendor vendor() const override           { return m_vendor; }
+    inline Arch arch() const override               { return m_arch; }
    inline bool jccErratum() const override         { return m_jccErratum; }

 protected:
    char m_brand[64 + 6]{};
    size_t m_threads;
    Vendor m_vendor         = VENDOR_UNKNOWN;
+    Arch m_arch             = ARCH_UNKNOWN;
    bool m_jccErratum       = false;

 private:
--- a/src/crypto/randomx/jit_compiler_x86.cpp
+++ b/src/crypto/randomx/jit_compiler_x86.cpp
@@ -214,9 +214,43 @@ namespace randomx {

 		hasAVX = xmrig::Cpu::info()->hasAVX();
 		hasAVX2 = xmrig::Cpu::info()->hasAVX2();
+
+		// Set to false by default
+		initDatasetAVX2 = false;
+
+		xmrig::ICpuInfo::Vendor vendor = xmrig::Cpu::info()->vendor();
+		xmrig::ICpuInfo::Arch arch = xmrig::Cpu::info()->arch();
+
+		if (vendor == xmrig::ICpuInfo::VENDOR_INTEL) {
+			// AVX2 init is faster on Intel CPUs without HT
+			initDatasetAVX2 = xmrig::Cpu::info()->cores() == xmrig::Cpu::info()->threads();
+		}
+		else if (vendor == xmrig::ICpuInfo::VENDOR_AMD) {
+			switch (arch) {
+			case xmrig::ICpuInfo::ARCH_ZEN:
+			case xmrig::ICpuInfo::ARCH_ZEN_PLUS:
+				// AVX2 init is slow on Zen/Zen+
+				initDatasetAVX2 = false;
+				break;
+			case xmrig::ICpuInfo::ARCH_ZEN2:
+				// AVX2 init is faster on Zen2 without SMT (mobile CPUs)
+				initDatasetAVX2 = xmrig::Cpu::info()->cores() == xmrig::Cpu::info()->threads();
+				break;
+			case xmrig::ICpuInfo::ARCH_ZEN3:
+				// AVX2 init is faster on Zen3
+				initDatasetAVX2 = true;
+				break;
+			}
+		}
+
+		// Never use the AVX2 dataset init code on CPUs without AVX2 support (e.g. low-end Intel CPUs)
+		if (!hasAVX2) {
+			initDatasetAVX2 = false;
+		}
+
 		hasXOP = xmrig::Cpu::info()->hasXOP();

-		allocatedSize = hasAVX2 ? (CodeSize * 4) : (CodeSize * 2);
+		allocatedSize = initDatasetAVX2 ? (CodeSize * 4) : (CodeSize * 2);
 		allocatedCode = static_cast<uint8_t*>(allocExecutableMemory(allocatedSize,
 #			ifdef XMRIG_SECURE_JIT
 			false
@@ -299,7 +333,7 @@ namespace randomx {
 	template<size_t N>
 	void JitCompilerX86::generateSuperscalarHash(SuperscalarProgram(&programs)[N]) {
 		uint8_t* p = code;
-		if (hasAVX2) {
+		if (initDatasetAVX2) {
 			codePos = 0;
 			emit(codeDatasetInitAVX2_prologue, datasetInitAVX2_prologue_size, code, codePos);

@@ -356,7 +390,7 @@ namespace randomx {

 	void JitCompilerX86::generateDatasetInitCode() {
 		// AVX2 code is generated in generateSuperscalarHash()
-		if (!hasAVX2) {
+		if (!initDatasetAVX2) {
 			memcpy(code, codeDatasetInit, datasetInitSize);
 		}
 	}
--- a/src/crypto/randomx/jit_compiler_x86.hpp
+++ b/src/crypto/randomx/jit_compiler_x86.hpp
@@ -97,6 +97,7 @@ namespace randomx {
 		bool BranchesWithin32B = false;
 		bool hasAVX;
 		bool hasAVX2;
+		bool initDatasetAVX2;
 		bool hasXOP;

 		uint8_t* allocatedCode = nullptr;