From 410313d933a33252205aa1167955e294eadab30e Mon Sep 17 00:00:00 2001 From: SChernykh Date: Sat, 19 Dec 2020 13:59:28 +0100 Subject: [PATCH] Auto-detect the fastest code for dataset init --- src/backend/cpu/interfaces/ICpuInfo.h | 9 +++++ src/backend/cpu/platform/BasicCpuInfo.cpp | 18 ++++++++++ src/backend/cpu/platform/BasicCpuInfo.h | 2 ++ src/crypto/randomx/jit_compiler_x86.cpp | 40 +++++++++++++++++++++-- src/crypto/randomx/jit_compiler_x86.hpp | 1 + 5 files changed, 67 insertions(+), 3 deletions(-) diff --git a/src/backend/cpu/interfaces/ICpuInfo.h b/src/backend/cpu/interfaces/ICpuInfo.h index 44ec3301..b772a92c 100644 --- a/src/backend/cpu/interfaces/ICpuInfo.h +++ b/src/backend/cpu/interfaces/ICpuInfo.h @@ -40,6 +40,14 @@ public: VENDOR_AMD }; + enum Arch : uint32_t { + ARCH_UNKNOWN, + ARCH_ZEN, + ARCH_ZEN_PLUS, + ARCH_ZEN2, + ARCH_ZEN3 + }; + enum MsrMod : uint32_t { MSR_MOD_NONE, MSR_MOD_RYZEN_17H, @@ -100,6 +108,7 @@ public: virtual size_t packages() const = 0; virtual size_t threads() const = 0; virtual Vendor vendor() const = 0; + virtual Arch arch() const = 0; virtual bool jccErratum() const = 0; }; diff --git a/src/backend/cpu/platform/BasicCpuInfo.cpp b/src/backend/cpu/platform/BasicCpuInfo.cpp index ae0f9aa3..2a4dc829 100644 --- a/src/backend/cpu/platform/BasicCpuInfo.cpp +++ b/src/backend/cpu/platform/BasicCpuInfo.cpp @@ -217,9 +217,27 @@ xmrig::BasicCpuInfo::BasicCpuInfo() : switch (m_family) { case 0x17: m_msrMod = MSR_MOD_RYZEN_17H; + switch (m_model) { + case 1: + case 17: + case 32: + m_arch = ARCH_ZEN; + break; + case 8: + case 24: + m_arch = ARCH_ZEN_PLUS; + break; + case 49: + case 96: + case 113: + case 144: + m_arch = ARCH_ZEN2; + break; + } break; case 0x19: + m_arch = ARCH_ZEN3; m_msrMod = MSR_MOD_RYZEN_19H; break; diff --git a/src/backend/cpu/platform/BasicCpuInfo.h b/src/backend/cpu/platform/BasicCpuInfo.h index d21b6d61..edf119a2 100644 --- a/src/backend/cpu/platform/BasicCpuInfo.h +++ b/src/backend/cpu/platform/BasicCpuInfo.h @@ 
-64,12 +64,14 @@ protected: inline size_t packages() const override { return 1; } inline size_t threads() const override { return m_threads; } inline Vendor vendor() const override { return m_vendor; } + inline Arch arch() const override { return m_arch; } inline bool jccErratum() const override { return m_jccErratum; } protected: char m_brand[64 + 6]{}; size_t m_threads; Vendor m_vendor = VENDOR_UNKNOWN; + Arch m_arch = ARCH_UNKNOWN; bool m_jccErratum = false; private: diff --git a/src/crypto/randomx/jit_compiler_x86.cpp b/src/crypto/randomx/jit_compiler_x86.cpp index d3a71f94..621ca9b6 100644 --- a/src/crypto/randomx/jit_compiler_x86.cpp +++ b/src/crypto/randomx/jit_compiler_x86.cpp @@ -214,9 +214,43 @@ namespace randomx { hasAVX = xmrig::Cpu::info()->hasAVX(); hasAVX2 = xmrig::Cpu::info()->hasAVX2(); + + // Set to false by default + initDatasetAVX2 = false; + + xmrig::ICpuInfo::Vendor vendor = xmrig::Cpu::info()->vendor(); + xmrig::ICpuInfo::Arch arch = xmrig::Cpu::info()->arch(); + + if (vendor == xmrig::ICpuInfo::VENDOR_INTEL) { + // AVX2 init is faster on Intel CPUs without HT + initDatasetAVX2 = xmrig::Cpu::info()->cores() == xmrig::Cpu::info()->threads(); + } + else if (vendor == xmrig::ICpuInfo::VENDOR_AMD) { + switch (arch) { + case xmrig::ICpuInfo::ARCH_ZEN: + case xmrig::ICpuInfo::ARCH_ZEN_PLUS: + // AVX2 init is slow on Zen/Zen+ + initDatasetAVX2 = false; + break; + case xmrig::ICpuInfo::ARCH_ZEN2: + // AVX2 init is faster on Zen2 without SMT (mobile CPUs) + initDatasetAVX2 = xmrig::Cpu::info()->cores() == xmrig::Cpu::info()->threads(); + break; + case xmrig::ICpuInfo::ARCH_ZEN3: + // AVX2 init is faster on Zen3 + initDatasetAVX2 = true; + break; + } + } + + // Sorry low-end Intel CPUs + if (!hasAVX2) { + initDatasetAVX2 = false; + } + hasXOP = xmrig::Cpu::info()->hasXOP(); - allocatedSize = hasAVX2 ? (CodeSize * 4) : (CodeSize * 2); + allocatedSize = initDatasetAVX2 ? 
(CodeSize * 4) : (CodeSize * 2); allocatedCode = static_cast<uint8_t*>(allocExecutableMemory(allocatedSize, # ifdef XMRIG_SECURE_JIT false @@ -299,7 +333,7 @@ namespace randomx { template<size_t N> void JitCompilerX86::generateSuperscalarHash(SuperscalarProgram(&programs)[N]) { uint8_t* p = code; - if (hasAVX2) { + if (initDatasetAVX2) { codePos = 0; emit(codeDatasetInitAVX2_prologue, datasetInitAVX2_prologue_size, code, codePos); @@ -356,7 +390,7 @@ namespace randomx { void JitCompilerX86::generateDatasetInitCode() { // AVX2 code is generated in generateSuperscalarHash() - if (!hasAVX2) { + if (!initDatasetAVX2) { memcpy(code, codeDatasetInit, datasetInitSize); } } diff --git a/src/crypto/randomx/jit_compiler_x86.hpp b/src/crypto/randomx/jit_compiler_x86.hpp index 8aa3484d..f0f71895 100644 --- a/src/crypto/randomx/jit_compiler_x86.hpp +++ b/src/crypto/randomx/jit_compiler_x86.hpp @@ -97,6 +97,7 @@ namespace randomx { bool BranchesWithin32B = false; bool hasAVX; bool hasAVX2; + bool initDatasetAVX2; bool hasXOP; uint8_t* allocatedCode = nullptr;