From ec1839d580c1ab44b4101d3b918869d9800d50dc Mon Sep 17 00:00:00 2001 From: XMRig Date: Tue, 27 Aug 2019 06:31:40 +0700 Subject: [PATCH] Restored OclCache. --- CMakeLists.txt | 1 + src/3rdparty/base32/base32.h | 6 +- src/backend/common/Thread.h | 6 +- src/backend/common/Workers.cpp | 14 +- src/backend/cpu/CpuWorker.cpp | 4 +- src/backend/cpu/CpuWorker.h | 2 +- src/backend/opencl/OclBackend.cpp | 2 +- src/backend/opencl/OclCache.cpp | 158 +++++++++++++++++++ src/backend/opencl/OclCache.h | 13 +- src/backend/opencl/OclCache_unix.cpp | 2 +- src/backend/opencl/OclCache_win.cpp | 2 +- src/backend/opencl/OclConfig.cpp | 4 +- src/backend/opencl/OclConfig.h | 2 +- src/backend/opencl/OclLaunchData.cpp | 3 +- src/backend/opencl/OclLaunchData.h | 4 +- src/backend/opencl/OclWorker.cpp | 12 +- src/backend/opencl/OclWorker.h | 2 +- src/backend/opencl/cl/cn/cryptonight.cl | 10 +- src/backend/opencl/cl/cn/cryptonight2.cl | 6 +- src/backend/opencl/cl/cn/cryptonight_gpu.cl | 4 +- src/backend/opencl/cl/cn/cryptonight_r.cl | 2 +- src/backend/opencl/interfaces/IOclRunner.h | 6 + src/backend/opencl/opencl.cmake | 6 + src/backend/opencl/runners/OclBaseRunner.cpp | 41 +++-- src/backend/opencl/runners/OclBaseRunner.h | 16 +- src/backend/opencl/runners/OclCnRunner.cpp | 12 +- src/backend/opencl/wrappers/OclLib.cpp | 22 ++- 27 files changed, 290 insertions(+), 72 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index c8c7ffa7..73d08b76 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -15,6 +15,7 @@ option(WITH_TLS "Enable OpenSSL support" ON) option(WITH_ASM "Enable ASM PoW implementations" ON) option(WITH_EMBEDDED_CONFIG "Enable internal embedded JSON config" OFF) option(WITH_OPENCL "Enable OpenCL backend" OFF) +option(WITH_STRICT_CACHE "Enable strict checks for OpenCL cache" ON) option(BUILD_STATIC "Build static binary" OFF) option(ARM_TARGET "Force use specific ARM target 8 or 7" 0) diff --git a/src/3rdparty/base32/base32.h b/src/3rdparty/base32/base32.h index 11e18ba3..7b8187f0 100644 --- a/src/3rdparty/base32/base32.h +++ b/src/3rdparty/base32/base32.h @@ -25,8 +25,8 @@ // All functions return the number of output bytes or -1 on error. If the // output buffer is too small, the result will silently be truncated. -#ifndef _BASE32_H_ -#define _BASE32_H_ +#ifndef XMRIG_BASE32_H +#define XMRIG_BASE32_H #include @@ -65,4 +65,4 @@ int base32_encode(const uint8_t *data, int length, uint8_t *result, int bufSize) } -#endif /* _BASE32_H_ */ +#endif /* XMRIG_BASE32_H */ diff --git a/src/backend/common/Thread.h b/src/backend/common/Thread.h index b7165915..3bef5319 100644 --- a/src/backend/common/Thread.h +++ b/src/backend/common/Thread.h @@ -42,18 +42,18 @@ template class Thread { public: - inline Thread(IBackend *backend, size_t index, const T &config) : m_index(index), m_config(config), m_backend(backend) {} + inline Thread(IBackend *backend, size_t id, const T &config) : m_id(id), m_config(config), m_backend(backend) {} inline ~Thread() { m_thread.join(); delete m_worker; } inline const T &config() const { return m_config; } inline IBackend *backend() const { return m_backend; } inline IWorker *worker() const { return m_worker; } - inline size_t index() const { return m_index; } + inline size_t id() const { return m_id; } inline void setWorker(IWorker *worker) { m_worker = worker; } inline void start(void (*callback) (void *)) { m_thread = std::thread(callback, this); } private: - const size_t m_index = 0; + const size_t m_id = 0; const T m_config; IBackend *m_backend; IWorker *m_worker = nullptr; diff --git a/src/backend/common/Workers.cpp b/src/backend/common/Workers.cpp index 1d3c5620..27a0410c 100644 --- a/src/backend/common/Workers.cpp +++ b/src/backend/common/Workers.cpp @@ -134,7 +134,7 @@ void xmrig::Workers::tick(uint64_t) return; } - d_ptr->hashrate->add(handle->index(), handle->worker()->hashCount(), handle->worker()->timestamp()); + d_ptr->hashrate->add(handle->id(), handle->worker()->hashCount(), handle->worker()->timestamp()); } d_ptr->hashrate->updateHighest(); @@ -175,19 +175,19 @@ xmrig::IWorker *xmrig::Workers::create(Thread *han { switch (handle->config().intensity) { case 1: - return new CpuWorker<1>(handle->index(), handle->config()); + return new CpuWorker<1>(handle->id(), handle->config()); case 2: - return new CpuWorker<2>(handle->index(), handle->config()); + return new CpuWorker<2>(handle->id(), handle->config()); case 3: - return new CpuWorker<3>(handle->index(), handle->config()); + return new CpuWorker<3>(handle->id(), handle->config()); case 4: - return new CpuWorker<4>(handle->index(), handle->config()); + return new CpuWorker<4>(handle->id(), handle->config()); case 5: - return new CpuWorker<5>(handle->index(), handle->config()); + return new CpuWorker<5>(handle->id(), handle->config()); } return nullptr; @@ -201,7 +201,7 @@ template class Workers; template<> xmrig::IWorker *xmrig::Workers::create(Thread *handle) { - return new OclWorker(handle->index(), handle->config()); + return new OclWorker(handle->id(), handle->config()); } diff --git a/src/backend/cpu/CpuWorker.cpp b/src/backend/cpu/CpuWorker.cpp index 0fc77f8d..98259a15 100644 --- a/src/backend/cpu/CpuWorker.cpp +++ b/src/backend/cpu/CpuWorker.cpp @@ -53,8 +53,8 @@ static constexpr uint32_t kReserveCount = 4096; template -xmrig::CpuWorker::CpuWorker(size_t index, const CpuLaunchData &data) : - Worker(index, data.affinity, data.priority), +xmrig::CpuWorker::CpuWorker(size_t id, const CpuLaunchData &data) : + Worker(id, data.affinity, data.priority), m_algorithm(data.algorithm), m_assembly(data.assembly), m_hwAES(data.hwAES), diff --git a/src/backend/cpu/CpuWorker.h b/src/backend/cpu/CpuWorker.h index c3fb49f1..1bfb46a6 100644 --- a/src/backend/cpu/CpuWorker.h +++ b/src/backend/cpu/CpuWorker.h @@ -43,7 +43,7 @@ template class CpuWorker : public Worker { public: - CpuWorker(size_t index, const CpuLaunchData &data); + CpuWorker(size_t id, const CpuLaunchData &data); ~CpuWorker() override; protected: diff --git a/src/backend/opencl/OclBackend.cpp b/src/backend/opencl/OclBackend.cpp index 4949f2ad..1bff5379 100644 --- a/src/backend/opencl/OclBackend.cpp +++ b/src/backend/opencl/OclBackend.cpp @@ -260,7 +260,7 @@ void xmrig::OclBackend::setJob(const Job &job) const OclConfig &cl = d_ptr->controller->config()->cl(); - std::vector threads = cl.get(d_ptr->controller->miner(), job.algorithm(), d_ptr->devices, tag); + std::vector threads = cl.get(d_ptr->controller->miner(), job.algorithm(), d_ptr->platform, d_ptr->devices, tag); if (!d_ptr->threads.empty() && d_ptr->threads.size() == threads.size() && std::equal(d_ptr->threads.begin(), d_ptr->threads.end(), threads.begin())) { return; } diff --git a/src/backend/opencl/OclCache.cpp b/src/backend/opencl/OclCache.cpp index b17e2f5e..fc4249e8 100644 --- a/src/backend/opencl/OclCache.cpp +++ b/src/backend/opencl/OclCache.cpp @@ -23,4 +23,162 @@ */ +#include +#include +#include +#include + + +#include "3rdparty/base32/base32.h" +#include "backend/opencl/interfaces/IOclRunner.h" #include "backend/opencl/OclCache.h" +#include "backend/opencl/OclLaunchData.h" +#include "backend/opencl/wrappers/OclLib.h" +#include "base/io/log/Log.h" +#include "base/tools/Chrono.h" +#include "crypto/common/keccak.h" + + +namespace xmrig { + + +static const char *tag = MAGENTA_BG_BOLD(WHITE_BOLD_S " ocl "); +static std::mutex mutex; + + +static cl_program createFromSource(const IOclRunner *runner) +{ + LOG_INFO("%s GPU " WHITE_BOLD("#%zu") " " YELLOW_BOLD("compiling..."), tag, runner->data().device.index()); + + cl_int ret; + cl_device_id device = runner->data().device.id(); + const char *source = runner->source(); + const uint64_t ts = Chrono::steadyMSecs(); + + cl_program program = OclLib::createProgramWithSource(runner->data().ctx, 1, &source, nullptr, &ret); + if (ret != CL_SUCCESS) { + return nullptr; + } + + if (OclLib::buildProgram(program, 1, &device, runner->buildOptions()) != CL_SUCCESS) { + printf("BUILD LOG:\n%s\n", OclLib::getProgramBuildLog(program, device).data()); + + OclLib::releaseProgram(program); + return nullptr; + } + + LOG_INFO("%s GPU " WHITE_BOLD("#%zu") " " GREEN_BOLD("compilation completed") BLACK_BOLD( " (%.3fs)"), + tag, runner->data().device.index(), (Chrono::steadyMSecs() - ts) / 1000.0); + + return program; +} + + +static cl_program createFromBinary(const IOclRunner *runner, const std::string &fileName) +{ + std::ifstream file(fileName, std::ofstream::in | std::ofstream::binary); + if (!file.good()) { + return nullptr; + } + + std::ostringstream ss; + ss << file.rdbuf(); + + const std::string s = ss.str(); + const size_t bin_size = s.size(); + auto data_ptr = s.data(); + cl_device_id device = runner->data().device.id(); + + cl_int clStatus; + cl_int ret; + cl_program program = OclLib::createProgramWithBinary(runner->data().ctx, 1, &device, &bin_size, reinterpret_cast(&data_ptr), &clStatus, &ret); + if (ret != CL_SUCCESS) { + return nullptr; + } + + if (OclLib::buildProgram(program, 1, &device) != CL_SUCCESS) { + OclLib::releaseProgram(program); + return nullptr; + } + + return program; +} + + +} // namespace xmrig + + +cl_program xmrig::OclCache::build(const IOclRunner *runner) +{ + std::lock_guard lock(mutex); + + if (Nonce::sequence(Nonce::OPENCL) == 0) { + return nullptr; + } + + std::string fileName; + if (runner->data().cache) { +# ifdef _WIN32 + fileName = prefix() + "\\xmrig\\.cache\\" + cacheKey(runner) + ".bin"; +# else + fileName = prefix() + "/.cache/" + cacheKey(runner) + ".bin"; +# endif + + cl_program program = createFromBinary(runner, fileName); + if (program) { + return program; + } + } + + cl_program program = createFromSource(runner); + if (runner->data().cache && program) { + save(program, fileName); + } + + return program; +} + + +std::string xmrig::OclCache::cacheKey(const char *deviceKey, const char *options, const char *source) +{ + std::string in(source); + in += options; + in += deviceKey; + + uint8_t hash[200]; + keccak(in.c_str(), in.size(), hash); + + uint8_t result[32] = { 0 }; + base32_encode(hash, 12, result, sizeof(result)); + + return reinterpret_cast(result); +} + + +std::string xmrig::OclCache::cacheKey(const IOclRunner *runner) +{ + return cacheKey(runner->deviceKey(), runner->buildOptions(), runner->source()); +} + + +void xmrig::OclCache::save(cl_program program, const std::string &fileName) +{ + size_t size = 0; + if (OclLib::getProgramInfo(program, CL_PROGRAM_BINARY_SIZES, sizeof(size), &size) != CL_SUCCESS) { + return; + } + + std::vector binary(size); + + char *data = binary.data(); + if (OclLib::getProgramInfo(program, CL_PROGRAM_BINARIES, sizeof(char *), &data) != CL_SUCCESS) { + return; + } + + createDirectory(); + + std::ofstream file_stream; + file_stream.open(fileName, std::ofstream::out | std::ofstream::binary); + file_stream.write(binary.data(), static_cast(binary.size())); + file_stream.close(); +} diff --git a/src/backend/opencl/OclCache.h b/src/backend/opencl/OclCache.h index 629eec3a..75d9e068 100644 --- a/src/backend/opencl/OclCache.h +++ b/src/backend/opencl/OclCache.h @@ -29,17 +29,26 @@ #include +typedef struct _cl_program *cl_program; + + namespace xmrig { +class IOclRunner; + + class OclCache { public: - + static cl_program build(const IOclRunner *runner); + static std::string cacheKey(const char *deviceKey, const char *options, const char *source); + static std::string cacheKey(const IOclRunner *runner); private: - void createDirectory() const; static std::string prefix(); + static void createDirectory(); + static void save(cl_program program, const std::string &fileName); }; diff --git a/src/backend/opencl/OclCache_unix.cpp b/src/backend/opencl/OclCache_unix.cpp index 0d97ea07..563a5b85 100644 --- a/src/backend/opencl/OclCache_unix.cpp +++ b/src/backend/opencl/OclCache_unix.cpp @@ -29,7 +29,7 @@ #include "backend/opencl/OclCache.h" -void xmrig::OclCache::createDirectory() const +void xmrig::OclCache::createDirectory() { std::string path = prefix() + "/.cache"; mkdir(path.c_str(), 0744); diff --git a/src/backend/opencl/OclCache_win.cpp b/src/backend/opencl/OclCache_win.cpp index 4e473a57..c6da323c 100644 --- a/src/backend/opencl/OclCache_win.cpp +++ b/src/backend/opencl/OclCache_win.cpp @@ -31,7 +31,7 @@ #include "backend/opencl/OclCache.h" -void xmrig::OclCache::createDirectory() const +void xmrig::OclCache::createDirectory() { std::string path = prefix() + "/xmrig"; _mkdir(path.c_str()); diff --git a/src/backend/opencl/OclConfig.cpp b/src/backend/opencl/OclConfig.cpp index 03b36c57..20978429 100644 --- a/src/backend/opencl/OclConfig.cpp +++ b/src/backend/opencl/OclConfig.cpp @@ -149,7 +149,7 @@ rapidjson::Value xmrig::OclConfig::toJSON(rapidjson::Document &doc) const } -std::vector xmrig::OclConfig::get(const Miner *miner, const Algorithm &algorithm, const std::vector &devices, const char *tag) const +std::vector xmrig::OclConfig::get(const Miner *miner, const Algorithm &algorithm, const OclPlatform &platform, const std::vector &devices, const char *tag) const { std::vector out; const OclThreads &threads = m_threads.get(algorithm); @@ -166,7 +166,7 @@ std::vector xmrig::OclConfig::get(const Miner *miner, cons continue; } - out.emplace_back(miner, algorithm, *this, thread, devices[thread.index()]); + out.emplace_back(miner, algorithm, *this, platform, thread, devices[thread.index()]); } return out; diff --git a/src/backend/opencl/OclConfig.h b/src/backend/opencl/OclConfig.h index 05a303a5..22d1378e 100644 --- a/src/backend/opencl/OclConfig.h +++ b/src/backend/opencl/OclConfig.h @@ -42,7 +42,7 @@ public: OclPlatform platform() const; rapidjson::Value toJSON(rapidjson::Document &doc) const; - std::vector get(const Miner *miner, const Algorithm &algorithm, const std::vector &devices, const char *tag) const; + std::vector get(const Miner *miner, const Algorithm &algorithm, const OclPlatform &platform, const std::vector &devices, const char *tag) const; void read(const rapidjson::Value &value); inline bool isCacheEnabled() const { return m_cache; } diff --git a/src/backend/opencl/OclLaunchData.cpp b/src/backend/opencl/OclLaunchData.cpp index 7a94cf75..e5e8b95f 100644 --- a/src/backend/opencl/OclLaunchData.cpp +++ b/src/backend/opencl/OclLaunchData.cpp @@ -28,11 +28,12 @@ #include "backend/opencl/OclConfig.h" -xmrig::OclLaunchData::OclLaunchData(const Miner *miner, const Algorithm &algorithm, const OclConfig &config, const OclThread &thread, const OclDevice &device) : +xmrig::OclLaunchData::OclLaunchData(const Miner *miner, const Algorithm &algorithm, const OclConfig &config, const OclPlatform &platform, const OclThread &thread, const OclDevice &device) : algorithm(algorithm), cache(config.isCacheEnabled()), miner(miner), device(device), + platform(platform), thread(thread) { } diff --git a/src/backend/opencl/OclLaunchData.h b/src/backend/opencl/OclLaunchData.h index b8bde5be..b4adb85c 100644 --- a/src/backend/opencl/OclLaunchData.h +++ b/src/backend/opencl/OclLaunchData.h @@ -29,6 +29,7 @@ #include "backend/opencl/OclThread.h" #include "backend/opencl/wrappers/OclDevice.h" +#include "backend/opencl/wrappers/OclPlatform.h" #include "crypto/common/Algorithm.h" #include "crypto/common/Nonce.h" @@ -46,7 +47,7 @@ class Miner; class OclLaunchData { public: - OclLaunchData(const Miner *miner, const Algorithm &algorithm, const OclConfig &config, const OclThread &thread, const OclDevice &device); + OclLaunchData(const Miner *miner, const Algorithm &algorithm, const OclConfig &config, const OclPlatform &platform, const OclThread &thread, const OclDevice &device); bool isEqual(const OclLaunchData &other) const; @@ -60,6 +61,7 @@ public: const bool cache; const Miner *miner; const OclDevice device; + const OclPlatform platform; const OclThread thread; }; diff --git a/src/backend/opencl/OclWorker.cpp b/src/backend/opencl/OclWorker.cpp index f9e9d900..0ed558c4 100644 --- a/src/backend/opencl/OclWorker.cpp +++ b/src/backend/opencl/OclWorker.cpp @@ -48,15 +48,15 @@ static constexpr uint32_t kReserveCount = 4096; -xmrig::OclWorker::OclWorker(size_t index, const OclLaunchData &data) : - Worker(index, data.thread.affinity(), -1), +xmrig::OclWorker::OclWorker(size_t id, const OclLaunchData &data) : + Worker(id, data.thread.affinity(), -1), m_algorithm(data.algorithm), m_miner(data.miner) { switch (m_algorithm.family()) { case Algorithm::RANDOM_X: # ifdef XMRIG_ALGO_RANDOMX - m_runner = new OclRxRunner(index, data); + m_runner = new OclRxRunner(id, data); # endif break; @@ -67,9 +67,13 @@ xmrig::OclWorker::OclWorker(size_t index, const OclLaunchData &data) : break; default: - m_runner = new OclCnRunner(index, data); + m_runner = new OclCnRunner(id, data); break; } + + if (m_runner) { + m_runner->build(); + } } diff --git a/src/backend/opencl/OclWorker.h b/src/backend/opencl/OclWorker.h index 0f73e13b..4be3e3e1 100644 --- a/src/backend/opencl/OclWorker.h +++ b/src/backend/opencl/OclWorker.h @@ -42,7 +42,7 @@ class IOclRunner; class OclWorker : public Worker { public: - OclWorker(size_t index, const OclLaunchData &data); + OclWorker(size_t id, const OclLaunchData &data); ~OclWorker() override; protected: diff --git a/src/backend/opencl/cl/cn/cryptonight.cl b/src/backend/opencl/cl/cn/cryptonight.cl index 106d2b1a..157e4edf 100644 --- a/src/backend/opencl/cl/cn/cryptonight.cl +++ b/src/backend/opencl/cl/cn/cryptonight.cl @@ -573,7 +573,7 @@ __kernel void cn1_monero(__global uint4 *Scratchpad, __global ulong *states, uin if (gIdx < Threads) # endif { - #pragma unroll UNROLL_FACTOR + #pragma unroll CN_UNROLL for (int i = 0; i < ITERATIONS; ++i) { ulong c[2]; @@ -686,7 +686,7 @@ __kernel void cn1_v2_monero(__global uint4 *Scratchpad, __global ulong *states, uint2 division_result = as_uint2(states[12]); uint sqrt_result = as_uint2(states[13]).s0; - #pragma unroll UNROLL_FACTOR + #pragma unroll CN_UNROLL for(int i = 0; i < ITERATIONS; ++i) { # ifdef __NV_CL_C_VERSION @@ -846,7 +846,7 @@ __kernel void cn1_v2_half(__global uint4 *Scratchpad, __global ulong *states, ui uint2 division_result = as_uint2(states[12]); uint sqrt_result = as_uint2(states[13]).s0; - #pragma unroll UNROLL_FACTOR + #pragma unroll CN_UNROLL for(int i = 0; i < 0x40000; ++i) { # ifdef __NV_CL_C_VERSION @@ -1074,7 +1074,7 @@ __kernel void cn1_tube(__global uint4 *Scratchpad, __global ulong *states, uint { uint idx0 = a[0]; - #pragma unroll UNROLL_FACTOR + #pragma unroll CN_UNROLL for (int i = 0; i < ITERATIONS; ++i) { ulong c[2]; @@ -1171,7 +1171,7 @@ __kernel void cn1(__global uint4 *Scratchpad, __global ulong *states, uint varia { uint idx0 = a[0]; - #pragma unroll UNROLL_FACTOR + #pragma unroll CN_UNROLL for (int i = 0; i < ITERATIONS; ++i) { ulong c[2]; diff --git a/src/backend/opencl/cl/cn/cryptonight2.cl b/src/backend/opencl/cl/cn/cryptonight2.cl index ce3ee019..407efd00 100644 --- a/src/backend/opencl/cl/cn/cryptonight2.cl +++ b/src/backend/opencl/cl/cn/cryptonight2.cl @@ -75,7 +75,7 @@ __kernel void cn1_v2_rwz(__global uint4 *Scratchpad, __global ulong *states, uin uint2 division_result = as_uint2(states[12]); uint sqrt_result = as_uint2(states[13]).s0; - #pragma unroll UNROLL_FACTOR + #pragma unroll CN_UNROLL for(int i = 0; i < 0x60000; ++i) { # ifdef __NV_CL_C_VERSION @@ -235,7 +235,7 @@ __kernel void cn1_v2_zls(__global uint4 *Scratchpad, __global ulong *states, uin uint2 division_result = as_uint2(states[12]); uint sqrt_result = as_uint2(states[13]).s0; - #pragma unroll UNROLL_FACTOR + #pragma unroll CN_UNROLL for(int i = 0; i < 0x60000; ++i) { # ifdef __NV_CL_C_VERSION @@ -395,7 +395,7 @@ __kernel void cn1_v2_double(__global uint4 *Scratchpad, __global ulong *states, uint2 division_result = as_uint2(states[12]); uint sqrt_result = as_uint2(states[13]).s0; - #pragma unroll UNROLL_FACTOR + #pragma unroll CN_UNROLL for(int i = 0; i < 0x100000; ++i) { # ifdef __NV_CL_C_VERSION diff --git a/src/backend/opencl/cl/cn/cryptonight_gpu.cl b/src/backend/opencl/cl/cn/cryptonight_gpu.cl index ee39f6ca..592565ca 100644 --- a/src/backend/opencl/cl/cn/cryptonight_gpu.cl +++ b/src/backend/opencl/cl/cn/cryptonight_gpu.cl @@ -199,7 +199,7 @@ struct SharedMemChunk float4 va[16]; }; -__attribute__((reqd_work_group_size(WORKSIZE_GPU * 16, 1, 1))) +__attribute__((reqd_work_group_size(WORKSIZE * 16, 1, 1))) __kernel void cn1_cn_gpu(__global int *lpad_in, __global int *spad, uint numThreads) { const uint gIdx = getIdx(); @@ -214,7 +214,7 @@ __kernel void cn1_cn_gpu(__global int *lpad_in, __global int *spad, uint numThre __global int* lpad = (__global int*)((__global char*)lpad_in + MEMORY * (gIdx/16)); - __local struct SharedMemChunk smem_in[WORKSIZE_GPU]; + __local struct SharedMemChunk smem_in[WORKSIZE]; __local struct SharedMemChunk* smem = smem_in + chunk; uint tid = get_local_id(0) % 16; diff --git a/src/backend/opencl/cl/cn/cryptonight_r.cl b/src/backend/opencl/cl/cn/cryptonight_r.cl index 857318b6..31b627c7 100644 --- a/src/backend/opencl/cl/cn/cryptonight_r.cl +++ b/src/backend/opencl/cl/cn/cryptonight_r.cl @@ -66,7 +66,7 @@ __kernel void cn1_cryptonight_r_N(__global uint4 *Scratchpad, __global ulong *st uint r2 = as_uint2(states[13]).s0; uint r3 = as_uint2(states[13]).s1; - #pragma unroll UNROLL_FACTOR + #pragma unroll CN_UNROLL for(int i = 0; i < ITERATIONS; ++i) { # ifdef __NV_CL_C_VERSION diff --git a/src/backend/opencl/interfaces/IOclRunner.h b/src/backend/opencl/interfaces/IOclRunner.h index efe349f2..f122fd7d 100644 --- a/src/backend/opencl/interfaces/IOclRunner.h +++ b/src/backend/opencl/interfaces/IOclRunner.h @@ -33,6 +33,7 @@ namespace xmrig { class Job; +class OclLaunchData; class IOclRunner @@ -42,6 +43,11 @@ public: virtual bool selfTest() const = 0; virtual const char *buildOptions() const = 0; + virtual const char *deviceKey() const = 0; + virtual const char *source() const = 0; + virtual const OclLaunchData &data() const = 0; + virtual size_t threadId() const = 0; + virtual void build() = 0; virtual void run(uint32_t *hashOutput) = 0; virtual void set(const Job &job) = 0; }; diff --git a/src/backend/opencl/opencl.cmake b/src/backend/opencl/opencl.cmake index bea54f51..543354d4 100644 --- a/src/backend/opencl/opencl.cmake +++ b/src/backend/opencl/opencl.cmake @@ -51,6 +51,12 @@ if (WITH_OPENCL) list(APPEND HEADERS_BACKEND_OPENCL src/backend/opencl/runners/OclRxRunner.h) list(APPEND SOURCES_BACKEND_OPENCL src/backend/opencl/runners/OclRxRunner.cpp) endif() + + if (WITH_STRICT_CACHE) + add_definitions(/DXMRIG_STRICT_OPENCL_CACHE) + else() + remove_definitions(/DXMRIG_STRICT_OPENCL_CACHE) + endif() else() remove_definitions(/DXMRIG_FEATURE_OPENCL) diff --git a/src/backend/opencl/runners/OclBaseRunner.cpp b/src/backend/opencl/runners/OclBaseRunner.cpp index ee9a8fe8..e2ceed83 100644 --- a/src/backend/opencl/runners/OclBaseRunner.cpp +++ b/src/backend/opencl/runners/OclBaseRunner.cpp @@ -23,29 +23,49 @@ */ +#include "backend/opencl/cl/OclSource.h" +#include "backend/opencl/OclCache.h" #include "backend/opencl/OclLaunchData.h" #include "backend/opencl/runners/OclBaseRunner.h" #include "backend/opencl/wrappers/OclLib.h" +#include "base/io/log/Log.h" #include "base/net/stratum/Job.h" -xmrig::OclBaseRunner::OclBaseRunner(size_t, const OclLaunchData &data) : +xmrig::OclBaseRunner::OclBaseRunner(size_t id, const OclLaunchData &data) : m_algorithm(data.algorithm), - m_ctx(data.ctx) + m_source(OclSource::get(data.algorithm)), + m_data(data), + m_threadId(id) { cl_int ret; - m_queue = OclLib::createCommandQueue(m_ctx, data.device.id(), &ret); + m_queue = OclLib::createCommandQueue(data.ctx, data.device.id(), &ret); if (ret != CL_SUCCESS) { return; } - m_input = OclLib::createBuffer(m_ctx, CL_MEM_READ_ONLY, Job::kMaxBlobSize, nullptr, &ret); - m_output = OclLib::createBuffer(m_ctx, CL_MEM_READ_WRITE, sizeof(cl_uint) * 0x100, nullptr, &ret); + m_input = OclLib::createBuffer(data.ctx, CL_MEM_READ_ONLY, Job::kMaxBlobSize, nullptr, &ret); + m_output = OclLib::createBuffer(data.ctx, CL_MEM_READ_WRITE, sizeof(cl_uint) * 0x100, nullptr, &ret); + + m_deviceKey = data.device.name(); + +# ifdef XMRIG_STRICT_OPENCL_CACHE + m_deviceKey += ":"; + m_deviceKey += data.platform.version(); + + m_deviceKey += ":"; + m_deviceKey += OclLib::getDeviceString(data.device.id(), CL_DRIVER_VERSION); +# endif + +# if defined(__x86_64__) || defined(_M_AMD64) || defined (__arm64__) || defined (__aarch64__) + m_deviceKey += ":64"; +# endif } xmrig::OclBaseRunner::~OclBaseRunner() { + OclLib::releaseProgram(m_program); OclLib::releaseMemObject(m_input); OclLib::releaseMemObject(m_output); @@ -55,14 +75,17 @@ xmrig::OclBaseRunner::~OclBaseRunner() bool xmrig::OclBaseRunner::selfTest() const { - return m_queue != nullptr && m_input != nullptr && m_output != nullptr && !m_options.empty(); + return m_queue != nullptr && m_input != nullptr && m_output != nullptr && !m_options.empty() && m_source != nullptr; } - -const char *xmrig::OclBaseRunner::buildOptions() const +void xmrig::OclBaseRunner::build() { - return m_options.c_str(); + if (!selfTest()) { + return; + } + + m_program = OclCache::build(this); } diff --git a/src/backend/opencl/runners/OclBaseRunner.h b/src/backend/opencl/runners/OclBaseRunner.h index 2b8fa791..742a3464 100644 --- a/src/backend/opencl/runners/OclBaseRunner.h +++ b/src/backend/opencl/runners/OclBaseRunner.h @@ -43,21 +43,31 @@ class OclLaunchData; class OclBaseRunner : public IOclRunner { public: - OclBaseRunner(size_t index, const OclLaunchData &data); + OclBaseRunner(size_t id, const OclLaunchData &data); ~OclBaseRunner() override; protected: + inline const char *buildOptions() const override { return m_options.c_str(); } + inline const char *deviceKey() const override { return m_deviceKey.c_str(); } + inline const char *source() const override { return m_source; } + inline const OclLaunchData &data() const override { return m_data; } + inline size_t threadId() const override { return m_threadId; } + bool selfTest() const override; - const char *buildOptions() const override; + void build() override; void run(uint32_t *hashOutput) override; void set(const Job &job) override; protected: Algorithm m_algorithm; cl_command_queue m_queue = nullptr; - cl_context m_ctx; cl_mem m_input = nullptr; cl_mem m_output = nullptr; + cl_program m_program = nullptr; + const char *m_source; + const OclLaunchData &m_data; + const size_t m_threadId; + std::string m_deviceKey; std::string m_options; }; diff --git a/src/backend/opencl/runners/OclCnRunner.cpp b/src/backend/opencl/runners/OclCnRunner.cpp index 8c87309e..e8ba9f27 100644 --- a/src/backend/opencl/runners/OclCnRunner.cpp +++ b/src/backend/opencl/runners/OclCnRunner.cpp @@ -38,16 +38,16 @@ xmrig::OclCnRunner::OclCnRunner(size_t index, const OclLaunchData &data) : OclBa const size_t g_thd = data.thread.intensity(); cl_int ret; - m_scratchpads = OclLib::createBuffer(m_ctx, CL_MEM_READ_WRITE, data.algorithm.l3() * g_thd, nullptr, &ret); + m_scratchpads = OclLib::createBuffer(data.ctx, CL_MEM_READ_WRITE, data.algorithm.l3() * g_thd, nullptr, &ret); if (ret != CL_SUCCESS) { return; } - m_states = OclLib::createBuffer(m_ctx, CL_MEM_READ_WRITE, 200 * g_thd, nullptr, &ret); - m_blake256 = OclLib::createBuffer(m_ctx, CL_MEM_READ_WRITE, sizeof(cl_uint) * (g_thd + 2), nullptr, &ret); - m_groestl256 = OclLib::createBuffer(m_ctx, CL_MEM_READ_WRITE, sizeof(cl_uint) * (g_thd + 2), nullptr, &ret); - m_jh256 = OclLib::createBuffer(m_ctx, CL_MEM_READ_WRITE, sizeof(cl_uint) * (g_thd + 2), nullptr, &ret); - m_skein512 = OclLib::createBuffer(m_ctx, CL_MEM_READ_WRITE, sizeof(cl_uint) * (g_thd + 2), nullptr, &ret); + m_states = OclLib::createBuffer(data.ctx, CL_MEM_READ_WRITE, 200 * g_thd, nullptr, &ret); + m_blake256 = OclLib::createBuffer(data.ctx, CL_MEM_READ_WRITE, sizeof(cl_uint) * (g_thd + 2), nullptr, &ret); + m_groestl256 = OclLib::createBuffer(data.ctx, CL_MEM_READ_WRITE, sizeof(cl_uint) * (g_thd + 2), nullptr, &ret); + m_jh256 = OclLib::createBuffer(data.ctx, CL_MEM_READ_WRITE, sizeof(cl_uint) * (g_thd + 2), nullptr, &ret); + m_skein512 = OclLib::createBuffer(data.ctx, CL_MEM_READ_WRITE, sizeof(cl_uint) * (g_thd + 2), nullptr, &ret); uint32_t stridedIndex = data.thread.stridedIndex(); if (data.device.vendorId() == OCL_VENDOR_NVIDIA) { diff --git a/src/backend/opencl/wrappers/OclLib.cpp b/src/backend/opencl/wrappers/OclLib.cpp index 3aa5fc25..b0da3f64 100644 --- a/src/backend/opencl/wrappers/OclLib.cpp +++ b/src/backend/opencl/wrappers/OclLib.cpp @@ -375,23 +375,13 @@ cl_int xmrig::OclLib::releaseCommandQueue(cl_command_queue command_queue) assert(pReleaseCommandQueue != nullptr); assert(pGetCommandQueueInfo != nullptr); + finish(command_queue); + cl_int ret = pReleaseCommandQueue(command_queue); if (ret != CL_SUCCESS) { LOG_ERR(kErrorTemplate, OclError::toString(ret), kReleaseCommandQueue); } - cl_uint refs = 0; - ret = pGetCommandQueueInfo(command_queue, CL_QUEUE_REFERENCE_COUNT, sizeof(refs), &refs, nullptr); - if (ret == CL_SUCCESS && refs > 0) { - std::this_thread::sleep_for(std::chrono::milliseconds(200)); - } - -# ifndef NDEBUG - ret = pGetCommandQueueInfo(command_queue, CL_QUEUE_REFERENCE_COUNT, sizeof(refs), &refs, nullptr); - assert(ret == CL_SUCCESS); - assert(refs == 0); -# endif - return ret; } @@ -447,6 +437,10 @@ cl_int xmrig::OclLib::releaseProgram(cl_program program) { assert(pReleaseProgram != nullptr); + if (program == nullptr) { + return CL_SUCCESS; + } + const cl_int ret = pReleaseProgram(program); if (ret != CL_SUCCESS) { LOG_ERR(kErrorTemplate, OclError::toString(ret), kReleaseProgram); @@ -500,6 +494,8 @@ cl_program xmrig::OclLib::createProgramWithBinary(cl_context context, cl_uint nu auto result = pCreateProgramWithBinary(context, num_devices, device_list, lengths, binaries, binary_status, errcode_ret); if (*errcode_ret != CL_SUCCESS) { LOG_ERR(kErrorTemplate, OclError::toString(*errcode_ret), kCreateProgramWithBinary); + + return nullptr; } return result; @@ -513,6 +509,8 @@ cl_program xmrig::OclLib::createProgramWithSource(cl_context context, cl_uint co auto result = pCreateProgramWithSource(context, count, strings, lengths, errcode_ret); if (*errcode_ret != CL_SUCCESS) { LOG_ERR(kErrorTemplate, OclError::toString(*errcode_ret), kCreateProgramWithSource); + + return nullptr; } return result;