Restored OclCache.

This commit is contained in:
XMRig 2019-08-27 06:31:40 +07:00
parent 47b8cb6044
commit ec1839d580
27 changed files with 290 additions and 72 deletions

View file

@ -15,6 +15,7 @@ option(WITH_TLS "Enable OpenSSL support" ON)
option(WITH_ASM "Enable ASM PoW implementations" ON) option(WITH_ASM "Enable ASM PoW implementations" ON)
option(WITH_EMBEDDED_CONFIG "Enable internal embedded JSON config" OFF) option(WITH_EMBEDDED_CONFIG "Enable internal embedded JSON config" OFF)
option(WITH_OPENCL "Enable OpenCL backend" OFF) option(WITH_OPENCL "Enable OpenCL backend" OFF)
option(WITH_STRICT_CACHE "Enable strict checks for OpenCL cache" ON)
option(BUILD_STATIC "Build static binary" OFF) option(BUILD_STATIC "Build static binary" OFF)
option(ARM_TARGET "Force use specific ARM target 8 or 7" 0) option(ARM_TARGET "Force use specific ARM target 8 or 7" 0)

View file

@ -25,8 +25,8 @@
// All functions return the number of output bytes or -1 on error. If the // All functions return the number of output bytes or -1 on error. If the
// output buffer is too small, the result will silently be truncated. // output buffer is too small, the result will silently be truncated.
#ifndef _BASE32_H_ #ifndef XMRIG_BASE32_H
#define _BASE32_H_ #define XMRIG_BASE32_H
#include <stdint.h> #include <stdint.h>
@ -65,4 +65,4 @@ int base32_encode(const uint8_t *data, int length, uint8_t *result, int bufSize)
} }
#endif /* _BASE32_H_ */ #endif /* XMRIG_BASE32_H */

View file

@ -42,18 +42,18 @@ template<class T>
class Thread class Thread
{ {
public: public:
inline Thread(IBackend *backend, size_t index, const T &config) : m_index(index), m_config(config), m_backend(backend) {} inline Thread(IBackend *backend, size_t id, const T &config) : m_id(id), m_config(config), m_backend(backend) {}
inline ~Thread() { m_thread.join(); delete m_worker; } inline ~Thread() { m_thread.join(); delete m_worker; }
inline const T &config() const { return m_config; } inline const T &config() const { return m_config; }
inline IBackend *backend() const { return m_backend; } inline IBackend *backend() const { return m_backend; }
inline IWorker *worker() const { return m_worker; } inline IWorker *worker() const { return m_worker; }
inline size_t index() const { return m_index; } inline size_t id() const { return m_id; }
inline void setWorker(IWorker *worker) { m_worker = worker; } inline void setWorker(IWorker *worker) { m_worker = worker; }
inline void start(void (*callback) (void *)) { m_thread = std::thread(callback, this); } inline void start(void (*callback) (void *)) { m_thread = std::thread(callback, this); }
private: private:
const size_t m_index = 0; const size_t m_id = 0;
const T m_config; const T m_config;
IBackend *m_backend; IBackend *m_backend;
IWorker *m_worker = nullptr; IWorker *m_worker = nullptr;

View file

@ -134,7 +134,7 @@ void xmrig::Workers<T>::tick(uint64_t)
return; return;
} }
d_ptr->hashrate->add(handle->index(), handle->worker()->hashCount(), handle->worker()->timestamp()); d_ptr->hashrate->add(handle->id(), handle->worker()->hashCount(), handle->worker()->timestamp());
} }
d_ptr->hashrate->updateHighest(); d_ptr->hashrate->updateHighest();
@ -175,19 +175,19 @@ xmrig::IWorker *xmrig::Workers<CpuLaunchData>::create(Thread<CpuLaunchData> *han
{ {
switch (handle->config().intensity) { switch (handle->config().intensity) {
case 1: case 1:
return new CpuWorker<1>(handle->index(), handle->config()); return new CpuWorker<1>(handle->id(), handle->config());
case 2: case 2:
return new CpuWorker<2>(handle->index(), handle->config()); return new CpuWorker<2>(handle->id(), handle->config());
case 3: case 3:
return new CpuWorker<3>(handle->index(), handle->config()); return new CpuWorker<3>(handle->id(), handle->config());
case 4: case 4:
return new CpuWorker<4>(handle->index(), handle->config()); return new CpuWorker<4>(handle->id(), handle->config());
case 5: case 5:
return new CpuWorker<5>(handle->index(), handle->config()); return new CpuWorker<5>(handle->id(), handle->config());
} }
return nullptr; return nullptr;
@ -201,7 +201,7 @@ template class Workers<CpuLaunchData>;
template<> template<>
xmrig::IWorker *xmrig::Workers<OclLaunchData>::create(Thread<OclLaunchData> *handle) xmrig::IWorker *xmrig::Workers<OclLaunchData>::create(Thread<OclLaunchData> *handle)
{ {
return new OclWorker(handle->index(), handle->config()); return new OclWorker(handle->id(), handle->config());
} }

View file

@ -53,8 +53,8 @@ static constexpr uint32_t kReserveCount = 4096;
template<size_t N> template<size_t N>
xmrig::CpuWorker<N>::CpuWorker(size_t index, const CpuLaunchData &data) : xmrig::CpuWorker<N>::CpuWorker(size_t id, const CpuLaunchData &data) :
Worker(index, data.affinity, data.priority), Worker(id, data.affinity, data.priority),
m_algorithm(data.algorithm), m_algorithm(data.algorithm),
m_assembly(data.assembly), m_assembly(data.assembly),
m_hwAES(data.hwAES), m_hwAES(data.hwAES),

View file

@ -43,7 +43,7 @@ template<size_t N>
class CpuWorker : public Worker class CpuWorker : public Worker
{ {
public: public:
CpuWorker(size_t index, const CpuLaunchData &data); CpuWorker(size_t id, const CpuLaunchData &data);
~CpuWorker() override; ~CpuWorker() override;
protected: protected:

View file

@ -260,7 +260,7 @@ void xmrig::OclBackend::setJob(const Job &job)
const OclConfig &cl = d_ptr->controller->config()->cl(); const OclConfig &cl = d_ptr->controller->config()->cl();
std::vector<OclLaunchData> threads = cl.get(d_ptr->controller->miner(), job.algorithm(), d_ptr->devices, tag); std::vector<OclLaunchData> threads = cl.get(d_ptr->controller->miner(), job.algorithm(), d_ptr->platform, d_ptr->devices, tag);
if (!d_ptr->threads.empty() && d_ptr->threads.size() == threads.size() && std::equal(d_ptr->threads.begin(), d_ptr->threads.end(), threads.begin())) { if (!d_ptr->threads.empty() && d_ptr->threads.size() == threads.size() && std::equal(d_ptr->threads.begin(), d_ptr->threads.end(), threads.begin())) {
return; return;
} }

View file

@ -23,4 +23,162 @@
*/ */
#include <fstream>
#include <map>
#include <mutex>
#include <sstream>
#include "3rdparty/base32/base32.h"
#include "backend/opencl/interfaces/IOclRunner.h"
#include "backend/opencl/OclCache.h" #include "backend/opencl/OclCache.h"
#include "backend/opencl/OclLaunchData.h"
#include "backend/opencl/wrappers/OclLib.h"
#include "base/io/log/Log.h"
#include "base/tools/Chrono.h"
#include "crypto/common/keccak.h"
namespace xmrig {
static const char *tag = MAGENTA_BG_BOLD(WHITE_BOLD_S " ocl ");
static std::mutex mutex;
static cl_program createFromSource(const IOclRunner *runner)
{
LOG_INFO("%s GPU " WHITE_BOLD("#%zu") " " YELLOW_BOLD("compiling..."), tag, runner->data().device.index());
cl_int ret;
cl_device_id device = runner->data().device.id();
const char *source = runner->source();
const uint64_t ts = Chrono::steadyMSecs();
cl_program program = OclLib::createProgramWithSource(runner->data().ctx, 1, &source, nullptr, &ret);
if (ret != CL_SUCCESS) {
return nullptr;
}
if (OclLib::buildProgram(program, 1, &device, runner->buildOptions()) != CL_SUCCESS) {
printf("BUILD LOG:\n%s\n", OclLib::getProgramBuildLog(program, device).data());
OclLib::releaseProgram(program);
return nullptr;
}
LOG_INFO("%s GPU " WHITE_BOLD("#%zu") " " GREEN_BOLD("compilation completed") BLACK_BOLD( " (%.3fs)"),
tag, runner->data().device.index(), (Chrono::steadyMSecs() - ts) / 1000.0);
return program;
}
static cl_program createFromBinary(const IOclRunner *runner, const std::string &fileName)
{
std::ifstream file(fileName, std::ofstream::in | std::ofstream::binary);
if (!file.good()) {
return nullptr;
}
std::ostringstream ss;
ss << file.rdbuf();
const std::string s = ss.str();
const size_t bin_size = s.size();
auto data_ptr = s.data();
cl_device_id device = runner->data().device.id();
cl_int clStatus;
cl_int ret;
cl_program program = OclLib::createProgramWithBinary(runner->data().ctx, 1, &device, &bin_size, reinterpret_cast<const unsigned char **>(&data_ptr), &clStatus, &ret);
if (ret != CL_SUCCESS) {
return nullptr;
}
if (OclLib::buildProgram(program, 1, &device) != CL_SUCCESS) {
OclLib::releaseProgram(program);
return nullptr;
}
return program;
}
} // namespace xmrig
cl_program xmrig::OclCache::build(const IOclRunner *runner)
{
std::lock_guard<std::mutex> lock(mutex);
if (Nonce::sequence(Nonce::OPENCL) == 0) {
return nullptr;
}
std::string fileName;
if (runner->data().cache) {
# ifdef _WIN32
fileName = prefix() + "\\xmrig\\.cache\\" + cacheKey(runner) + ".bin";
# else
fileName = prefix() + "/.cache/" + cacheKey(runner) + ".bin";
# endif
cl_program program = createFromBinary(runner, fileName);
if (program) {
return program;
}
}
cl_program program = createFromSource(runner);
if (runner->data().cache && program) {
save(program, fileName);
}
return program;
}
std::string xmrig::OclCache::cacheKey(const char *deviceKey, const char *options, const char *source)
{
std::string in(source);
in += options;
in += deviceKey;
uint8_t hash[200];
keccak(in.c_str(), in.size(), hash);
uint8_t result[32] = { 0 };
base32_encode(hash, 12, result, sizeof(result));
return reinterpret_cast<char *>(result);
}
std::string xmrig::OclCache::cacheKey(const IOclRunner *runner)
{
return cacheKey(runner->deviceKey(), runner->buildOptions(), runner->source());
}
void xmrig::OclCache::save(cl_program program, const std::string &fileName)
{
size_t size = 0;
if (OclLib::getProgramInfo(program, CL_PROGRAM_BINARY_SIZES, sizeof(size), &size) != CL_SUCCESS) {
return;
}
std::vector<char> binary(size);
char *data = binary.data();
if (OclLib::getProgramInfo(program, CL_PROGRAM_BINARIES, sizeof(char *), &data) != CL_SUCCESS) {
return;
}
createDirectory();
std::ofstream file_stream;
file_stream.open(fileName, std::ofstream::out | std::ofstream::binary);
file_stream.write(binary.data(), static_cast<int64_t>(binary.size()));
file_stream.close();
}

View file

@ -29,17 +29,26 @@
#include <string> #include <string>
typedef struct _cl_program *cl_program;
namespace xmrig { namespace xmrig {
class IOclRunner;
class OclCache class OclCache
{ {
public: public:
static cl_program build(const IOclRunner *runner);
static std::string cacheKey(const char *deviceKey, const char *options, const char *source);
static std::string cacheKey(const IOclRunner *runner);
private: private:
void createDirectory() const;
static std::string prefix(); static std::string prefix();
static void createDirectory();
static void save(cl_program program, const std::string &fileName);
}; };

View file

@ -29,7 +29,7 @@
#include "backend/opencl/OclCache.h" #include "backend/opencl/OclCache.h"
void xmrig::OclCache::createDirectory() const void xmrig::OclCache::createDirectory()
{ {
std::string path = prefix() + "/.cache"; std::string path = prefix() + "/.cache";
mkdir(path.c_str(), 0744); mkdir(path.c_str(), 0744);

View file

@ -31,7 +31,7 @@
#include "backend/opencl/OclCache.h" #include "backend/opencl/OclCache.h"
void xmrig::OclCache::createDirectory() const void xmrig::OclCache::createDirectory()
{ {
std::string path = prefix() + "/xmrig"; std::string path = prefix() + "/xmrig";
_mkdir(path.c_str()); _mkdir(path.c_str());

View file

@ -149,7 +149,7 @@ rapidjson::Value xmrig::OclConfig::toJSON(rapidjson::Document &doc) const
} }
std::vector<xmrig::OclLaunchData> xmrig::OclConfig::get(const Miner *miner, const Algorithm &algorithm, const std::vector<OclDevice> &devices, const char *tag) const std::vector<xmrig::OclLaunchData> xmrig::OclConfig::get(const Miner *miner, const Algorithm &algorithm, const OclPlatform &platform, const std::vector<OclDevice> &devices, const char *tag) const
{ {
std::vector<OclLaunchData> out; std::vector<OclLaunchData> out;
const OclThreads &threads = m_threads.get(algorithm); const OclThreads &threads = m_threads.get(algorithm);
@ -166,7 +166,7 @@ std::vector<xmrig::OclLaunchData> xmrig::OclConfig::get(const Miner *miner, cons
continue; continue;
} }
out.emplace_back(miner, algorithm, *this, thread, devices[thread.index()]); out.emplace_back(miner, algorithm, *this, platform, thread, devices[thread.index()]);
} }
return out; return out;

View file

@ -42,7 +42,7 @@ public:
OclPlatform platform() const; OclPlatform platform() const;
rapidjson::Value toJSON(rapidjson::Document &doc) const; rapidjson::Value toJSON(rapidjson::Document &doc) const;
std::vector<OclLaunchData> get(const Miner *miner, const Algorithm &algorithm, const std::vector<OclDevice> &devices, const char *tag) const; std::vector<OclLaunchData> get(const Miner *miner, const Algorithm &algorithm, const OclPlatform &platform, const std::vector<OclDevice> &devices, const char *tag) const;
void read(const rapidjson::Value &value); void read(const rapidjson::Value &value);
inline bool isCacheEnabled() const { return m_cache; } inline bool isCacheEnabled() const { return m_cache; }

View file

@ -28,11 +28,12 @@
#include "backend/opencl/OclConfig.h" #include "backend/opencl/OclConfig.h"
xmrig::OclLaunchData::OclLaunchData(const Miner *miner, const Algorithm &algorithm, const OclConfig &config, const OclThread &thread, const OclDevice &device) : xmrig::OclLaunchData::OclLaunchData(const Miner *miner, const Algorithm &algorithm, const OclConfig &config, const OclPlatform &platform, const OclThread &thread, const OclDevice &device) :
algorithm(algorithm), algorithm(algorithm),
cache(config.isCacheEnabled()), cache(config.isCacheEnabled()),
miner(miner), miner(miner),
device(device), device(device),
platform(platform),
thread(thread) thread(thread)
{ {
} }

View file

@ -29,6 +29,7 @@
#include "backend/opencl/OclThread.h" #include "backend/opencl/OclThread.h"
#include "backend/opencl/wrappers/OclDevice.h" #include "backend/opencl/wrappers/OclDevice.h"
#include "backend/opencl/wrappers/OclPlatform.h"
#include "crypto/common/Algorithm.h" #include "crypto/common/Algorithm.h"
#include "crypto/common/Nonce.h" #include "crypto/common/Nonce.h"
@ -46,7 +47,7 @@ class Miner;
class OclLaunchData class OclLaunchData
{ {
public: public:
OclLaunchData(const Miner *miner, const Algorithm &algorithm, const OclConfig &config, const OclThread &thread, const OclDevice &device); OclLaunchData(const Miner *miner, const Algorithm &algorithm, const OclConfig &config, const OclPlatform &platform, const OclThread &thread, const OclDevice &device);
bool isEqual(const OclLaunchData &other) const; bool isEqual(const OclLaunchData &other) const;
@ -60,6 +61,7 @@ public:
const bool cache; const bool cache;
const Miner *miner; const Miner *miner;
const OclDevice device; const OclDevice device;
const OclPlatform platform;
const OclThread thread; const OclThread thread;
}; };

View file

@ -48,15 +48,15 @@ static constexpr uint32_t kReserveCount = 4096;
xmrig::OclWorker::OclWorker(size_t index, const OclLaunchData &data) : xmrig::OclWorker::OclWorker(size_t id, const OclLaunchData &data) :
Worker(index, data.thread.affinity(), -1), Worker(id, data.thread.affinity(), -1),
m_algorithm(data.algorithm), m_algorithm(data.algorithm),
m_miner(data.miner) m_miner(data.miner)
{ {
switch (m_algorithm.family()) { switch (m_algorithm.family()) {
case Algorithm::RANDOM_X: case Algorithm::RANDOM_X:
# ifdef XMRIG_ALGO_RANDOMX # ifdef XMRIG_ALGO_RANDOMX
m_runner = new OclRxRunner(index, data); m_runner = new OclRxRunner(id, data);
# endif # endif
break; break;
@ -67,9 +67,13 @@ xmrig::OclWorker::OclWorker(size_t index, const OclLaunchData &data) :
break; break;
default: default:
m_runner = new OclCnRunner(index, data); m_runner = new OclCnRunner(id, data);
break; break;
} }
if (m_runner) {
m_runner->build();
}
} }

View file

@ -42,7 +42,7 @@ class IOclRunner;
class OclWorker : public Worker class OclWorker : public Worker
{ {
public: public:
OclWorker(size_t index, const OclLaunchData &data); OclWorker(size_t id, const OclLaunchData &data);
~OclWorker() override; ~OclWorker() override;
protected: protected:

View file

@ -573,7 +573,7 @@ __kernel void cn1_monero(__global uint4 *Scratchpad, __global ulong *states, uin
if (gIdx < Threads) if (gIdx < Threads)
# endif # endif
{ {
#pragma unroll UNROLL_FACTOR #pragma unroll CN_UNROLL
for (int i = 0; i < ITERATIONS; ++i) { for (int i = 0; i < ITERATIONS; ++i) {
ulong c[2]; ulong c[2];
@ -686,7 +686,7 @@ __kernel void cn1_v2_monero(__global uint4 *Scratchpad, __global ulong *states,
uint2 division_result = as_uint2(states[12]); uint2 division_result = as_uint2(states[12]);
uint sqrt_result = as_uint2(states[13]).s0; uint sqrt_result = as_uint2(states[13]).s0;
#pragma unroll UNROLL_FACTOR #pragma unroll CN_UNROLL
for(int i = 0; i < ITERATIONS; ++i) for(int i = 0; i < ITERATIONS; ++i)
{ {
# ifdef __NV_CL_C_VERSION # ifdef __NV_CL_C_VERSION
@ -846,7 +846,7 @@ __kernel void cn1_v2_half(__global uint4 *Scratchpad, __global ulong *states, ui
uint2 division_result = as_uint2(states[12]); uint2 division_result = as_uint2(states[12]);
uint sqrt_result = as_uint2(states[13]).s0; uint sqrt_result = as_uint2(states[13]).s0;
#pragma unroll UNROLL_FACTOR #pragma unroll CN_UNROLL
for(int i = 0; i < 0x40000; ++i) for(int i = 0; i < 0x40000; ++i)
{ {
# ifdef __NV_CL_C_VERSION # ifdef __NV_CL_C_VERSION
@ -1074,7 +1074,7 @@ __kernel void cn1_tube(__global uint4 *Scratchpad, __global ulong *states, uint
{ {
uint idx0 = a[0]; uint idx0 = a[0];
#pragma unroll UNROLL_FACTOR #pragma unroll CN_UNROLL
for (int i = 0; i < ITERATIONS; ++i) { for (int i = 0; i < ITERATIONS; ++i) {
ulong c[2]; ulong c[2];
@ -1171,7 +1171,7 @@ __kernel void cn1(__global uint4 *Scratchpad, __global ulong *states, uint varia
{ {
uint idx0 = a[0]; uint idx0 = a[0];
#pragma unroll UNROLL_FACTOR #pragma unroll CN_UNROLL
for (int i = 0; i < ITERATIONS; ++i) { for (int i = 0; i < ITERATIONS; ++i) {
ulong c[2]; ulong c[2];

View file

@ -75,7 +75,7 @@ __kernel void cn1_v2_rwz(__global uint4 *Scratchpad, __global ulong *states, uin
uint2 division_result = as_uint2(states[12]); uint2 division_result = as_uint2(states[12]);
uint sqrt_result = as_uint2(states[13]).s0; uint sqrt_result = as_uint2(states[13]).s0;
#pragma unroll UNROLL_FACTOR #pragma unroll CN_UNROLL
for(int i = 0; i < 0x60000; ++i) for(int i = 0; i < 0x60000; ++i)
{ {
# ifdef __NV_CL_C_VERSION # ifdef __NV_CL_C_VERSION
@ -235,7 +235,7 @@ __kernel void cn1_v2_zls(__global uint4 *Scratchpad, __global ulong *states, uin
uint2 division_result = as_uint2(states[12]); uint2 division_result = as_uint2(states[12]);
uint sqrt_result = as_uint2(states[13]).s0; uint sqrt_result = as_uint2(states[13]).s0;
#pragma unroll UNROLL_FACTOR #pragma unroll CN_UNROLL
for(int i = 0; i < 0x60000; ++i) for(int i = 0; i < 0x60000; ++i)
{ {
# ifdef __NV_CL_C_VERSION # ifdef __NV_CL_C_VERSION
@ -395,7 +395,7 @@ __kernel void cn1_v2_double(__global uint4 *Scratchpad, __global ulong *states,
uint2 division_result = as_uint2(states[12]); uint2 division_result = as_uint2(states[12]);
uint sqrt_result = as_uint2(states[13]).s0; uint sqrt_result = as_uint2(states[13]).s0;
#pragma unroll UNROLL_FACTOR #pragma unroll CN_UNROLL
for(int i = 0; i < 0x100000; ++i) for(int i = 0; i < 0x100000; ++i)
{ {
# ifdef __NV_CL_C_VERSION # ifdef __NV_CL_C_VERSION

View file

@ -199,7 +199,7 @@ struct SharedMemChunk
float4 va[16]; float4 va[16];
}; };
__attribute__((reqd_work_group_size(WORKSIZE_GPU * 16, 1, 1))) __attribute__((reqd_work_group_size(WORKSIZE * 16, 1, 1)))
__kernel void cn1_cn_gpu(__global int *lpad_in, __global int *spad, uint numThreads) __kernel void cn1_cn_gpu(__global int *lpad_in, __global int *spad, uint numThreads)
{ {
const uint gIdx = getIdx(); const uint gIdx = getIdx();
@ -214,7 +214,7 @@ __kernel void cn1_cn_gpu(__global int *lpad_in, __global int *spad, uint numThre
__global int* lpad = (__global int*)((__global char*)lpad_in + MEMORY * (gIdx/16)); __global int* lpad = (__global int*)((__global char*)lpad_in + MEMORY * (gIdx/16));
__local struct SharedMemChunk smem_in[WORKSIZE_GPU]; __local struct SharedMemChunk smem_in[WORKSIZE];
__local struct SharedMemChunk* smem = smem_in + chunk; __local struct SharedMemChunk* smem = smem_in + chunk;
uint tid = get_local_id(0) % 16; uint tid = get_local_id(0) % 16;

View file

@ -66,7 +66,7 @@ __kernel void cn1_cryptonight_r_N(__global uint4 *Scratchpad, __global ulong *st
uint r2 = as_uint2(states[13]).s0; uint r2 = as_uint2(states[13]).s0;
uint r3 = as_uint2(states[13]).s1; uint r3 = as_uint2(states[13]).s1;
#pragma unroll UNROLL_FACTOR #pragma unroll CN_UNROLL
for(int i = 0; i < ITERATIONS; ++i) for(int i = 0; i < ITERATIONS; ++i)
{ {
# ifdef __NV_CL_C_VERSION # ifdef __NV_CL_C_VERSION

View file

@ -33,6 +33,7 @@ namespace xmrig {
class Job; class Job;
class OclLaunchData;
class IOclRunner class IOclRunner
@ -42,6 +43,11 @@ public:
virtual bool selfTest() const = 0; virtual bool selfTest() const = 0;
virtual const char *buildOptions() const = 0; virtual const char *buildOptions() const = 0;
virtual const char *deviceKey() const = 0;
virtual const char *source() const = 0;
virtual const OclLaunchData &data() const = 0;
virtual size_t threadId() const = 0;
virtual void build() = 0;
virtual void run(uint32_t *hashOutput) = 0; virtual void run(uint32_t *hashOutput) = 0;
virtual void set(const Job &job) = 0; virtual void set(const Job &job) = 0;
}; };

View file

@ -51,6 +51,12 @@ if (WITH_OPENCL)
list(APPEND HEADERS_BACKEND_OPENCL src/backend/opencl/runners/OclRxRunner.h) list(APPEND HEADERS_BACKEND_OPENCL src/backend/opencl/runners/OclRxRunner.h)
list(APPEND SOURCES_BACKEND_OPENCL src/backend/opencl/runners/OclRxRunner.cpp) list(APPEND SOURCES_BACKEND_OPENCL src/backend/opencl/runners/OclRxRunner.cpp)
endif() endif()
if (WITH_STRICT_CACHE)
add_definitions(/DXMRIG_STRICT_OPENCL_CACHE)
else()
remove_definitions(/DXMRIG_STRICT_OPENCL_CACHE)
endif()
else() else()
remove_definitions(/DXMRIG_FEATURE_OPENCL) remove_definitions(/DXMRIG_FEATURE_OPENCL)

View file

@ -23,29 +23,49 @@
*/ */
#include "backend/opencl/cl/OclSource.h"
#include "backend/opencl/OclCache.h"
#include "backend/opencl/OclLaunchData.h" #include "backend/opencl/OclLaunchData.h"
#include "backend/opencl/runners/OclBaseRunner.h" #include "backend/opencl/runners/OclBaseRunner.h"
#include "backend/opencl/wrappers/OclLib.h" #include "backend/opencl/wrappers/OclLib.h"
#include "base/io/log/Log.h"
#include "base/net/stratum/Job.h" #include "base/net/stratum/Job.h"
xmrig::OclBaseRunner::OclBaseRunner(size_t, const OclLaunchData &data) : xmrig::OclBaseRunner::OclBaseRunner(size_t id, const OclLaunchData &data) :
m_algorithm(data.algorithm), m_algorithm(data.algorithm),
m_ctx(data.ctx) m_source(OclSource::get(data.algorithm)),
m_data(data),
m_threadId(id)
{ {
cl_int ret; cl_int ret;
m_queue = OclLib::createCommandQueue(m_ctx, data.device.id(), &ret); m_queue = OclLib::createCommandQueue(data.ctx, data.device.id(), &ret);
if (ret != CL_SUCCESS) { if (ret != CL_SUCCESS) {
return; return;
} }
m_input = OclLib::createBuffer(m_ctx, CL_MEM_READ_ONLY, Job::kMaxBlobSize, nullptr, &ret); m_input = OclLib::createBuffer(data.ctx, CL_MEM_READ_ONLY, Job::kMaxBlobSize, nullptr, &ret);
m_output = OclLib::createBuffer(m_ctx, CL_MEM_READ_WRITE, sizeof(cl_uint) * 0x100, nullptr, &ret); m_output = OclLib::createBuffer(data.ctx, CL_MEM_READ_WRITE, sizeof(cl_uint) * 0x100, nullptr, &ret);
m_deviceKey = data.device.name();
# ifdef XMRIG_STRICT_OPENCL_CACHE
m_deviceKey += ":";
m_deviceKey += data.platform.version();
m_deviceKey += ":";
m_deviceKey += OclLib::getDeviceString(data.device.id(), CL_DRIVER_VERSION);
# endif
# if defined(__x86_64__) || defined(_M_AMD64) || defined (__arm64__) || defined (__aarch64__)
m_deviceKey += ":64";
# endif
} }
xmrig::OclBaseRunner::~OclBaseRunner() xmrig::OclBaseRunner::~OclBaseRunner()
{ {
OclLib::releaseProgram(m_program);
OclLib::releaseMemObject(m_input); OclLib::releaseMemObject(m_input);
OclLib::releaseMemObject(m_output); OclLib::releaseMemObject(m_output);
@ -55,14 +75,17 @@ xmrig::OclBaseRunner::~OclBaseRunner()
bool xmrig::OclBaseRunner::selfTest() const bool xmrig::OclBaseRunner::selfTest() const
{ {
return m_queue != nullptr && m_input != nullptr && m_output != nullptr && !m_options.empty(); return m_queue != nullptr && m_input != nullptr && m_output != nullptr && !m_options.empty() && m_source != nullptr;
} }
void xmrig::OclBaseRunner::build()
const char *xmrig::OclBaseRunner::buildOptions() const
{ {
return m_options.c_str(); if (!selfTest()) {
return;
}
m_program = OclCache::build(this);
} }

View file

@ -43,21 +43,31 @@ class OclLaunchData;
class OclBaseRunner : public IOclRunner class OclBaseRunner : public IOclRunner
{ {
public: public:
OclBaseRunner(size_t index, const OclLaunchData &data); OclBaseRunner(size_t id, const OclLaunchData &data);
~OclBaseRunner() override; ~OclBaseRunner() override;
protected: protected:
inline const char *buildOptions() const override { return m_options.c_str(); }
inline const char *deviceKey() const override { return m_deviceKey.c_str(); }
inline const char *source() const override { return m_source; }
inline const OclLaunchData &data() const override { return m_data; }
inline size_t threadId() const override { return m_threadId; }
bool selfTest() const override; bool selfTest() const override;
const char *buildOptions() const override; void build() override;
void run(uint32_t *hashOutput) override; void run(uint32_t *hashOutput) override;
void set(const Job &job) override; void set(const Job &job) override;
protected: protected:
Algorithm m_algorithm; Algorithm m_algorithm;
cl_command_queue m_queue = nullptr; cl_command_queue m_queue = nullptr;
cl_context m_ctx;
cl_mem m_input = nullptr; cl_mem m_input = nullptr;
cl_mem m_output = nullptr; cl_mem m_output = nullptr;
cl_program m_program = nullptr;
const char *m_source;
const OclLaunchData &m_data;
const size_t m_threadId;
std::string m_deviceKey;
std::string m_options; std::string m_options;
}; };

View file

@ -38,16 +38,16 @@ xmrig::OclCnRunner::OclCnRunner(size_t index, const OclLaunchData &data) : OclBa
const size_t g_thd = data.thread.intensity(); const size_t g_thd = data.thread.intensity();
cl_int ret; cl_int ret;
m_scratchpads = OclLib::createBuffer(m_ctx, CL_MEM_READ_WRITE, data.algorithm.l3() * g_thd, nullptr, &ret); m_scratchpads = OclLib::createBuffer(data.ctx, CL_MEM_READ_WRITE, data.algorithm.l3() * g_thd, nullptr, &ret);
if (ret != CL_SUCCESS) { if (ret != CL_SUCCESS) {
return; return;
} }
m_states = OclLib::createBuffer(m_ctx, CL_MEM_READ_WRITE, 200 * g_thd, nullptr, &ret); m_states = OclLib::createBuffer(data.ctx, CL_MEM_READ_WRITE, 200 * g_thd, nullptr, &ret);
m_blake256 = OclLib::createBuffer(m_ctx, CL_MEM_READ_WRITE, sizeof(cl_uint) * (g_thd + 2), nullptr, &ret); m_blake256 = OclLib::createBuffer(data.ctx, CL_MEM_READ_WRITE, sizeof(cl_uint) * (g_thd + 2), nullptr, &ret);
m_groestl256 = OclLib::createBuffer(m_ctx, CL_MEM_READ_WRITE, sizeof(cl_uint) * (g_thd + 2), nullptr, &ret); m_groestl256 = OclLib::createBuffer(data.ctx, CL_MEM_READ_WRITE, sizeof(cl_uint) * (g_thd + 2), nullptr, &ret);
m_jh256 = OclLib::createBuffer(m_ctx, CL_MEM_READ_WRITE, sizeof(cl_uint) * (g_thd + 2), nullptr, &ret); m_jh256 = OclLib::createBuffer(data.ctx, CL_MEM_READ_WRITE, sizeof(cl_uint) * (g_thd + 2), nullptr, &ret);
m_skein512 = OclLib::createBuffer(m_ctx, CL_MEM_READ_WRITE, sizeof(cl_uint) * (g_thd + 2), nullptr, &ret); m_skein512 = OclLib::createBuffer(data.ctx, CL_MEM_READ_WRITE, sizeof(cl_uint) * (g_thd + 2), nullptr, &ret);
uint32_t stridedIndex = data.thread.stridedIndex(); uint32_t stridedIndex = data.thread.stridedIndex();
if (data.device.vendorId() == OCL_VENDOR_NVIDIA) { if (data.device.vendorId() == OCL_VENDOR_NVIDIA) {

View file

@ -375,23 +375,13 @@ cl_int xmrig::OclLib::releaseCommandQueue(cl_command_queue command_queue)
assert(pReleaseCommandQueue != nullptr); assert(pReleaseCommandQueue != nullptr);
assert(pGetCommandQueueInfo != nullptr); assert(pGetCommandQueueInfo != nullptr);
finish(command_queue);
cl_int ret = pReleaseCommandQueue(command_queue); cl_int ret = pReleaseCommandQueue(command_queue);
if (ret != CL_SUCCESS) { if (ret != CL_SUCCESS) {
LOG_ERR(kErrorTemplate, OclError::toString(ret), kReleaseCommandQueue); LOG_ERR(kErrorTemplate, OclError::toString(ret), kReleaseCommandQueue);
} }
cl_uint refs = 0;
ret = pGetCommandQueueInfo(command_queue, CL_QUEUE_REFERENCE_COUNT, sizeof(refs), &refs, nullptr);
if (ret == CL_SUCCESS && refs > 0) {
std::this_thread::sleep_for(std::chrono::milliseconds(200));
}
# ifndef NDEBUG
ret = pGetCommandQueueInfo(command_queue, CL_QUEUE_REFERENCE_COUNT, sizeof(refs), &refs, nullptr);
assert(ret == CL_SUCCESS);
assert(refs == 0);
# endif
return ret; return ret;
} }
@ -447,6 +437,10 @@ cl_int xmrig::OclLib::releaseProgram(cl_program program)
{ {
assert(pReleaseProgram != nullptr); assert(pReleaseProgram != nullptr);
if (program == nullptr) {
return CL_SUCCESS;
}
const cl_int ret = pReleaseProgram(program); const cl_int ret = pReleaseProgram(program);
if (ret != CL_SUCCESS) { if (ret != CL_SUCCESS) {
LOG_ERR(kErrorTemplate, OclError::toString(ret), kReleaseProgram); LOG_ERR(kErrorTemplate, OclError::toString(ret), kReleaseProgram);
@ -500,6 +494,8 @@ cl_program xmrig::OclLib::createProgramWithBinary(cl_context context, cl_uint nu
auto result = pCreateProgramWithBinary(context, num_devices, device_list, lengths, binaries, binary_status, errcode_ret); auto result = pCreateProgramWithBinary(context, num_devices, device_list, lengths, binaries, binary_status, errcode_ret);
if (*errcode_ret != CL_SUCCESS) { if (*errcode_ret != CL_SUCCESS) {
LOG_ERR(kErrorTemplate, OclError::toString(*errcode_ret), kCreateProgramWithBinary); LOG_ERR(kErrorTemplate, OclError::toString(*errcode_ret), kCreateProgramWithBinary);
return nullptr;
} }
return result; return result;
@ -513,6 +509,8 @@ cl_program xmrig::OclLib::createProgramWithSource(cl_context context, cl_uint co
auto result = pCreateProgramWithSource(context, count, strings, lengths, errcode_ret); auto result = pCreateProgramWithSource(context, count, strings, lengths, errcode_ret);
if (*errcode_ret != CL_SUCCESS) { if (*errcode_ret != CL_SUCCESS) {
LOG_ERR(kErrorTemplate, OclError::toString(*errcode_ret), kCreateProgramWithSource); LOG_ERR(kErrorTemplate, OclError::toString(*errcode_ret), kCreateProgramWithSource);
return nullptr;
} }
return result; return result;