Restored OpenCL interleave.

This commit is contained in:
XMRig 2019-09-06 11:43:02 +07:00
parent 0e362f38bc
commit 9dc2525ce1
11 changed files with 258 additions and 14 deletions

View file

@ -16,6 +16,7 @@ option(WITH_ASM "Enable ASM PoW implementations" ON)
option(WITH_EMBEDDED_CONFIG "Enable internal embedded JSON config" OFF) option(WITH_EMBEDDED_CONFIG "Enable internal embedded JSON config" OFF)
option(WITH_OPENCL "Enable OpenCL backend" OFF) option(WITH_OPENCL "Enable OpenCL backend" OFF)
option(WITH_STRICT_CACHE "Enable strict checks for OpenCL cache" ON) option(WITH_STRICT_CACHE "Enable strict checks for OpenCL cache" ON)
option(WITH_INTERLEAVE_DEBUG_LOG "Enable debug log for threads interleave" OFF)
option(BUILD_STATIC "Build static binary" OFF) option(BUILD_STATIC "Build static binary" OFF)
option(ARM_TARGET "Force use specific ARM target 8 or 7" 0) option(ARM_TARGET "Force use specific ARM target 8 or 7" 0)

View file

@ -28,7 +28,7 @@
#include <atomic> #include <atomic>
#include <stdint.h> #include <cstdint>
#include "backend/common/interfaces/IWorker.h" #include "backend/common/interfaces/IWorker.h"

View file

@ -158,7 +158,7 @@ std::vector<xmrig::OclLaunchData> xmrig::OclConfig::get(const Miner *miner, cons
return out; return out;
} }
out.reserve(threads.count()); out.reserve(threads.count() * 2);
for (const OclThread &thread : threads.data()) { for (const OclThread &thread : threads.data()) {
if (thread.index() >= devices.size()) { if (thread.index() >= devices.size()) {
@ -166,8 +166,18 @@ std::vector<xmrig::OclLaunchData> xmrig::OclConfig::get(const Miner *miner, cons
continue; continue;
} }
for (int64_t affinity : thread.threads()) { if (thread.threads().size() > 1) {
out.emplace_back(miner, algorithm, *this, platform, thread, devices[thread.index()], affinity); auto interleave = std::make_shared<OclInterleave>(thread.threads().size());
for (int64_t affinity : thread.threads()) {
OclLaunchData data(miner, algorithm, *this, platform, thread, devices[thread.index()], affinity);
data.interleave = interleave;
out.emplace_back(data);
}
}
else {
out.emplace_back(miner, algorithm, *this, platform, thread, devices[thread.index()], thread.threads()[0]);
} }
} }

View file

@ -0,0 +1,118 @@
/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "backend/opencl/OclInterleave.h"
#include "base/io/log/Log.h"
#include "base/tools/Chrono.h"
#include <cinttypes>
#include <thread>
// Keeps the threads that share this object evenly spaced in time.
//
// With N threads and an average run time T, the ideal spacing between two consecutive starts is T/N.
// When the calling thread arrives sooner than m_threshold * T/N after the previous caller, it sleeps
// for the remainder of the T/N slot. The threshold is lowered to 0.75 once a correction has been made.
//
// id     - thread id, only used by the optional debug log message.
// return - number of milliseconds the caller was put to sleep, 0 if no correction was required.
uint64_t xmrig::OclInterleave::adjustDelay(size_t id)
{
    const uint64_t now = Chrono::steadyMSecs();
    uint64_t pause     = 0;

    {
        std::lock_guard<std::mutex> guard(m_mutex);

        // Time since the previous caller; the stored timestamp is refreshed on every call.
        const uint64_t elapsed = now - m_timestamp;
        m_timestamp            = now;

        // Ideal interval between two consecutive starts (T/N).
        const double slot = m_averageRunTime / m_threads;

        if (elapsed > 0 && elapsed < m_threshold * slot) {
            pause       = static_cast<uint64_t>(slot - elapsed);
            m_threshold = 0.75;
        }
    }

    if (pause != 0) {
        // A computed pause of 400 ms or more is replaced by a fixed 200 ms sleep.
        pause = (pause >= 400) ? 200 : pause;

        std::this_thread::sleep_for(std::chrono::milliseconds(pause));

#       ifdef XMRIG_INTERLEAVE_DEBUG
        LOG_WARN("Thread #%zu was paused for %" PRIu64 " ms to adjust interleaving", id, pause);
#       endif
    }

    return pause;
}
// Staggers the threads sharing this object when they resume work.
//
// Every call consumes the next value of the resume counter (0, 1, 2, ... counted from the value last
// passed to setResumeCounter()) and sleeps for
//     counter * averageRunTime / threads / firstThreadSpeedupCoeff
// milliseconds, limited to 1000 ms. A caller that gets a zero delay returns immediately.
//
// id     - thread id, only used by the optional debug log message.
// return - number of milliseconds the caller was put to sleep, 0 if it was not delayed.
uint64_t xmrig::OclInterleave::resumeDelay(size_t id)
{
    // Dividing by this coefficient makes every computed pause 1.25 times shorter.
    constexpr double firstThreadSpeedupCoeff = 1.25;

    uint64_t delay = 0;

    {
        std::lock_guard<std::mutex> lock(m_mutex);

        delay = static_cast<uint64_t>(m_resumeCounter * m_averageRunTime / m_threads / firstThreadSpeedupCoeff);
        ++m_resumeCounter;
    }

    if (delay == 0) {
        return 0;
    }

    // Never hold a thread back for more than one second.
    if (delay > 1000) {
        delay = 1000;
    }

#   ifdef XMRIG_INTERLEAVE_DEBUG
    LOG_WARN("Thread #%zu will be paused for %" PRIu64 " ms before resuming", id, delay);
#   endif

    std::this_thread::sleep_for(std::chrono::milliseconds(delay));

    return delay;
}
// Replaces the resume counter that resumeDelay() multiplies its pause by.
// The write is done while holding the internal mutex.
void xmrig::OclInterleave::setResumeCounter(uint32_t value)
{
    const std::lock_guard<std::mutex> guard(m_mutex);

    m_resumeCounter = value;
}
// Feeds one measured run time into the running average used by adjustDelay() and resumeDelay().
//
// The average is an exponential moving average: the new sample contributes sampleWeight and the
// previously accumulated average contributes historyWeight (the two sum to 1.0).
//   sampleWeight = 1.0 - only the latest sample counts
//   sampleWeight = 0.5 - the latest sample weighs as much as all earlier ones together
//   sampleWeight = 0.1 - the latest sample contributes 10%, the accumulated history 90%
void xmrig::OclInterleave::setRunTime(uint64_t time)
{
    constexpr double sampleWeight  = 0.1;
    constexpr double historyWeight = 1.0 - sampleWeight;

    std::lock_guard<std::mutex> guard(m_mutex);

    m_averageRunTime = m_averageRunTime * historyWeight + time * sampleWeight;
}

View file

@ -0,0 +1,63 @@
/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef XMRIG_OCLINTERLEAVE_H
#define XMRIG_OCLINTERLEAVE_H
#include <cstddef>
#include <cstdint>
#include <memory>
#include <mutex>
namespace xmrig {


// Shared timing state used to spread ("interleave") the start times of several worker threads.
//
// A single instance is shared between the cooperating threads through OclInterleavePtr.
// All mutable state is protected by an internal mutex, which also makes the class non-copyable.
class OclInterleave
{
public:
    OclInterleave() = delete;

    // threads - number of threads sharing this object. It is used as a divisor when computing
    //           delays, so it is expected to be non-zero.
    // The constructor is explicit so that a plain integer is never silently converted into an
    // OclInterleave object.
    explicit OclInterleave(size_t threads) : m_threads(threads) {}

    // Sleeps the caller if it starts too soon after the previous caller; returns the pause in ms.
    uint64_t adjustDelay(size_t id);

    // Sleeps the caller for a pause proportional to the resume counter; returns the pause in ms.
    uint64_t resumeDelay(size_t id);

    // Sets the counter used by resumeDelay().
    void setResumeCounter(uint32_t value);

    // Adds one measured run time to the running average.
    void setRunTime(uint64_t time);

private:
    const size_t m_threads;             // Number of threads sharing this object.
    double m_averageRunTime = 0.0;      // Running average of the values passed to setRunTime().
    double m_threshold      = 0.95;     // Fraction of the ideal interval below which adjustDelay() pauses the caller.
    std::mutex m_mutex;                 // Guards every mutable member below and above.
    uint32_t m_resumeCounter = 0;       // Incremented by each resumeDelay() call, overwritten by setResumeCounter().
    uint64_t m_timestamp     = 0;       // Timestamp recorded by the most recent adjustDelay() call.
};


using OclInterleavePtr = std::shared_ptr<OclInterleave>;


} /* namespace xmrig */
#endif /* XMRIG_OCLINTERLEAVE_H */

View file

@ -27,6 +27,7 @@
#define XMRIG_OCLLAUNCHDATA_H #define XMRIG_OCLLAUNCHDATA_H
#include "backend/opencl/OclInterleave.h"
#include "backend/opencl/OclThread.h" #include "backend/opencl/OclThread.h"
#include "backend/opencl/wrappers/OclDevice.h" #include "backend/opencl/wrappers/OclDevice.h"
#include "backend/opencl/wrappers/OclPlatform.h" #include "backend/opencl/wrappers/OclPlatform.h"
@ -64,6 +65,7 @@ public:
const OclDevice device; const OclDevice device;
const OclPlatform platform; const OclPlatform platform;
const OclThread thread; const OclThread thread;
OclInterleavePtr interleave;
}; };

View file

@ -26,6 +26,7 @@
#define XMRIG_OCLTHREAD_H #define XMRIG_OCLTHREAD_H
#include "crypto/common/Algorithm.h"
#include "rapidjson/fwd.h" #include "rapidjson/fwd.h"
@ -39,7 +40,8 @@ class OclThread
{ {
public: public:
OclThread() = delete; OclThread() = delete;
OclThread(uint32_t index, uint32_t intensity, uint32_t worksize, uint32_t stridedIndex, uint32_t memChunk, uint32_t threads) : OclThread(uint32_t index, uint32_t intensity, uint32_t worksize, uint32_t stridedIndex, uint32_t memChunk, uint32_t threads, const Algorithm &algorithm) :
m_algorithm(algorithm),
m_threads(threads, -1), m_threads(threads, -1),
m_index(index), m_index(index),
m_memChunk(memChunk), m_memChunk(memChunk),
@ -72,6 +74,7 @@ public:
private: private:
inline void setIntensity(uint32_t intensity) { m_intensity = intensity / m_worksize * m_worksize; } inline void setIntensity(uint32_t intensity) { m_intensity = intensity / m_worksize * m_worksize; }
Algorithm m_algorithm;
int m_datasetHost = -1; int m_datasetHost = -1;
std::vector<int64_t> m_threads; std::vector<int64_t> m_threads;
uint32_t m_bfactor = 6; uint32_t m_bfactor = 6;

View file

@ -24,12 +24,11 @@
*/ */
#include <assert.h>
#include <thread>
#include "backend/opencl/OclWorker.h" #include "backend/opencl/OclWorker.h"
#include "backend/opencl/runners/OclCnRunner.h" #include "backend/opencl/runners/OclCnRunner.h"
#include "base/io/log/Log.h"
#include "base/tools/Chrono.h"
#include "core/Miner.h" #include "core/Miner.h"
#include "crypto/common/Nonce.h" #include "crypto/common/Nonce.h"
#include "net/JobResults.h" #include "net/JobResults.h"
@ -40,6 +39,10 @@
#endif #endif
#include <cassert>
#include <thread>
namespace xmrig { namespace xmrig {
@ -57,7 +60,8 @@ xmrig::OclWorker::OclWorker(size_t id, const OclLaunchData &data) :
Worker(id, data.affinity, -1), Worker(id, data.affinity, -1),
m_algorithm(data.algorithm), m_algorithm(data.algorithm),
m_miner(data.miner), m_miner(data.miner),
m_intensity(data.thread.intensity()) m_intensity(data.thread.intensity()),
m_interleave(data.interleave)
{ {
switch (m_algorithm.family()) { switch (m_algorithm.family()) {
case Algorithm::RANDOM_X: case Algorithm::RANDOM_X:
@ -101,6 +105,10 @@ void xmrig::OclWorker::start()
while (Nonce::sequence(Nonce::OPENCL) > 0) { while (Nonce::sequence(Nonce::OPENCL) > 0) {
if (Nonce::isPaused()) { if (Nonce::isPaused()) {
if (m_interleave) {
m_interleave->setResumeCounter(0);
}
do { do {
std::this_thread::sleep_for(std::chrono::milliseconds(200)); std::this_thread::sleep_for(std::chrono::milliseconds(200));
} }
@ -110,11 +118,19 @@ void xmrig::OclWorker::start()
break; break;
} }
if (m_interleave) {
m_interleave->resumeDelay(m_id);
}
consumeJob(); consumeJob();
} }
while (!Nonce::isOutdated(Nonce::OPENCL, m_job.sequence())) { while (!Nonce::isOutdated(Nonce::OPENCL, m_job.sequence())) {
storeStats(); if (m_interleave) {
m_interleave->adjustDelay(m_id);
}
const uint64_t t = Chrono::steadyMSecs();
if (!m_runner->run(*m_job.nonce(), results)) { if (!m_runner->run(*m_job.nonce(), results)) {
return; return;
@ -125,8 +141,8 @@ void xmrig::OclWorker::start()
} }
m_job.nextRound(roundSize(m_intensity), m_intensity); m_job.nextRound(roundSize(m_intensity), m_intensity);
m_count += m_intensity;
storeStats(t);
std::this_thread::yield(); std::this_thread::yield();
} }
@ -144,3 +160,19 @@ void xmrig::OclWorker::consumeJob()
m_job.add(m_miner->job(), Nonce::sequence(Nonce::OPENCL), roundSize(m_intensity) * m_intensity); m_job.add(m_miner->job(), Nonce::sequence(Nonce::OPENCL), roundSize(m_intensity) * m_intensity);
m_runner->set(m_job.currentJob(), m_job.blob()); m_runner->set(m_job.currentJob(), m_job.blob());
} }
// Accounts for one finished round.
//
// Nothing is recorded while mining is paused. Otherwise the hash counter grows by the configured
// intensity, the time elapsed since `t` is reported to the interleave object (when this worker
// has one), and the base class statistics are updated.
//
// t - Chrono::steadyMSecs() value taken by the caller before the round was started.
void xmrig::OclWorker::storeStats(uint64_t t)
{
    if (!Nonce::isPaused()) {
        m_count += m_intensity;

        if (m_interleave) {
            const uint64_t elapsed = Chrono::steadyMSecs() - t;
            m_interleave->setRunTime(elapsed);
        }

        Worker::storeStats();
    }
}

View file

@ -42,20 +42,29 @@ class IOclRunner;
class OclWorker : public Worker class OclWorker : public Worker
{ {
public: public:
OclWorker() = delete;
OclWorker(const OclWorker &other) = delete;
OclWorker(OclWorker &&other) = delete;
OclWorker(size_t id, const OclLaunchData &data); OclWorker(size_t id, const OclLaunchData &data);
~OclWorker() override; ~OclWorker() override;
OclWorker &operator=(const OclWorker &other) = delete;
OclWorker &operator=(OclWorker &&other) = delete;
protected: protected:
bool selfTest() override; bool selfTest() override;
void start() override; void start() override;
private: private:
void consumeJob(); void consumeJob();
void storeStats(uint64_t ts);
const Algorithm m_algorithm; const Algorithm m_algorithm;
const Miner *m_miner; const Miner *m_miner;
const uint32_t m_intensity; const uint32_t m_intensity;
IOclRunner *m_runner = nullptr; IOclRunner *m_runner = nullptr;
OclInterleavePtr m_interleave;
WorkerJob<1> m_job; WorkerJob<1> m_job;
}; };

View file

@ -13,6 +13,7 @@ if (WITH_OPENCL)
src/backend/opencl/OclBackend.h src/backend/opencl/OclBackend.h
src/backend/opencl/OclCache.h src/backend/opencl/OclCache.h
src/backend/opencl/OclConfig.h src/backend/opencl/OclConfig.h
src/backend/opencl/OclInterleave.h
src/backend/opencl/OclLaunchData.h src/backend/opencl/OclLaunchData.h
src/backend/opencl/OclThread.h src/backend/opencl/OclThread.h
src/backend/opencl/OclThreads.h src/backend/opencl/OclThreads.h
@ -38,6 +39,7 @@ if (WITH_OPENCL)
src/backend/opencl/OclBackend.cpp src/backend/opencl/OclBackend.cpp
src/backend/opencl/OclCache.cpp src/backend/opencl/OclCache.cpp
src/backend/opencl/OclConfig.cpp src/backend/opencl/OclConfig.cpp
src/backend/opencl/OclInterleave.cpp
src/backend/opencl/OclLaunchData.cpp src/backend/opencl/OclLaunchData.cpp
src/backend/opencl/OclThread.cpp src/backend/opencl/OclThread.cpp
src/backend/opencl/OclThreads.cpp src/backend/opencl/OclThreads.cpp
@ -69,6 +71,10 @@ if (WITH_OPENCL)
else() else()
remove_definitions(/DXMRIG_STRICT_OPENCL_CACHE) remove_definitions(/DXMRIG_STRICT_OPENCL_CACHE)
endif() endif()
if (WITH_INTERLEAVE_DEBUG_LOG)
add_definitions(/DXMRIG_INTERLEAVE_DEBUG)
endif()
else() else()
remove_definitions(/DXMRIG_FEATURE_OPENCL) remove_definitions(/DXMRIG_FEATURE_OPENCL)

View file

@ -186,7 +186,7 @@ void xmrig::OclDevice::generate(const Algorithm &algorithm, OclThreads &threads)
const uint32_t memChunk = getMemChunk(algorithm); const uint32_t memChunk = getMemChunk(algorithm);
const uint32_t threadCount = ((globalMem() - intensity * 2 * algorithm.l3()) > 128 * oneMiB) ? 2 : 1; const uint32_t threadCount = ((globalMem() - intensity * 2 * algorithm.l3()) > 128 * oneMiB) ? 2 : 1;
threads.add(OclThread(index(), intensity, worksize, stridedIndex, memChunk, threadCount)); threads.add(OclThread(index(), intensity, worksize, stridedIndex, memChunk, threadCount, algorithm));
} }
@ -258,7 +258,7 @@ uint32_t xmrig::OclDevice::getPossibleIntensity(const Algorithm &algorithm) cons
const size_t minFreeMem = (maxThreads == 40000u ? 512u : 128u) * oneMiB; const size_t minFreeMem = (maxThreads == 40000u ? 512u : 128u) * oneMiB;
const size_t availableMem = freeMem() - minFreeMem; const size_t availableMem = freeMem() - minFreeMem;
const size_t perThread = algorithm.l3() + 224u; const size_t perThread = algorithm.l3() + 224u;
const uint32_t maxIntensity = static_cast<uint32_t>(availableMem / perThread); const auto maxIntensity = static_cast<uint32_t>(availableMem / perThread);
return std::min<uint32_t>(maxThreads, maxIntensity); return std::min<uint32_t>(maxThreads, maxIntensity);
} }