Restored OpenCL interleave.
This commit is contained in:
parent
0e362f38bc
commit
9dc2525ce1
11 changed files with 258 additions and 14 deletions
|
@ -16,6 +16,7 @@ option(WITH_ASM "Enable ASM PoW implementations" ON)
|
|||
option(WITH_EMBEDDED_CONFIG "Enable internal embedded JSON config" OFF)
|
||||
option(WITH_OPENCL "Enable OpenCL backend" OFF)
|
||||
option(WITH_STRICT_CACHE "Enable strict checks for OpenCL cache" ON)
|
||||
option(WITH_INTERLEAVE_DEBUG_LOG "Enable debug log for threads interleave" OFF)
|
||||
|
||||
option(BUILD_STATIC "Build static binary" OFF)
|
||||
option(ARM_TARGET "Force use specific ARM target 8 or 7" 0)
|
||||
|
|
|
@ -28,7 +28,7 @@
|
|||
|
||||
|
||||
#include <atomic>
|
||||
#include <stdint.h>
|
||||
#include <cstdint>
|
||||
|
||||
|
||||
#include "backend/common/interfaces/IWorker.h"
|
||||
|
|
|
@ -158,7 +158,7 @@ std::vector<xmrig::OclLaunchData> xmrig::OclConfig::get(const Miner *miner, cons
|
|||
return out;
|
||||
}
|
||||
|
||||
out.reserve(threads.count());
|
||||
out.reserve(threads.count() * 2);
|
||||
|
||||
for (const OclThread &thread : threads.data()) {
|
||||
if (thread.index() >= devices.size()) {
|
||||
|
@ -166,8 +166,18 @@ std::vector<xmrig::OclLaunchData> xmrig::OclConfig::get(const Miner *miner, cons
|
|||
continue;
|
||||
}
|
||||
|
||||
for (int64_t affinity : thread.threads()) {
|
||||
out.emplace_back(miner, algorithm, *this, platform, thread, devices[thread.index()], affinity);
|
||||
if (thread.threads().size() > 1) {
|
||||
auto interleave = std::make_shared<OclInterleave>(thread.threads().size());
|
||||
|
||||
for (int64_t affinity : thread.threads()) {
|
||||
OclLaunchData data(miner, algorithm, *this, platform, thread, devices[thread.index()], affinity);
|
||||
data.interleave = interleave;
|
||||
|
||||
out.emplace_back(data);
|
||||
}
|
||||
}
|
||||
else {
|
||||
out.emplace_back(miner, algorithm, *this, platform, thread, devices[thread.index()], thread.threads()[0]);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
118
src/backend/opencl/OclInterleave.cpp
Normal file
118
src/backend/opencl/OclInterleave.cpp
Normal file
|
@ -0,0 +1,118 @@
|
|||
/* XMRig
|
||||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
|
||||
#include "backend/opencl/OclInterleave.h"
|
||||
#include "base/io/log/Log.h"
|
||||
#include "base/tools/Chrono.h"
|
||||
|
||||
|
||||
#include <cinttypes>
|
||||
#include <thread>
|
||||
|
||||
|
||||
uint64_t xmrig::OclInterleave::adjustDelay(size_t id)
|
||||
{
|
||||
const uint64_t t0 = Chrono::steadyMSecs();
|
||||
uint64_t delay = 0;
|
||||
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(m_mutex);
|
||||
|
||||
const uint64_t dt = t0 - m_timestamp;
|
||||
m_timestamp = t0;
|
||||
|
||||
// The perfect interleaving is when N threads on the same GPU start with T/N interval between each other
|
||||
// If a thread starts earlier than 0.75*T/N ms after the previous thread, delay it to restore perfect interleaving
|
||||
if ((dt > 0) && (dt < m_threshold * (m_averageRunTime / m_threads))) {
|
||||
delay = static_cast<uint64_t>(m_averageRunTime / m_threads - dt);
|
||||
m_threshold = 0.75;
|
||||
}
|
||||
}
|
||||
|
||||
if (delay == 0) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (delay >= 400) {
|
||||
delay = 200;
|
||||
}
|
||||
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(delay));
|
||||
|
||||
# ifdef XMRIG_INTERLEAVE_DEBUG
|
||||
LOG_WARN("Thread #%zu was paused for %" PRIu64 " ms to adjust interleaving", id, delay);
|
||||
# endif
|
||||
|
||||
return delay;
|
||||
}
|
||||
|
||||
|
||||
uint64_t xmrig::OclInterleave::resumeDelay(size_t id)
|
||||
{
|
||||
uint64_t delay = 0;
|
||||
|
||||
{
|
||||
constexpr const double firstThreadSpeedupCoeff = 1.25;
|
||||
|
||||
std::lock_guard<std::mutex> lock(m_mutex);
|
||||
delay = static_cast<uint64_t>(m_resumeCounter * m_averageRunTime / m_threads / firstThreadSpeedupCoeff);
|
||||
++m_resumeCounter;
|
||||
}
|
||||
|
||||
if (delay == 0) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (delay > 1000) {
|
||||
delay = 1000;
|
||||
}
|
||||
|
||||
# ifdef XMRIG_INTERLEAVE_DEBUG
|
||||
LOG_WARN("Thread #%zu will be paused for %" PRIu64 " ms to before resuming", id, delay);
|
||||
# endif
|
||||
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(delay));
|
||||
|
||||
return delay;
|
||||
}
|
||||
|
||||
|
||||
void xmrig::OclInterleave::setResumeCounter(uint32_t value)
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(m_mutex);
|
||||
m_resumeCounter = value;
|
||||
}
|
||||
|
||||
|
||||
void xmrig::OclInterleave::setRunTime(uint64_t time)
|
||||
{
|
||||
// averagingBias = 1.0 - only the last delta time is taken into account
|
||||
// averagingBias = 0.5 - the last delta time has the same weight as all the previous ones combined
|
||||
// averagingBias = 0.1 - the last delta time has 10% weight of all the previous ones combined
|
||||
constexpr double averagingBias = 0.1;
|
||||
|
||||
std::lock_guard<std::mutex> lock(m_mutex);
|
||||
m_averageRunTime = m_averageRunTime * (1.0 - averagingBias) + time * averagingBias;
|
||||
}
|
63
src/backend/opencl/OclInterleave.h
Normal file
63
src/backend/opencl/OclInterleave.h
Normal file
|
@ -0,0 +1,63 @@
|
|||
/* XMRig
|
||||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef XMRIG_OCLINTERLEAVE_H
|
||||
#define XMRIG_OCLINTERLEAVE_H
|
||||
|
||||
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
|
||||
|
||||
namespace xmrig {
|
||||
|
||||
|
||||
/**
 * Timing coordinator for worker threads that run on the same OpenCL device.
 *
 * It keeps an averaged kernel run time and the timestamp of the last start,
 * and makes workers sleep so that their starts are spread evenly over one
 * run. All mutable state is protected by an internal mutex.
 */
class OclInterleave
{
public:
    OclInterleave() = delete;

    /// @param threads  number of worker threads sharing this object (N in the T/N spacing).
    explicit OclInterleave(size_t threads) : m_threads(threads) {}

    /// Sleeps if the caller starts too soon after the previous one; returns the ms slept.
    uint64_t adjustDelay(size_t id);

    /// Sleeps proportionally to the resume counter, then increments it; returns the ms slept.
    uint64_t resumeDelay(size_t id);

    /// Sets the counter used by resumeDelay().
    void setResumeCounter(uint32_t value);

    /// Feeds one measured run time into the running average.
    void setRunTime(uint64_t time);

private:
    const size_t m_threads;             ///< thread count given at construction
    double m_averageRunTime   = 0.0;    ///< running average maintained by setRunTime()
    double m_threshold        = 0.95;   ///< fraction of T/N below which adjustDelay() pauses; becomes 0.75 after the first pause
    std::mutex m_mutex;                 ///< guards every mutable member
    uint32_t m_resumeCounter  = 0;      ///< multiplier for resumeDelay()
    uint64_t m_timestamp      = 0;      ///< clock value stored by the last adjustDelay() call
};
|
||||
|
||||
|
||||
using OclInterleavePtr = std::shared_ptr<OclInterleave>;
|
||||
|
||||
|
||||
} /* namespace xmrig */
|
||||
|
||||
|
||||
#endif /* XMRIG_OCLINTERLEAVE_H */
|
|
@ -27,6 +27,7 @@
|
|||
#define XMRIG_OCLLAUNCHDATA_H
|
||||
|
||||
|
||||
#include "backend/opencl/OclInterleave.h"
|
||||
#include "backend/opencl/OclThread.h"
|
||||
#include "backend/opencl/wrappers/OclDevice.h"
|
||||
#include "backend/opencl/wrappers/OclPlatform.h"
|
||||
|
@ -64,6 +65,7 @@ public:
|
|||
const OclDevice device;
|
||||
const OclPlatform platform;
|
||||
const OclThread thread;
|
||||
OclInterleavePtr interleave;
|
||||
};
|
||||
|
||||
|
||||
|
|
|
@ -26,6 +26,7 @@
|
|||
#define XMRIG_OCLTHREAD_H
|
||||
|
||||
|
||||
#include "crypto/common/Algorithm.h"
|
||||
#include "rapidjson/fwd.h"
|
||||
|
||||
|
||||
|
@ -39,7 +40,8 @@ class OclThread
|
|||
{
|
||||
public:
|
||||
OclThread() = delete;
|
||||
OclThread(uint32_t index, uint32_t intensity, uint32_t worksize, uint32_t stridedIndex, uint32_t memChunk, uint32_t threads) :
|
||||
OclThread(uint32_t index, uint32_t intensity, uint32_t worksize, uint32_t stridedIndex, uint32_t memChunk, uint32_t threads, const Algorithm &algorithm) :
|
||||
m_algorithm(algorithm),
|
||||
m_threads(threads, -1),
|
||||
m_index(index),
|
||||
m_memChunk(memChunk),
|
||||
|
@ -72,6 +74,7 @@ public:
|
|||
private:
|
||||
inline void setIntensity(uint32_t intensity) { m_intensity = intensity / m_worksize * m_worksize; }
|
||||
|
||||
Algorithm m_algorithm;
|
||||
int m_datasetHost = -1;
|
||||
std::vector<int64_t> m_threads;
|
||||
uint32_t m_bfactor = 6;
|
||||
|
|
|
@ -24,12 +24,11 @@
|
|||
*/
|
||||
|
||||
|
||||
#include <assert.h>
|
||||
#include <thread>
|
||||
|
||||
|
||||
#include "backend/opencl/OclWorker.h"
|
||||
|
||||
#include "backend/opencl/runners/OclCnRunner.h"
|
||||
#include "base/io/log/Log.h"
|
||||
#include "base/tools/Chrono.h"
|
||||
#include "core/Miner.h"
|
||||
#include "crypto/common/Nonce.h"
|
||||
#include "net/JobResults.h"
|
||||
|
@ -40,6 +39,10 @@
|
|||
#endif
|
||||
|
||||
|
||||
#include <cassert>
|
||||
#include <thread>
|
||||
|
||||
|
||||
namespace xmrig {
|
||||
|
||||
|
||||
|
@ -57,7 +60,8 @@ xmrig::OclWorker::OclWorker(size_t id, const OclLaunchData &data) :
|
|||
Worker(id, data.affinity, -1),
|
||||
m_algorithm(data.algorithm),
|
||||
m_miner(data.miner),
|
||||
m_intensity(data.thread.intensity())
|
||||
m_intensity(data.thread.intensity()),
|
||||
m_interleave(data.interleave)
|
||||
{
|
||||
switch (m_algorithm.family()) {
|
||||
case Algorithm::RANDOM_X:
|
||||
|
@ -101,6 +105,10 @@ void xmrig::OclWorker::start()
|
|||
|
||||
while (Nonce::sequence(Nonce::OPENCL) > 0) {
|
||||
if (Nonce::isPaused()) {
|
||||
if (m_interleave) {
|
||||
m_interleave->setResumeCounter(0);
|
||||
}
|
||||
|
||||
do {
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(200));
|
||||
}
|
||||
|
@ -110,11 +118,19 @@ void xmrig::OclWorker::start()
|
|||
break;
|
||||
}
|
||||
|
||||
if (m_interleave) {
|
||||
m_interleave->resumeDelay(m_id);
|
||||
}
|
||||
|
||||
consumeJob();
|
||||
}
|
||||
|
||||
while (!Nonce::isOutdated(Nonce::OPENCL, m_job.sequence())) {
|
||||
storeStats();
|
||||
if (m_interleave) {
|
||||
m_interleave->adjustDelay(m_id);
|
||||
}
|
||||
|
||||
const uint64_t t = Chrono::steadyMSecs();
|
||||
|
||||
if (!m_runner->run(*m_job.nonce(), results)) {
|
||||
return;
|
||||
|
@ -125,8 +141,8 @@ void xmrig::OclWorker::start()
|
|||
}
|
||||
|
||||
m_job.nextRound(roundSize(m_intensity), m_intensity);
|
||||
m_count += m_intensity;
|
||||
|
||||
storeStats(t);
|
||||
std::this_thread::yield();
|
||||
}
|
||||
|
||||
|
@ -144,3 +160,19 @@ void xmrig::OclWorker::consumeJob()
|
|||
m_job.add(m_miner->job(), Nonce::sequence(Nonce::OPENCL), roundSize(m_intensity) * m_intensity);
|
||||
m_runner->set(m_job.currentJob(), m_job.blob());
|
||||
}
|
||||
|
||||
|
||||
void xmrig::OclWorker::storeStats(uint64_t t)
|
||||
{
|
||||
if (Nonce::isPaused()) {
|
||||
return;
|
||||
}
|
||||
|
||||
m_count += m_intensity;
|
||||
|
||||
if (m_interleave) {
|
||||
m_interleave->setRunTime(Chrono::steadyMSecs() - t);
|
||||
}
|
||||
|
||||
Worker::storeStats();
|
||||
}
|
||||
|
|
|
@ -42,20 +42,29 @@ class IOclRunner;
|
|||
class OclWorker : public Worker
|
||||
{
|
||||
public:
|
||||
OclWorker() = delete;
|
||||
OclWorker(const OclWorker &other) = delete;
|
||||
OclWorker(OclWorker &&other) = delete;
|
||||
OclWorker(size_t id, const OclLaunchData &data);
|
||||
|
||||
~OclWorker() override;
|
||||
|
||||
OclWorker &operator=(const OclWorker &other) = delete;
|
||||
OclWorker &operator=(OclWorker &&other) = delete;
|
||||
|
||||
protected:
|
||||
bool selfTest() override;
|
||||
void start() override;
|
||||
|
||||
private:
|
||||
void consumeJob();
|
||||
void storeStats(uint64_t ts);
|
||||
|
||||
const Algorithm m_algorithm;
|
||||
const Miner *m_miner;
|
||||
const uint32_t m_intensity;
|
||||
IOclRunner *m_runner = nullptr;
|
||||
OclInterleavePtr m_interleave;
|
||||
WorkerJob<1> m_job;
|
||||
};
|
||||
|
||||
|
|
|
@ -13,6 +13,7 @@ if (WITH_OPENCL)
|
|||
src/backend/opencl/OclBackend.h
|
||||
src/backend/opencl/OclCache.h
|
||||
src/backend/opencl/OclConfig.h
|
||||
src/backend/opencl/OclInterleave.h
|
||||
src/backend/opencl/OclLaunchData.h
|
||||
src/backend/opencl/OclThread.h
|
||||
src/backend/opencl/OclThreads.h
|
||||
|
@ -38,6 +39,7 @@ if (WITH_OPENCL)
|
|||
src/backend/opencl/OclBackend.cpp
|
||||
src/backend/opencl/OclCache.cpp
|
||||
src/backend/opencl/OclConfig.cpp
|
||||
src/backend/opencl/OclInterleave.cpp
|
||||
src/backend/opencl/OclLaunchData.cpp
|
||||
src/backend/opencl/OclThread.cpp
|
||||
src/backend/opencl/OclThreads.cpp
|
||||
|
@ -69,6 +71,10 @@ if (WITH_OPENCL)
|
|||
else()
|
||||
remove_definitions(/DXMRIG_STRICT_OPENCL_CACHE)
|
||||
endif()
|
||||
|
||||
if (WITH_INTERLEAVE_DEBUG_LOG)
|
||||
add_definitions(/DXMRIG_INTERLEAVE_DEBUG)
|
||||
endif()
|
||||
else()
|
||||
remove_definitions(/DXMRIG_FEATURE_OPENCL)
|
||||
|
||||
|
|
|
@ -186,7 +186,7 @@ void xmrig::OclDevice::generate(const Algorithm &algorithm, OclThreads &threads)
|
|||
const uint32_t memChunk = getMemChunk(algorithm);
|
||||
const uint32_t threadCount = ((globalMem() - intensity * 2 * algorithm.l3()) > 128 * oneMiB) ? 2 : 1;
|
||||
|
||||
threads.add(OclThread(index(), intensity, worksize, stridedIndex, memChunk, threadCount));
|
||||
threads.add(OclThread(index(), intensity, worksize, stridedIndex, memChunk, threadCount, algorithm));
|
||||
}
|
||||
|
||||
|
||||
|
@ -258,7 +258,7 @@ uint32_t xmrig::OclDevice::getPossibleIntensity(const Algorithm &algorithm) cons
|
|||
const size_t minFreeMem = (maxThreads == 40000u ? 512u : 128u) * oneMiB;
|
||||
const size_t availableMem = freeMem() - minFreeMem;
|
||||
const size_t perThread = algorithm.l3() + 224u;
|
||||
const uint32_t maxIntensity = static_cast<uint32_t>(availableMem / perThread);
|
||||
const auto maxIntensity = static_cast<uint32_t>(availableMem / perThread);
|
||||
|
||||
return std::min<uint32_t>(maxThreads, maxIntensity);
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue