OpenCL RandomX WIP

2019-09-11 15:48:02 +07:00 · 2019-09-11 15:48:02 +07:00 · 4c90f9960e
commit 4c90f9960e
parent ff89ec660c
72 changed files with 1717 additions and 505 deletions
--- a/scripts/generate_cl.js
+++ b/scripts/generate_cl.js
@ -64,8 +64,8 @@ function rx()
        'randomx_jit.cl'
    ]);

-    rx = rx.replace(/	#include "fillAes1Rx4.cl"/g, fs.readFileSync('fillAes1Rx4.cl', 'utf8'));
-    rx = rx.replace(/	#include "blake2b_double_block.cl"/g, fs.readFileSync('blake2b_double_block.cl', 'utf8'));
+    rx = rx.replace(/(\t| )*#include "fillAes1Rx4.cl"/g, fs.readFileSync('fillAes1Rx4.cl', 'utf8'));
+    rx = rx.replace(/(\t| )*#include "blake2b_double_block.cl"/g, fs.readFileSync('blake2b_double_block.cl', 'utf8'));

    //fs.writeFileSync('randomx_gen.cl', rx);
    fs.writeFileSync('randomx_cl.h', text2h(rx, 'xmrig', 'randomx_cl'));
--- a/src/backend/common/Tags.h
+++ b/src/backend/common/Tags.h
@ -0,0 +1,44 @@
+/* XMRig
+ * Copyright 2010      Jeff Garzik <jgarzik@pobox.com>
+ * Copyright 2012-2014 pooler      <pooler@litecoinpool.org>
+ * Copyright 2014      Lucas Jones <https://github.com/lucasjones>
+ * Copyright 2014-2016 Wolf9466    <https://github.com/OhGodAPet>
+ * Copyright 2016      Jay D Dee   <jayddee246@gmail.com>
+ * Copyright 2017-2018 XMR-Stak    <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
+ * Copyright 2018      Lee Clagett <https://github.com/vtnerd>
+ * Copyright 2018-2019 SChernykh   <https://github.com/SChernykh>
+ * Copyright 2016-2019 XMRig       <https://github.com/xmrig>, <support@xmrig.com>
+ *
+ *   This program is free software: you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, either version 3 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef XMRIG_TAGS_H
+#define XMRIG_TAGS_H
+
+
+namespace xmrig {
+
+
+const char *cpu_tag(); // Returns the CPU backend's tag string; defined in CpuBackend.cpp.
+
+
+#ifdef XMRIG_FEATURE_OPENCL
+const char *ocl_tag(); // Returns the OpenCL backend's tag string (passed as the leading "%s" of OpenCL log messages); defined in OclBackend.cpp.
+#endif
+
+
+} // namespace xmrig
+
+
+#endif /* XMRIG_TAGS_H */
--- a/src/backend/common/Workers.cpp
+++ b/src/backend/common/Workers.cpp
@ -151,12 +151,13 @@ xmrig::IWorker *xmrig::Workers<T>::create(Thread<T> *)
 template<class T>
 void xmrig::Workers<T>::onReady(void *arg)
 {
-    Thread<T> *handle = static_cast<Thread<T>* >(arg);
+    auto handle = static_cast<Thread<T>* >(arg);

    IWorker *worker = create(handle);
    if (!worker || !worker->selfTest()) {
-        LOG_ERR("thread %zu error: \"hash self-test failed\".", worker->id());
+        LOG_ERR("thread %zu error: \"hash self-test failed\".", worker ? worker->id() : 0); // worker is nullptr when create() failed, so guard the dereference; 0 is only a placeholder id in that case

+        delete worker;
        return;
    }

--- a/src/backend/common/common.cmake
+++ b/src/backend/common/common.cmake
@ -1,5 +1,6 @@
 set(HEADERS_BACKEND_COMMON
    src/backend/common/Hashrate.h
+    src/backend/common/Tags.h
    src/backend/common/interfaces/IBackend.h
    src/backend/common/interfaces/IRxListener.h
    src/backend/common/interfaces/IThread.h
--- a/src/backend/cpu/CpuBackend.cpp
+++ b/src/backend/cpu/CpuBackend.cpp
@ -28,6 +28,7 @@

 #include "backend/common/Hashrate.h"
 #include "backend/common/interfaces/IWorker.h"
+#include "backend/common/Tags.h"
 #include "backend/common/Workers.h"
 #include "backend/cpu/Cpu.h"
 #include "backend/cpu/CpuBackend.h"
@ -196,6 +197,12 @@ public:
 } // namespace xmrig


+const char *xmrig::cpu_tag() // Accessor declared in backend/common/Tags.h.
+{
+    return tag; // `tag` is declared outside this hunk; presumably the file-local CPU log tag - confirm.
+}
+
+
 xmrig::CpuBackend::CpuBackend(Controller *controller) :
    d_ptr(new CpuBackendPrivate(controller))
 {
--- a/src/backend/opencl/OclBackend.cpp
+++ b/src/backend/opencl/OclBackend.cpp
@ -28,6 +28,7 @@

 #include "backend/common/Hashrate.h"
 #include "backend/common/interfaces/IWorker.h"
+#include "backend/common/Tags.h"
 #include "backend/common/Workers.h"
 #include "backend/opencl/OclBackend.h"
 #include "backend/opencl/OclConfig.h"
@ -192,6 +193,12 @@ public:
 } // namespace xmrig


+const char *xmrig::ocl_tag() // Accessor declared in backend/common/Tags.h; other OpenCL sources call it instead of keeping their own tag string.
+{
+    return tag; // `tag` is declared outside this hunk; presumably the file-local OpenCL log tag - confirm.
+}
+
+
 xmrig::OclBackend::OclBackend(Controller *controller) :
    d_ptr(new OclBackendPrivate(controller))
 {
--- a/src/backend/opencl/OclCache.cpp
+++ b/src/backend/opencl/OclCache.cpp
@ -30,6 +30,7 @@


 #include "3rdparty/base32/base32.h"
+#include "backend/common/Tags.h"
 #include "backend/opencl/interfaces/IOclRunner.h"
 #include "backend/opencl/OclCache.h"
 #include "backend/opencl/OclLaunchData.h"
@ -42,13 +43,12 @@
 namespace xmrig {


-static const char *tag = MAGENTA_BG_BOLD(WHITE_BOLD_S " ocl ");
 static std::mutex mutex;


 static cl_program createFromSource(const IOclRunner *runner)
 {
-    LOG_INFO("%s GPU " WHITE_BOLD("#%zu") " " YELLOW_BOLD("compiling..."), tag, runner->data().device.index());
+    LOG_INFO("%s GPU " WHITE_BOLD("#%zu") " " YELLOW_BOLD("compiling..."), ocl_tag(), runner->data().device.index());

    cl_int ret;
    cl_device_id device = runner->data().device.id();
@ -68,7 +68,7 @@ static cl_program createFromSource(const IOclRunner *runner)
    }

    LOG_INFO("%s GPU " WHITE_BOLD("#%zu") " " GREEN_BOLD("compilation completed") BLACK_BOLD(" (%" PRIu64 " ms)"),
-             tag, runner->data().device.index(), Chrono::steadyMSecs() - ts);
+             ocl_tag(), runner->data().device.index(), Chrono::steadyMSecs() - ts);

    return program;
 }
--- a/src/backend/opencl/OclCache.h
+++ b/src/backend/opencl/OclCache.h
@ -29,7 +29,7 @@
 #include <string>


-typedef struct _cl_program *cl_program;
+using cl_program = struct _cl_program *;


 namespace xmrig {
--- a/src/backend/opencl/OclConfig.cpp
+++ b/src/backend/opencl/OclConfig.cpp
@ -166,6 +166,10 @@ std::vector<xmrig::OclLaunchData> xmrig::OclConfig::get(const Miner *miner, cons
            continue;
        }

+#       ifdef XMRIG_ALGO_RANDOMX
+        auto dataset = algorithm.family() == Algorithm::RANDOM_X ? std::make_shared<OclRxDataset>() : nullptr;
+#       endif
+
        if (thread.threads().size() > 1) {
            auto interleave = std::make_shared<OclInterleave>(thread.threads().size());

@ -173,11 +177,21 @@ std::vector<xmrig::OclLaunchData> xmrig::OclConfig::get(const Miner *miner, cons
                OclLaunchData data(miner, algorithm, *this, platform, thread, devices[thread.index()], affinity);
                data.interleave = interleave;

-                out.emplace_back(data);
+#               ifdef XMRIG_ALGO_RANDOMX
+                data.dataset = dataset;
+#               endif
+
+                out.emplace_back(std::move(data));
            }
        }
        else {
-           out.emplace_back(miner, algorithm, *this, platform, thread, devices[thread.index()], thread.threads()[0]);
+            OclLaunchData data(miner, algorithm, *this, platform, thread, devices[thread.index()], thread.threads().front());
+
+#           ifdef XMRIG_ALGO_RANDOMX
+            data.dataset = dataset;
+#           endif
+
+            out.emplace_back(std::move(data));
        }
    }

--- a/src/backend/opencl/OclLaunchData.h
+++ b/src/backend/opencl/OclLaunchData.h
@ -35,6 +35,11 @@
 #include "crypto/common/Nonce.h"


+#ifdef XMRIG_ALGO_RANDOMX
+#   include "backend/opencl/runners/tools/OclRxDataset.h"
+#endif
+
+
 using cl_context = struct _cl_context *;


@ -66,6 +71,10 @@ public:
    const OclPlatform platform;
    const OclThread thread;
    OclInterleavePtr interleave;
+
+#   ifdef XMRIG_ALGO_RANDOMX
+    OclRxDatasetPtr dataset;
+#   endif
 };


--- a/src/backend/opencl/OclThread.cpp
+++ b/src/backend/opencl/OclThread.cpp
@ -61,7 +61,7 @@ xmrig::OclThread::OclThread(const rapidjson::Value &value)
 #   ifdef XMRIG_ALGO_RANDOMX
    m_bfactor     = Json::getUint(value, kBFactor, 6);
    m_gcnAsm      = Json::getUint(value, kGCNAsm, m_gcnAsm);
-    m_datasetHost = Json::getInt(value, kDatasetHost, m_datasetHost);
+    m_datasetHost = Json::getBool(value, kDatasetHost, m_datasetHost);
 #   endif

    const rapidjson::Value &si = Json::getArray(value, kStridedIndex);
@ -134,11 +134,11 @@ rapidjson::Value xmrig::OclThread::toJSON(rapidjson::Document &doc) const
    out.AddMember(StringRef(kUnroll),       unrollFactor(), allocator);

 #   ifdef XMRIG_ALGO_RANDOMX
-    if (m_datasetHost != -1) {
-        out.AddMember(StringRef(kBFactor),      bfactor(), allocator);
-        out.AddMember(StringRef(kGCNAsm),       gcnAsm(), allocator);
-        out.AddMember(StringRef(kDatasetHost),  datasetHost(), allocator);
-    }
+//    if (m_datasetHost != -1) {
+//        out.AddMember(StringRef(kBFactor),      bfactor(), allocator);
+//        out.AddMember(StringRef(kGCNAsm),       gcnAsm(), allocator);
+//        out.AddMember(StringRef(kDatasetHost),  isDatasetHost(), allocator);
+//    }
 #   endif

    return out;
--- a/src/backend/opencl/OclThread.h
+++ b/src/backend/opencl/OclThread.h
@ -68,11 +68,11 @@ public:

    OclThread(const rapidjson::Value &value);

+    inline bool isAsm() const                               { return m_gcnAsm; }
+    inline bool isDatasetHost() const                       { return m_datasetHost; }
    inline bool isValid() const                             { return m_intensity > 0; }
    inline const std::vector<int64_t> &threads() const      { return m_threads; }
    inline uint32_t bfactor() const                         { return m_bfactor; }
-    inline uint32_t datasetHost() const                     { return m_datasetHost < 0 ? 0 : static_cast<uint32_t>(m_datasetHost); }
-    inline uint32_t gcnAsm() const                          { return m_gcnAsm; }
    inline uint32_t index() const                           { return m_index; }
    inline uint32_t intensity() const                       { return m_intensity; }
    inline uint32_t memChunk() const                        { return m_memChunk; }
@ -95,11 +95,11 @@ private:

    inline void setIntensity(uint32_t intensity)            { m_intensity = intensity / m_worksize * m_worksize; }

-    int m_datasetHost               = -1;
+    bool m_datasetHost              = false;
+    bool m_gcnAsm                   = false;
    std::bitset<FIELD_MAX> m_fields = 1;
    std::vector<int64_t> m_threads;
    uint32_t m_bfactor              = 6;
-    uint32_t m_gcnAsm               = 1;
    uint32_t m_index                = 0;
    uint32_t m_intensity            = 0;
    uint32_t m_memChunk             = 2;
--- a/src/backend/opencl/OclWorker.cpp
+++ b/src/backend/opencl/OclWorker.cpp
@ -26,6 +26,7 @@

 #include "backend/opencl/OclWorker.h"

+#include "backend/common/Tags.h"
 #include "backend/opencl/runners/OclCnRunner.h"
 #include "base/io/log/Log.h"
 #include "base/tools/Chrono.h"
@ -35,7 +36,8 @@


 #ifdef XMRIG_ALGO_RANDOMX
-#   include "backend/opencl/runners/OclRxRunner.h"
+#   include "backend/opencl/runners/OclRxJitRunner.h"
+#   include "backend/opencl/runners/OclRxVmRunner.h"
 #endif

 #ifdef XMRIG_ALGO_CN_GPU
@ -58,6 +60,12 @@ static inline bool isReady()                         { return !Nonce::isPaused()
 static inline uint32_t roundSize(uint32_t intensity) { return kReserveCount / intensity + 1; }


+static inline void printError(size_t id, const char *error) // Logs a failure of OpenCL worker thread `id`; callers in this file pass the caught exception's what() as `error`.
+{
+    LOG_ERR("%s" RED_S " thread " RED_BOLD("#%zu") RED_S " failed with error " RED_BOLD("%s"), ocl_tag(), id, error); // ocl_tag() supplies the leading "%s" prefix
+}
+
+
 } // namespace xmrig


@ -72,7 +80,12 @@ xmrig::OclWorker::OclWorker(size_t id, const OclLaunchData &data) :
    switch (m_algorithm.family()) {
    case Algorithm::RANDOM_X:
 #       ifdef XMRIG_ALGO_RANDOMX
-        m_runner = new OclRxRunner(id, data);
+        if (data.thread.isAsm() && data.device.vendorId() == OCL_VENDOR_AMD) {
+            m_runner = new OclRxJitRunner(id, data);
+        }
+        else {
+            m_runner = new OclRxVmRunner(id, data);
+        }
 #       endif
        break;

@ -95,9 +108,20 @@ xmrig::OclWorker::OclWorker(size_t id, const OclLaunchData &data) :
        break;
    }

-    if (m_runner) {
+    if (!m_runner) {
+        return;
+    }
+
+    try {
+        m_runner->init();
        m_runner->build();
    }
+    catch (const std::exception &ex) { // catch by const reference: the handler only reads ex.what()
+        printError(id, ex.what());
+
+        delete m_runner; // drop the half-initialized runner; selfTest() then reports failure because m_runner is nullptr
+        m_runner = nullptr;
+    }
 }


@ -109,7 +133,7 @@ xmrig::OclWorker::~OclWorker()

 bool xmrig::OclWorker::selfTest()
 {
-    return m_runner && m_runner->selfTest();
+    return m_runner != nullptr;
 }


@ -136,7 +160,9 @@ void xmrig::OclWorker::start()
                m_interleave->resumeDelay(m_id);
            }

-            consumeJob();
+            if (!consumeJob()) {
+                return;
+            }
        }

        while (!Nonce::isOutdated(Nonce::OPENCL, m_job.sequence())) {
@ -146,7 +172,12 @@ void xmrig::OclWorker::start()

            const uint64_t t = Chrono::steadyMSecs();

-            if (!m_runner->run(*m_job.nonce(), results)) {
+            try {
+                m_runner->run(*m_job.nonce(), results); // NOTE(review): IOclRunner::run() is still declared to return bool and the result is ignored here - confirm failures are reported only via exceptions
+            }
+            catch (const std::exception &ex) { // catch by const reference: the handler only reads ex.what()
+                printError(id(), ex.what());
+
                return;
            }

@ -160,19 +191,31 @@ void xmrig::OclWorker::start()
            std::this_thread::yield();
        }

-        consumeJob();
+        if (!consumeJob()) {
+            return;
+        }
    }
 }


-void xmrig::OclWorker::consumeJob()
+bool xmrig::OclWorker::consumeJob() // Returns true once the current job is handed to the runner; false when the OpenCL nonce sequence is 0 or the runner threw, in which case start() returns.
 {
    if (Nonce::sequence(Nonce::OPENCL) == 0) {
-        return;
+        return false;
    }

    m_job.add(m_miner->job(), Nonce::sequence(Nonce::OPENCL), roundSize(m_intensity) * m_intensity);
-    m_runner->set(m_job.currentJob(), m_job.blob());
+
+    try {
+        m_runner->set(m_job.currentJob(), m_job.blob());
+    }
+    catch (const std::exception &ex) { // catch by const reference: the handler only reads ex.what()
+        printError(id(), ex.what());
+
+        return false;
+    }
+
+    return true;
 }


--- a/src/backend/opencl/OclWorker.h
+++ b/src/backend/opencl/OclWorker.h
@ -59,7 +59,7 @@ protected:
    void start() override;

 private:
-    void consumeJob();
+    bool consumeJob();
    void storeStats(uint64_t ts);

    const Algorithm m_algorithm;
--- a/src/backend/opencl/interfaces/IOclRunner.h
+++ b/src/backend/opencl/interfaces/IOclRunner.h
@ -26,10 +26,13 @@
 #define XMRIG_IOCLRUNNER_H


-#include <stdint.h>
+#include "base/tools/Object.h"


-typedef struct _cl_context *cl_context;
+#include <cstdint>
+
+
+using cl_context = struct _cl_context *;


 namespace xmrig {
@ -43,10 +46,12 @@ class OclLaunchData;
 class IOclRunner
 {
 public:
+    XMRIG_DISABLE_COPY_MOVE(IOclRunner)
+
+    IOclRunner()          = default;
    virtual ~IOclRunner() = default;

    virtual bool run(uint32_t nonce, uint32_t *hashOutput)  = 0;
-    virtual bool selfTest() const                           = 0;
    virtual bool set(const Job &job, uint8_t *blob)         = 0;
    virtual cl_context ctx() const                          = 0;
    virtual const Algorithm &algorithm() const              = 0;
@ -57,9 +62,7 @@ public:
    virtual size_t threadId() const                         = 0;
    virtual uint32_t deviceIndex() const                    = 0;
    virtual void build()                                    = 0;
-
-protected:
-    virtual bool isReadyToBuild() const                     = 0;
+    virtual void init()                                     = 0;
 };


--- a/src/backend/opencl/kernels/Cn00RyoKernel.cpp
+++ b/src/backend/opencl/kernels/Cn00RyoKernel.cpp
@ -27,17 +27,18 @@
 #include "backend/opencl/wrappers/OclLib.h"


-bool xmrig::Cn00RyoKernel::enqueue(cl_command_queue queue, size_t threads)
+void xmrig::Cn00RyoKernel::enqueue(cl_command_queue queue, size_t threads)
 {
    const size_t gthreads = threads * 64;
    const size_t lthreads = 64;

-    return enqueueNDRange(queue, 1, nullptr, &gthreads, &lthreads);
+    enqueueNDRange(queue, 1, nullptr, &gthreads, &lthreads);
 }


 // __kernel void cn00(__global int *Scratchpad, __global ulong *states)
-bool xmrig::Cn00RyoKernel::setArgs(cl_mem scratchpads, cl_mem states)
+void xmrig::Cn00RyoKernel::setArgs(cl_mem scratchpads, cl_mem states)
 {
-    return setArg(0, sizeof(cl_mem), &scratchpads) && setArg(1, sizeof(cl_mem), &states);
+    setArg(0, sizeof(cl_mem), &scratchpads);
+    setArg(1, sizeof(cl_mem), &states);
 }
--- a/src/backend/opencl/kernels/Cn00RyoKernel.h
+++ b/src/backend/opencl/kernels/Cn00RyoKernel.h
@ -37,8 +37,8 @@ class Cn00RyoKernel : public OclKernel
 public:
    inline Cn00RyoKernel(cl_program program) : OclKernel(program, "cn00") {}

-    bool enqueue(cl_command_queue queue, size_t threads);
-    bool setArgs(cl_mem scratchpads, cl_mem states);
+    void enqueue(cl_command_queue queue, size_t threads);
+    void setArgs(cl_mem scratchpads, cl_mem states);
 };


--- a/src/backend/opencl/kernels/Cn0Kernel.cpp
+++ b/src/backend/opencl/kernels/Cn0Kernel.cpp
@ -27,21 +27,21 @@
 #include "backend/opencl/wrappers/OclLib.h"


-bool xmrig::Cn0Kernel::enqueue(cl_command_queue queue, uint32_t nonce, size_t threads)
+void xmrig::Cn0Kernel::enqueue(cl_command_queue queue, uint32_t nonce, size_t threads)
 {
    const size_t offset[2]          = { nonce, 1 };
    const size_t gthreads[2]        = { threads, 8 };
    static const size_t lthreads[2] = { 8, 8 };

-    return enqueueNDRange(queue, 2, offset, gthreads, lthreads);
+    enqueueNDRange(queue, 2, offset, gthreads, lthreads);
 }


 // __kernel void cn0(__global ulong *input, __global uint4 *Scratchpad, __global ulong *states, uint Threads)
-bool xmrig::Cn0Kernel::setArgs(cl_mem input, cl_mem scratchpads, cl_mem states, uint32_t threads)
+void xmrig::Cn0Kernel::setArgs(cl_mem input, cl_mem scratchpads, cl_mem states, uint32_t threads)
 {
-    return setArg(0, sizeof(cl_mem), &input) &&
-           setArg(1, sizeof(cl_mem), &scratchpads) &&
-           setArg(2, sizeof(cl_mem), &states) &&
-           setArg(3, sizeof(uint32_t), &threads);
+    setArg(0, sizeof(cl_mem), &input);
+    setArg(1, sizeof(cl_mem), &scratchpads);
+    setArg(2, sizeof(cl_mem), &states);
+    setArg(3, sizeof(uint32_t), &threads);
 }
--- a/src/backend/opencl/kernels/Cn0Kernel.h
+++ b/src/backend/opencl/kernels/Cn0Kernel.h
@ -37,8 +37,8 @@ class Cn0Kernel : public OclKernel
 public:
    inline Cn0Kernel(cl_program program) : OclKernel(program, "cn0") {}

-    bool enqueue(cl_command_queue queue, uint32_t nonce, size_t threads);
-    bool setArgs(cl_mem input, cl_mem scratchpads, cl_mem states, uint32_t threads);
+    void enqueue(cl_command_queue queue, uint32_t nonce, size_t threads);
+    void setArgs(cl_mem input, cl_mem scratchpads, cl_mem states, uint32_t threads);
 };


--- a/src/backend/opencl/kernels/Cn1Kernel.cpp
+++ b/src/backend/opencl/kernels/Cn1Kernel.cpp
@ -43,21 +43,21 @@ xmrig::Cn1Kernel::Cn1Kernel(cl_program program, uint64_t height)
 }


-bool xmrig::Cn1Kernel::enqueue(cl_command_queue queue, uint32_t nonce, size_t threads, size_t worksize)
+void xmrig::Cn1Kernel::enqueue(cl_command_queue queue, uint32_t nonce, size_t threads, size_t worksize)
 {
    const size_t offset   = nonce;
    const size_t gthreads = threads;
    const size_t lthreads = worksize;

-    return enqueueNDRange(queue, 1, &offset, &gthreads, &lthreads);
+    enqueueNDRange(queue, 1, &offset, &gthreads, &lthreads);
 }


 // __kernel void cn1(__global ulong *input, __global uint4 *Scratchpad, __global ulong *states, uint Threads)
-bool xmrig::Cn1Kernel::setArgs(cl_mem input, cl_mem scratchpads, cl_mem states, uint32_t threads)
+void xmrig::Cn1Kernel::setArgs(cl_mem input, cl_mem scratchpads, cl_mem states, uint32_t threads)
 {
-    return setArg(0, sizeof(cl_mem), &input) &&
-           setArg(1, sizeof(cl_mem), &scratchpads) &&
-           setArg(2, sizeof(cl_mem), &states) &&
-           setArg(3, sizeof(uint32_t), &threads);
+    setArg(0, sizeof(cl_mem), &input);
+    setArg(1, sizeof(cl_mem), &scratchpads);
+    setArg(2, sizeof(cl_mem), &states);
+    setArg(3, sizeof(uint32_t), &threads);
 }
--- a/src/backend/opencl/kernels/Cn1Kernel.h
+++ b/src/backend/opencl/kernels/Cn1Kernel.h
@ -38,8 +38,8 @@ public:
    Cn1Kernel(cl_program program);
    Cn1Kernel(cl_program program, uint64_t height);

-    bool enqueue(cl_command_queue queue, uint32_t nonce, size_t threads, size_t worksize);
-    bool setArgs(cl_mem input, cl_mem scratchpads, cl_mem states, uint32_t threads);
+    void enqueue(cl_command_queue queue, uint32_t nonce, size_t threads, size_t worksize);
+    void setArgs(cl_mem input, cl_mem scratchpads, cl_mem states, uint32_t threads);
 };


--- a/src/backend/opencl/kernels/Cn1RyoKernel.cpp
+++ b/src/backend/opencl/kernels/Cn1RyoKernel.cpp
@ -30,19 +30,19 @@
 #include "backend/opencl/wrappers/OclLib.h"


-bool xmrig::Cn1RyoKernel::enqueue(cl_command_queue queue, size_t threads, size_t worksize)
+void xmrig::Cn1RyoKernel::enqueue(cl_command_queue queue, size_t threads, size_t worksize)
 {
    const size_t gthreads = threads * 16;
    const size_t lthreads = worksize * 16;

-    return enqueueNDRange(queue, 1, nullptr, &gthreads, &lthreads);
+    enqueueNDRange(queue, 1, nullptr, &gthreads, &lthreads);
 }


 // __kernel void cn1(__global int *lpad_in, __global int *spad, uint numThreads)
-bool xmrig::Cn1RyoKernel::setArgs(cl_mem scratchpads, cl_mem states, uint32_t threads)
+void xmrig::Cn1RyoKernel::setArgs(cl_mem scratchpads, cl_mem states, uint32_t threads)
 {
-    return setArg(0, sizeof(cl_mem), &scratchpads) &&
-           setArg(1, sizeof(cl_mem), &states) &&
-           setArg(2, sizeof(uint32_t), &threads);
+    setArg(0, sizeof(cl_mem), &scratchpads);
+    setArg(1, sizeof(cl_mem), &states);
+    setArg(2, sizeof(uint32_t), &threads);
 }
--- a/src/backend/opencl/kernels/Cn1RyoKernel.h
+++ b/src/backend/opencl/kernels/Cn1RyoKernel.h
@ -37,8 +37,8 @@ class Cn1RyoKernel : public OclKernel
 public:
    inline Cn1RyoKernel(cl_program program) : OclKernel(program, "cn1") {}

-    bool enqueue(cl_command_queue queue, size_t threads, size_t worksize);
-    bool setArgs(cl_mem scratchpads, cl_mem states, uint32_t threads);
+    void enqueue(cl_command_queue queue, size_t threads, size_t worksize);
+    void setArgs(cl_mem scratchpads, cl_mem states, uint32_t threads);
 };


--- a/src/backend/opencl/kernels/Cn2Kernel.cpp
+++ b/src/backend/opencl/kernels/Cn2Kernel.cpp
@ -27,28 +27,24 @@
 #include "backend/opencl/wrappers/OclLib.h"


-bool xmrig::Cn2Kernel::enqueue(cl_command_queue queue, uint32_t nonce, size_t threads)
+void xmrig::Cn2Kernel::enqueue(cl_command_queue queue, uint32_t nonce, size_t threads)
 {
    const size_t offset[2]          = { nonce, 1 };
    const size_t gthreads[2]        = { threads, 8 };
    static const size_t lthreads[2] = { 8, 8 };

-    return enqueueNDRange(queue, 2, offset, gthreads, lthreads);
+    enqueueNDRange(queue, 2, offset, gthreads, lthreads);
 }


 // __kernel void cn2(__global uint4 *Scratchpad, __global ulong *states, __global uint *Branch0, __global uint *Branch1, __global uint *Branch2, __global uint *Branch3, uint Threads)
-bool xmrig::Cn2Kernel::setArgs(cl_mem scratchpads, cl_mem states, const std::vector<cl_mem> &branches, uint32_t threads)
+void xmrig::Cn2Kernel::setArgs(cl_mem scratchpads, cl_mem states, const std::vector<cl_mem> &branches, uint32_t threads)
 {
-    if (!setArg(0, sizeof(cl_mem), &scratchpads) || !setArg(1, sizeof(cl_mem), &states) || !setArg(6, sizeof(uint32_t), &threads)) {
-        return false;
-    }
+    setArg(0, sizeof(cl_mem), &scratchpads);
+    setArg(1, sizeof(cl_mem), &states);
+    setArg(6, sizeof(uint32_t), &threads);

    for (uint32_t i = 0; i < branches.size(); ++i) {
-        if (!setArg(i + 2, sizeof(cl_mem), &branches[i])) {
-            return false;
-        }
+        setArg(i + 2, sizeof(cl_mem), &branches[i]);
    }
-
-    return true;
 }
--- a/src/backend/opencl/kernels/Cn2Kernel.h
+++ b/src/backend/opencl/kernels/Cn2Kernel.h
@ -37,8 +37,8 @@ class Cn2Kernel : public OclKernel
 public:
    inline Cn2Kernel(cl_program program) : OclKernel(program, "cn2") {}

-    bool enqueue(cl_command_queue queue, uint32_t nonce, size_t threads);
-    bool setArgs(cl_mem scratchpads, cl_mem states, const std::vector<cl_mem> &branches, uint32_t threads);
+    void enqueue(cl_command_queue queue, uint32_t nonce, size_t threads);
+    void setArgs(cl_mem scratchpads, cl_mem states, const std::vector<cl_mem> &branches, uint32_t threads);
 };


--- a/src/backend/opencl/kernels/Cn2RyoKernel.cpp
+++ b/src/backend/opencl/kernels/Cn2RyoKernel.cpp
@ -27,22 +27,27 @@
 #include "backend/opencl/wrappers/OclLib.h"


-bool xmrig::Cn2RyoKernel::enqueue(cl_command_queue queue, uint32_t nonce, size_t threads)
+void xmrig::Cn2RyoKernel::enqueue(cl_command_queue queue, uint32_t nonce, size_t threads)
 {
    const size_t offset[2]          = { nonce, 1 };
    const size_t gthreads[2]        = { threads, 8 };
    static const size_t lthreads[2] = { 8, 8 };

-    return enqueueNDRange(queue, 2, offset, gthreads, lthreads);
+    enqueueNDRange(queue, 2, offset, gthreads, lthreads);
 }


 // __kernel void cn2(__global uint4 *Scratchpad, __global ulong *states, __global uint *output, ulong Target, uint Threads)
-bool xmrig::Cn2RyoKernel::setArgs(cl_mem scratchpads, cl_mem states, cl_mem output, uint64_t target, uint32_t threads)
+void xmrig::Cn2RyoKernel::setArgs(cl_mem scratchpads, cl_mem states, cl_mem output, uint32_t threads)
 {
-    return setArg(0, sizeof(cl_mem), &scratchpads) &&
-           setArg(1, sizeof(cl_mem), &states) &&
-           setArg(2, sizeof(cl_mem), &output) &&
-           setArg(3, sizeof(cl_ulong), &target) &&
-           setArg(4, sizeof(uint32_t), &threads);
+    setArg(0, sizeof(cl_mem), &scratchpads);
+    setArg(1, sizeof(cl_mem), &states);
+    setArg(2, sizeof(cl_mem), &output);
+    setArg(4, sizeof(uint32_t), &threads);
+}
+
+
+void xmrig::Cn2RyoKernel::setTarget(uint64_t target)
+{
+    setArg(3, sizeof(cl_ulong), &target);
 }
--- a/src/backend/opencl/kernels/Cn2RyoKernel.h
+++ b/src/backend/opencl/kernels/Cn2RyoKernel.h
@ -37,8 +37,9 @@ class Cn2RyoKernel : public OclKernel
 public:
    inline Cn2RyoKernel(cl_program program) : OclKernel(program, "cn2") {}

-    bool enqueue(cl_command_queue queue, uint32_t nonce, size_t threads);
-    bool setArgs(cl_mem scratchpads, cl_mem states, cl_mem output, uint64_t target, uint32_t threads);
+    void enqueue(cl_command_queue queue, uint32_t nonce, size_t threads);
+    void setArgs(cl_mem scratchpads, cl_mem states, cl_mem output, uint32_t threads);
+    void setTarget(uint64_t target);
 };


--- a/src/backend/opencl/kernels/CnBranchKernel.cpp
+++ b/src/backend/opencl/kernels/CnBranchKernel.cpp
@ -41,22 +41,27 @@ xmrig::CnBranchKernel::CnBranchKernel(size_t index, cl_program program) : OclKer
 }


-bool xmrig::CnBranchKernel::enqueue(cl_command_queue queue, uint32_t nonce, size_t threads, size_t worksize)
+void xmrig::CnBranchKernel::enqueue(cl_command_queue queue, uint32_t nonce, size_t threads, size_t worksize)
 {
    const size_t offset   = nonce;
    const size_t gthreads = threads;
    const size_t lthreads = worksize;

-    return enqueueNDRange(queue, 1, &offset, &gthreads, &lthreads);
+    enqueueNDRange(queue, 1, &offset, &gthreads, &lthreads);
 }


 // __kernel void Skein(__global ulong *states, __global uint *BranchBuf, __global uint *output, ulong Target, uint Threads)
-bool xmrig::CnBranchKernel::setArgs(cl_mem states, cl_mem branch, cl_mem output, uint64_t target, uint32_t threads)
+void xmrig::CnBranchKernel::setArgs(cl_mem states, cl_mem branch, cl_mem output, uint32_t threads)
 {
-    return setArg(0, sizeof(cl_mem), &states) &&
-           setArg(1, sizeof(cl_mem), &branch) &&
-           setArg(2, sizeof(cl_mem), &output) &&
-           setArg(3, sizeof(cl_ulong), &target) &&
-           setArg(4, sizeof(cl_uint), &threads);
+    setArg(0, sizeof(cl_mem), &states);
+    setArg(1, sizeof(cl_mem), &branch);
+    setArg(2, sizeof(cl_mem), &output);
+    setArg(4, sizeof(cl_uint), &threads);
+}
+
+
+void xmrig::CnBranchKernel::setTarget(uint64_t target)
+{
+    setArg(3, sizeof(cl_ulong), &target);
 }
--- a/src/backend/opencl/kernels/CnBranchKernel.h
+++ b/src/backend/opencl/kernels/CnBranchKernel.h
@ -36,8 +36,9 @@ class CnBranchKernel : public OclKernel
 {
 public:
    CnBranchKernel(size_t index, cl_program program);
-    bool enqueue(cl_command_queue queue, uint32_t nonce, size_t threads, size_t worksize);
-    bool setArgs(cl_mem states, cl_mem branch, cl_mem output, uint64_t target, uint32_t threads);
+    void enqueue(cl_command_queue queue, uint32_t nonce, size_t threads, size_t worksize);
+    void setArgs(cl_mem states, cl_mem branch, cl_mem output, uint32_t threads);
+    void setTarget(uint64_t target);
 };


--- a/src/backend/opencl/kernels/rx/Blake2bHashRegistersKernel.cpp
+++ b/src/backend/opencl/kernels/rx/Blake2bHashRegistersKernel.cpp
@ -0,0 +1,37 @@
+/* XMRig
+ * Copyright 2010      Jeff Garzik <jgarzik@pobox.com>
+ * Copyright 2012-2014 pooler      <pooler@litecoinpool.org>
+ * Copyright 2014      Lucas Jones <https://github.com/lucasjones>
+ * Copyright 2014-2016 Wolf9466    <https://github.com/OhGodAPet>
+ * Copyright 2016      Jay D Dee   <jayddee246@gmail.com>
+ * Copyright 2017-2018 XMR-Stak    <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
+ * Copyright 2018-2019 SChernykh   <https://github.com/SChernykh>
+ * Copyright 2016-2019 XMRig       <https://github.com/xmrig>, <support@xmrig.com>
+ *
+ *   This program is free software: you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, either version 3 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+
+#include "backend/opencl/kernels/rx/Blake2bHashRegistersKernel.h"
+#include "backend/opencl/wrappers/OclLib.h"
+
+
+// __kernel void blake2b_hash_registers_32(__global void *out, __global const void* in, uint inStrideBytes)
+// __kernel void blake2b_hash_registers_64(__global void *out, __global const void* in, uint inStrideBytes)
+void xmrig::Blake2bHashRegistersKernel::setArgs(cl_mem out, cl_mem in, uint32_t inStrideBytes) // Binds the three kernel arguments in the order of the kernel signatures quoted above.
+{
+    setArg(0, sizeof(cl_mem), &out); // argument 0: `out` buffer
+    setArg(1, sizeof(cl_mem), &in); // argument 1: `in` buffer
+    setArg(2, sizeof(uint32_t), &inStrideBytes); // argument 2: `inStrideBytes`, passed as a 32-bit value
+}
(Diff truncated here: the remaining changed files of this commit are not shown.)