KawPow WIP

2020-05-24 23:57:41 +02:00 · 2020-05-24 23:57:41 +02:00 · 22b937cc1c
commit 22b937cc1c
parent 07025dc41b
88 changed files with 11004 additions and 8383 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -6,10 +6,10 @@ option(WITH_HWLOC           "Enable hwloc support" ON)
 option(WITH_CN_LITE         "Enable CryptoNight-Lite algorithms family" ON)
 option(WITH_CN_HEAVY        "Enable CryptoNight-Heavy algorithms family" ON)
 option(WITH_CN_PICO         "Enable CryptoNight-Pico algorithm" ON)
-option(WITH_CN_GPU          "Enable CryptoNight-GPU algorithm" OFF)
 option(WITH_RANDOMX         "Enable RandomX algorithms family" ON)
 option(WITH_ARGON2          "Enable Argon2 algorithms family" ON)
 option(WITH_ASTROBWT        "Enable AstroBWT algorithms family" ON)
+option(WITH_KAWPOW          "Enable KawPow algorithms family" ON)
 option(WITH_HTTP            "Enable HTTP protocol support (client/server)" ON)
 option(WITH_DEBUG_LOG       "Enable debug log output" OFF)
 option(WITH_TLS             "Enable OpenSSL support" ON)
@ -172,9 +172,9 @@ include(cmake/flags.cmake)
 include(cmake/randomx.cmake)
 include(cmake/argon2.cmake)
 include(cmake/astrobwt.cmake)
+include(cmake/kawpow.cmake)
 include(cmake/OpenSSL.cmake)
 include(cmake/asm.cmake)
-include(cmake/cn-gpu.cmake)

 if (WITH_CN_LITE)
    add_definitions(/DXMRIG_ALGO_CN_LITE)
@ -204,8 +204,8 @@ if (WITH_DEBUG_LOG)
    add_definitions(/DAPP_DEBUG)
 endif()

-add_executable(${CMAKE_PROJECT_NAME} ${HEADERS} ${SOURCES} ${SOURCES_OS} ${SOURCES_CPUID} ${HEADERS_CRYPTO} ${SOURCES_CRYPTO} ${SOURCES_SYSLOG} ${TLS_SOURCES} ${XMRIG_ASM_SOURCES} ${CN_GPU_SOURCES})
-target_link_libraries(${CMAKE_PROJECT_NAME} ${XMRIG_ASM_LIBRARY} ${OPENSSL_LIBRARIES} ${UV_LIBRARIES} ${EXTRA_LIBS} ${CPUID_LIB} ${ARGON2_LIBRARY})
+add_executable(${CMAKE_PROJECT_NAME} ${HEADERS} ${SOURCES} ${SOURCES_OS} ${SOURCES_CPUID} ${HEADERS_CRYPTO} ${SOURCES_CRYPTO} ${SOURCES_SYSLOG} ${TLS_SOURCES} ${XMRIG_ASM_SOURCES})
+target_link_libraries(${CMAKE_PROJECT_NAME} ${XMRIG_ASM_LIBRARY} ${OPENSSL_LIBRARIES} ${UV_LIBRARIES} ${EXTRA_LIBS} ${CPUID_LIB} ${ARGON2_LIBRARY} ${ETHASH_LIBRARY})

 if (WIN32)
    add_custom_command(TARGET ${CMAKE_PROJECT_NAME} POST_BUILD
--- a/cmake/astrobwt.cmake
+++ b/cmake/astrobwt.cmake
@ -3,12 +3,10 @@ if (WITH_ASTROBWT)

    list(APPEND HEADERS_CRYPTO
        src/crypto/astrobwt/AstroBWT.h
-        src/crypto/astrobwt/sha3.h
    )

    list(APPEND SOURCES_CRYPTO
        src/crypto/astrobwt/AstroBWT.cpp
-        src/crypto/astrobwt/sha3.cpp
    )

    if (XMRIG_ARM)
--- a/cmake/cn-gpu.cmake
+++ b/cmake/cn-gpu.cmake
@ -1,25 +0,0 @@
-if (WITH_CN_GPU AND CMAKE_SIZEOF_VOID_P EQUAL 8)
-
-    if (XMRIG_ARM)
-        set(CN_GPU_SOURCES src/crypto/cn/gpu/cn_gpu_arm.cpp)
-
-        if (CMAKE_CXX_COMPILER_ID MATCHES GNU OR CMAKE_CXX_COMPILER_ID MATCHES Clang)
-            set_source_files_properties(src/crypto/cn/gpu/cn_gpu_arm.cpp PROPERTIES COMPILE_FLAGS "-O3")
-        endif()
-    else()
-        set(CN_GPU_SOURCES src/crypto/cn/gpu/cn_gpu_avx.cpp src/crypto/cn/gpu/cn_gpu_ssse3.cpp)
-
-        if (CMAKE_CXX_COMPILER_ID MATCHES GNU OR CMAKE_CXX_COMPILER_ID MATCHES Clang)
-            set_source_files_properties(src/crypto/cn/gpu/cn_gpu_avx.cpp PROPERTIES COMPILE_FLAGS "-O3 -mavx2")
-            set_source_files_properties(src/crypto/cn/gpu/cn_gpu_ssse3.cpp PROPERTIES COMPILE_FLAGS "-O3")
-        elseif (CMAKE_CXX_COMPILER_ID MATCHES MSVC)
-            set_source_files_properties(src/crypto/cn/gpu/cn_gpu_avx.cpp PROPERTIES COMPILE_FLAGS "/arch:AVX")
-        endif()
-    endif()
-
-    add_definitions(/DXMRIG_ALGO_CN_GPU)
-else()
-    set(CN_GPU_SOURCES "")
-
-    remove_definitions(/DXMRIG_ALGO_CN_GPU)
-endif()
--- a/cmake/kawpow.cmake
+++ b/cmake/kawpow.cmake
@ -0,0 +1,19 @@
+# KawPow algorithm family (optional, controlled by WITH_KAWPOW).
+if (NOT WITH_KAWPOW)
+    # Disabled: make sure the define is absent and link nothing extra.
+    remove_definitions(/DXMRIG_ALGO_KAWPOW)
+    set(ETHASH_LIBRARY "")
+else()
+    # Keep the define ahead of add_subdirectory() so the directory-scoped
+    # definitions seen by libethash stay unchanged.
+    add_definitions(/DXMRIG_ALGO_KAWPOW)
+
+    list(APPEND HEADERS_CRYPTO src/crypto/kawpow/KPCache.h src/crypto/kawpow/KPHash.h)
+    list(APPEND SOURCES_CRYPTO src/crypto/kawpow/KPCache.cpp src/crypto/kawpow/KPHash.cpp)
+
+    # Bundled ethash implementation; the main target links ${ETHASH_LIBRARY}.
+    add_subdirectory(src/3rdparty/libethash)
+    set(ETHASH_LIBRARY ethash)
+endif()
--- a/doc/ALGORITHMS.md
+++ b/doc/ALGORITHMS.md
@ -25,7 +25,6 @@ Option `coin` useful for pools without algorithm negotiation support or daemon t
 | `cn/zls` | 2 MB | 2.14.0+ | CryptoNight variant 2 with 3/4 iterations. |
 | `cn/double` | 2 MB | 2.14.0+ | CryptoNight variant 2 with double iterations. |
 | `cn/r` | 2 MB | 2.13.0+ | CryptoNightR (Monero's variant 4). |
-| `cn/gpu` | 2 MB | 2.11.0+ | CryptoNight-GPU. |
 | `cn-pico` | 256 KB | 2.10.0+ | CryptoNight-Pico. |
 | `cn/half` | 2 MB | 2.9.0+ | CryptoNight variant 2 with half iterations. |
 | `cn/2` | 2 MB | 2.8.0+ | CryptoNight variant 2. |
--- a/doc/build/CMAKE_OPTIONS.md
+++ b/doc/build/CMAKE_OPTIONS.md
@ -6,7 +6,6 @@
 * **`-DWITH_CN_LITE=OFF`** disable all CryptoNight-Lite algorithms (`cn-lite/0`, `cn-lite/1`).
 * **`-DWITH_CN_HEAVY=OFF`** disable all CryptoNight-Heavy algorithms (`cn-heavy/0`, `cn-heavy/xhv`, `cn-heavy/tube`).
 * **`-DWITH_CN_PICO=OFF`** disable CryptoNight-Pico algorithm (`cn-pico`).
-* **`-DWITH_CN_GPU=OFF`** disable CryptoNight-GPU algorithm (`cn/gpu`).
 * **`-DWITH_RANDOMX=OFF`** disable RandomX algorithms (`rx/loki`, `rx/wow`).
 * **`-DWITH_ARGON2=OFF`** disable Argon2 algorithms (`argon2/chukwa`, `argon2/wrkz`).

--- a/scripts/generate_cl.js
+++ b/scripts/generate_cl.js
@ -43,15 +43,6 @@ function cn_r()
 }


-function cn_gpu()
-{
-    const cn_gpu = opencl_minify(addIncludes('cryptonight_gpu.cl', [ 'wolf-aes.cl', 'keccak.cl' ]));
-
-    // fs.writeFileSync('cryptonight_gpu_gen.cl', cn_gpu);
-    fs.writeFileSync('cryptonight_gpu_cl.h', text2h(cn_gpu, 'xmrig', 'cryptonight_gpu_cl'));
-}
-
-
 function rx()
 {
    let rx = addIncludes('randomx.cl', [
@ -85,11 +76,21 @@ function astrobwt()
 }


+/**
+ * Generate the embedded headers for the KawPow OpenCL kernels.
+ *
+ * Each kernel source is expanded with 'defs.h', minified, and written to
+ * '<symbol>.h' in the current working directory. All kernels are processed
+ * before the first header is written.
+ */
+function kawpow()
+{
+    const kernels = [
+        [ 'kawpow.cl',     'kawpow_cl' ],
+        [ 'kawpow_dag.cl', 'kawpow_dag_cl' ]
+    ];
+
+    const generated = kernels.map(([ source, symbol ]) => ({
+        symbol,
+        code: opencl_minify(addIncludes(source, [ 'defs.h' ]))
+    }));
+
+    // To inspect the minified kernel, dump generated[0].code to 'kawpow_gen.cl'.
+    for (const { symbol, code } of generated) {
+        fs.writeFileSync(symbol + '.h', text2h(code, 'xmrig', symbol));
+    }
+}
+
+
 process.chdir(path.resolve('src/backend/opencl/cl/cn'));

 cn();
 cn_r();
-cn_gpu();

 process.chdir(cwd);
 process.chdir(path.resolve('src/backend/opencl/cl/rx'));
@ -100,3 +101,8 @@ process.chdir(cwd);
 process.chdir(path.resolve('src/backend/opencl/cl/astrobwt'));

 astrobwt();
+
+process.chdir(cwd);
+process.chdir(path.resolve('src/backend/opencl/cl/kawpow'));
+
+kawpow();
--- a/src/3rdparty/libethash/CMakeLists.txt
+++ b/src/3rdparty/libethash/CMakeLists.txt
@ -0,0 +1,24 @@
+# Static build of the bundled ethash sources (C only).
+cmake_minimum_required (VERSION 2.8)
+project (ethash C)
+
+# Release builds of this library are optimised for size.
+set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -Os")
+
+# Headers are resolved relative to two directories above this one.
+include_directories(../..)
+
+set(ETHASH_HEADERS
+    data_sizes.h
+    endian.h
+    ethash.h
+    ethash_internal.h
+    fnv.h
+    )
+
+set(ETHASH_SOURCES
+    ethash_internal.c
+    keccakf800.c
+    )
+
+add_library(ethash STATIC ${ETHASH_HEADERS} ${ETHASH_SOURCES})
--- a/src/3rdparty/libethash/data_sizes.h
+++ b/src/3rdparty/libethash/data_sizes.h
--- a/src/3rdparty/libethash/endian.h
+++ b/src/3rdparty/libethash/endian.h
@ -0,0 +1,77 @@
+#pragma once
+
+#include <stdint.h>
+
+/*
+ * Establish BYTE_ORDER together with LITTLE_ENDIAN / BIG_ENDIAN.
+ *
+ * Platforms with a system header providing these macros include it; the
+ * remaining platforms define the macros by hand. Hand-written values follow
+ * the usual convention (LITTLE_ENDIAN = 1234, BIG_ENDIAN = 4321) so they can
+ * never compare equal to each other if another header defines the
+ * counterpart macro.
+ */
+#if defined(__MINGW32__) || defined(_WIN32)
+  # define LITTLE_ENDIAN 1234
+  # define BYTE_ORDER    LITTLE_ENDIAN
+#elif defined(__FreeBSD__) || defined(__DragonFly__) || defined(__NetBSD__)
+  # include <sys/endian.h>
+#elif defined(__OpenBSD__) || defined(__SVR4)
+  # include <sys/types.h>
+#elif defined(__APPLE__)
+# include <machine/endian.h>
+#elif defined( BSD ) && (BSD >= 199103)
+  # include <machine/endian.h>
+#elif defined( __QNXNTO__ ) && defined( __LITTLEENDIAN__ )
+  # define LITTLE_ENDIAN 1234
+  # define BYTE_ORDER    LITTLE_ENDIAN
+#elif defined( __QNXNTO__ ) && defined( __BIGENDIAN__ )
+  /* Was 1234, which collides with the conventional LITTLE_ENDIAN value. */
+  # define BIG_ENDIAN 4321
+  # define BYTE_ORDER BIG_ENDIAN
+#else
+# include <endian.h>
+#endif
+
+/*
+ * ethash_swap_u32 / ethash_swap_u64: byte-order reversal of a 32-bit or
+ * 64-bit value, mapped onto whichever primitive the platform provides.
+ */
+#if defined(_WIN32)
+#include <stdlib.h>
+#define ethash_swap_u32(input_) _byteswap_ulong(input_)
+#define ethash_swap_u64(input_) _byteswap_uint64(input_)
+#elif defined(__APPLE__)
+#include <libkern/OSByteOrder.h>
+#define ethash_swap_u32(input_) OSSwapInt32(input_)
+#define ethash_swap_u64(input_) OSSwapInt64(input_)
+#elif defined(__FreeBSD__) || defined(__DragonFly__) || defined(__NetBSD__)
+#define ethash_swap_u32(input_) bswap32(input_)
+#define ethash_swap_u64(input_) bswap64(input_)
+#elif defined(__OpenBSD__)
+#include <endian.h>
+#define ethash_swap_u32(input_) swap32(input_)
+#define ethash_swap_u64(input_) swap64(input_)
+#else // posix
+#include <byteswap.h>
+#define ethash_swap_u32(input_) bswap_32(input_)
+#define ethash_swap_u64(input_) bswap_64(input_)
+#endif
+
+
+/*
+ * fix_endian* helpers.
+ *
+ * On little-endian hosts they are plain assignments or expand to nothing;
+ * on big-endian hosts they byte-swap through ethash_swap_u32/u64.
+ *   fix_endian32/64(dst_, src_)      assign src_ to dst_, swapping if needed
+ *   fix_endian32/64_same(val_)       swap val_ in place if needed
+ *   fix_endian_arr32/64(arr_, size_) swap the first size_ elements of arr_
+ *
+ * NOTE(review): the macro arguments are not parenthesised and the
+ * assignment forms expand to bare statements, so pass simple lvalues and
+ * expressions only.
+ */
+#if LITTLE_ENDIAN == BYTE_ORDER
+
+#define fix_endian32(dst_ ,src_) dst_ = src_
+#define fix_endian32_same(val_)
+#define fix_endian64(dst_, src_) dst_ = src_
+#define fix_endian64_same(val_)
+#define fix_endian_arr32(arr_, size_)
+#define fix_endian_arr64(arr_, size_)
+
+#elif BIG_ENDIAN == BYTE_ORDER
+
+#define fix_endian32(dst_, src_) dst_ = ethash_swap_u32(src_)
+#define fix_endian32_same(val_) val_ = ethash_swap_u32(val_)
+#define fix_endian64(dst_, src_) dst_ = ethash_swap_u64(src_)
+#define fix_endian64_same(val_) val_ = ethash_swap_u64(val_)
+#define fix_endian_arr32(arr_, size_) \
+  do { \
+    for (unsigned i_ = 0; i_ < (size_); ++i_) { \
+      arr_[i_] = ethash_swap_u32(arr_[i_]); \
+    } \
+  } while (0)
+#define fix_endian_arr64(arr_, size_) \
+  do { \
+    for (unsigned i_ = 0; i_ < (size_); ++i_) { \
+      arr_[i_] = ethash_swap_u64(arr_[i_]); \
+    } \
+  } while (0)
+#else
+# error "endian not supported"
+#endif // BYTE_ORDER
--- a/src/3rdparty/libethash/ethash.h
+++ b/src/3rdparty/libethash/ethash.h
@ -0,0 +1,158 @@
+/*
+  This file is part of ethash.
+
+  ethash is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  ethash is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with ethash.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/** @file ethash.h
+* @date 2015
+*/
+#pragma once
+
+#include <stdint.h>
+#include <stdbool.h>
+#include <string.h>
+#include <stddef.h>
+
+// Ethash parameters and DAG file magic number.
+#define ETHASH_REVISION 23
+#define ETHASH_DATASET_BYTES_INIT 1073741824U // 2**30
+#define ETHASH_DATASET_BYTES_GROWTH 8388608U  // 2**23
+// NOTE(review): the literal below is 2**30; the original comment said 2**24
+// (= 16777216). Confirm which of the two was intended before relying on it.
+#define ETHASH_CACHE_BYTES_INIT 1073741824U // 2**30
+#define ETHASH_CACHE_BYTES_GROWTH 131072U  // 2**17
+#define ETHASH_EPOCH_LENGTH 30000U
+#define ETHASH_MIX_BYTES 128
+#define ETHASH_HASH_BYTES 64
+#define ETHASH_DATASET_PARENTS 256
+#define ETHASH_CACHE_ROUNDS 3
+#define ETHASH_ACCESSES 64
+#define ETHASH_DAG_MAGIC_NUM_SIZE 8
+#define ETHASH_DAG_MAGIC_NUM 0xFEE1DEADBADDCAFE
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/// Type of a seedhash/blockhash e.t.c.
+typedef struct ethash_h256 { uint8_t b[32]; } ethash_h256_t;
+
+// convenience macro to statically initialize an h256_t
+// usage:
+// ethash_h256_t a = ethash_h256_static_init(1, 2, 3, ... )
+// have to provide all 32 values. If you don't provide all, the rest
+// will simply be uninitialized (not guaranteed to be 0)
+#define ethash_h256_static_init(...)			\
+	{ {__VA_ARGS__} }
+
+struct ethash_light;
+typedef struct ethash_light* ethash_light_t;
+struct ethash_full;
+typedef struct ethash_full* ethash_full_t;
+typedef int(*ethash_callback_t)(unsigned);
+
+typedef struct ethash_return_value {
+	ethash_h256_t result;
+	ethash_h256_t mix_hash;
+	bool success;
+} ethash_return_value_t;
+
+/**
+ * Allocate and initialize a new ethash_light handler
+ *
+ * @param block_number   The block number for which to create the handler
+ * @return               Newly allocated ethash_light handler or NULL in case of
+ *                       ERRNOMEM or invalid parameters used for @ref ethash_compute_cache_nodes()
+ */
+ethash_light_t ethash_light_new(uint64_t block_number);
+/**
+ * Compute the cache nodes for a given seed.
+ *
+ * @param nodes        Destination buffer for the cache nodes
+ *                     (presumably at least @p cache_size bytes — confirm
+ *                     against the implementation)
+ * @param cache_size   The size of the cache in bytes
+ * @param seed         Seedhash used during the computation of the cache nodes
+ * @return             NOTE(review): presumably true on success and false for
+ *                     invalid parameters — confirm against the implementation
+ */
+bool ethash_compute_cache_nodes(
+    void* nodes,
+    uint64_t cache_size,
+    ethash_h256_t const* seed
+);
+/**
+ * Frees a previously allocated ethash_light handler
+ * @param light        The light handler to free
+ */
+void ethash_light_delete(ethash_light_t light);
+/**
+ * Calculate the light client data
+ *
+ * @param light          The light client handler
+ * @param header_hash    The header hash to pack into the mix
+ * @param nonce          The nonce to pack into the mix
+ * @return               an object of ethash_return_value_t holding the return values
+ */
+ethash_return_value_t ethash_light_compute(
+	ethash_light_t light,
+	ethash_h256_t const header_hash,
+	uint64_t nonce
+);
+
+/**
+ * Allocate and initialize a new ethash_full handler
+ *
+ * @param light         The light handler containing the cache.
+ * @param callback      A callback function with signature of @ref ethash_callback_t
+ *                      It accepts an unsigned with which a progress of DAG calculation
+ *                      can be displayed. If all goes well the callback should return 0.
+ *                      If a non-zero value is returned then DAG generation will stop.
+ *                      Be advised. A progress value of 100 means that DAG creation is
+ *                      almost complete and that this function will soon return successfully.
+ *                      It does not mean that the function has already had a successful return.
+ * @return              Newly allocated ethash_full handler or NULL in case of
+ *                      ERRNOMEM or invalid parameters used for @ref ethash_compute_full_data()
+ */
+ethash_full_t ethash_full_new(ethash_light_t light, ethash_callback_t callback);
+
+/**
+ * Frees a previously allocated ethash_full handler
+ * @param full    The full handler to free
+ */
+void ethash_full_delete(ethash_full_t full);
+/**
+ * Calculate the full client data
+ *
+ * @param full           The full client handler
+ * @param header_hash    The header hash to pack into the mix
+ * @param nonce          The nonce to pack into the mix
+ * @return               An object of ethash_return_value to hold the return value
+ */
+ethash_return_value_t ethash_full_compute(
+	ethash_full_t full,
+	ethash_h256_t const header_hash,
+	uint64_t nonce
+);
+/**
+ * Get a pointer to the full DAG data
+ */
+void const* ethash_full_dag(ethash_full_t full);
+/**
+ * Get the size of the DAG data
+ */
+uint64_t ethash_full_dag_size(ethash_full_t full);
+
+/**
+ * Calculate the seedhash for a given epoch
+ */
+ethash_h256_t ethash_get_seedhash(uint64_t epoch);
+
+/**
+ * KeccakF800 for ProgPoW
+ */
+void ethash_keccakf800(uint32_t state[25]);
+
+#ifdef __cplusplus
+}
+#endif
--- a/src/3rdparty/libethash/ethash_internal.c
+++ b/src/3rdparty/libethash/ethash_internal.c
--- a/src/3rdparty/libethash/ethash_internal.h
+++ b/src/3rdparty/libethash/ethash_internal.h
@ -0,0 +1,192 @@
+#pragma once
+#include "endian.h"
+#include "ethash.h"
+#include <stdio.h>
+
+#define ENABLE_SSE 0
+
+#if defined(_M_X64) && ENABLE_SSE
+#include <smmintrin.h>
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// compile time settings
+#define NODE_WORDS (64/4)
+#define MIX_WORDS (ETHASH_MIX_BYTES/4)
+#define MIX_NODES (MIX_WORDS / NODE_WORDS)
+#include <stdint.h>
+
+// One 64-byte node, accessible as bytes, 32-bit words or 64-bit words.
+typedef union node {
+	uint8_t bytes[NODE_WORDS * 4];
+	uint32_t words[NODE_WORDS];
+	uint64_t double_words[NODE_WORDS / 2];
+
+	// SSE view; only compiled for MSVC x64 builds when ENABLE_SSE is non-zero.
+#if defined(_M_X64) && ENABLE_SSE
+	__m128i xmm[NODE_WORDS/4];
+#endif
+
+} node;
+
+/* Return byte i of hash. No bounds check is performed on i. */
+static inline uint8_t ethash_h256_get(ethash_h256_t const* hash, unsigned int i)
+{
+	uint8_t const* const bytes = hash->b;
+	return bytes[i];
+}
+
+/* Store v into byte i of hash. No bounds check is performed on i. */
+static inline void ethash_h256_set(ethash_h256_t* hash, unsigned int i, uint8_t v)
+{
+	uint8_t* const bytes = hash->b;
+	bytes[i] = v;
+}
+
+/* Zero all 32 bytes of hash. */
+static inline void ethash_h256_reset(ethash_h256_t* hash)
+{
+	memset(hash->b, 0, sizeof(hash->b));
+}
+
+// Returns true when hash is less than or equal to boundary (2^256/difficulty)
+static inline bool ethash_check_difficulty(
+	ethash_h256_t const* hash,
+	ethash_h256_t const* boundary
+)
+{
+	// Boundary is big endian, so the first differing byte decides the
+	// ordering; memcmp compares bytes as unsigned values in that same
+	// order. Equal inputs also pass.
+	return memcmp(hash->b, boundary->b, sizeof(hash->b)) <= 0;
+}
+
+/**
+ *  Difficulty quick check for POW preverification
+ *
+ * @param header_hash      The hash of the header
+ * @param nonce            The block's nonce
+ * @param mix_hash         The mix digest hash
+ * @param boundary         The boundary is defined as (2^256 / difficulty)
+ * @return                 true for successful pre-verification and false otherwise
+ */
+bool ethash_quick_check_difficulty(
+	ethash_h256_t const* header_hash,
+	uint64_t const nonce,
+	ethash_h256_t const* mix_hash,
+	ethash_h256_t const* boundary
+);
+
+struct ethash_light {
+	void* cache;
+	uint64_t cache_size;
+	uint64_t block_number;
+
+	// Used for fast division
+	uint32_t num_parent_nodes;
+	uint32_t reciprocal;
+	uint32_t increment;
+	uint32_t shift;
+};
+
+/**
+ * Allocate and initialize a new ethash_light handler. Internal version
+ *
+ * @param cache_size    The size of the cache in bytes
+ * @param seed          Block seedhash to be used during the computation of the
+ *                      cache nodes
+ * @return              Newly allocated ethash_light handler or NULL in case of
+ *                      ERRNOMEM or invalid parameters used for @ref ethash_compute_cache_nodes()
+ */
+ethash_light_t ethash_light_new_internal(uint64_t cache_size, ethash_h256_t const* seed);
+
+/**
+ * Calculate the light client data. Internal version.
+ *
+ * @param light          The light client handler
+ * @param full_size      The size of the full data in bytes.
+ * @param header_hash    The header hash to pack into the mix
+ * @param nonce          The nonce to pack into the mix
+ * @return               The resulting hash.
+ */
+ethash_return_value_t ethash_light_compute_internal(
+	ethash_light_t light,
+	uint64_t full_size,
+	ethash_h256_t const header_hash,
+	uint64_t nonce
+);
+
+struct ethash_full {
+	FILE* file;
+	uint64_t file_size;
+	node* data;
+};
+
+/**
+ * Allocate and initialize a new ethash_full handler. Internal version.
+ *
+ * @param dirname        The directory in which to put the DAG file.
+ * @param seed_hash      The seed hash of the block. Used in the DAG file naming.
+ * @param full_size      The size of the full data in bytes.
+ * @param light          A light (cache) handler to use.
+ *                       NOTE(review): the original text said it must come from
+ *                       ethash_cache_new(), which is not declared in these
+ *                       headers — presumably ethash_light_new(); confirm.
+ *                       Iff this function succeeds the ethash_full_t will take
+ *                       memory ownership of the cache and free it at deletion. If
+ *                       not then the user still has to handle freeing of the cache himself.
+ * @param callback       A callback function with signature of @ref ethash_callback_t
+ *                       It accepts an unsigned with which a progress of DAG calculation
+ *                       can be displayed. If all goes well the callback should return 0.
+ *                       If a non-zero value is returned then DAG generation will stop.
+ * @return               Newly allocated ethash_full handler or NULL in case of
+ *                       ERRNOMEM or invalid parameters used for @ref ethash_compute_full_data()
+ */
+ethash_full_t ethash_full_new_internal(
+	char const* dirname,
+	ethash_h256_t const seed_hash,
+	uint64_t full_size,
+	ethash_light_t const light,
+	ethash_callback_t callback
+);
+
+void ethash_calculate_dag_item(
+	node* const ret,
+	uint32_t node_index,
+	uint32_t num_parents,
+	ethash_light_t const cache
+);
+
+void ethash_calculate_dag_item_opt(
+	node* const ret,
+	uint32_t node_index,
+	uint32_t num_parents,
+	ethash_light_t const cache
+);
+
+void ethash_quick_hash(
+	ethash_h256_t* return_hash,
+	ethash_h256_t const* header_hash,
+	const uint64_t nonce,
+	ethash_h256_t const* mix_hash
+);
+
+uint64_t ethash_get_datasize(uint64_t const block_number);
+uint64_t ethash_get_cachesize(uint64_t const block_number);
+
+/**
+ * Compute the memory data for a full node's memory
+ *
+ * @param mem         A pointer to an ethash full's memory
+ * @param full_size   The size of the full data in bytes
+ * @param cache       A cache object to use in the calculation
+ * @param callback    The callback function. Check @ref ethash_full_new() for details.
+ * @return            true if all went fine and false for invalid parameters
+ */
+bool ethash_compute_full_data(
+	void* mem,
+	uint64_t full_size,
+	ethash_light_t const light,
+	ethash_callback_t callback
+);
+
+#ifdef __cplusplus
+}
+#endif
--- a/src/3rdparty/libethash/fnv.h
+++ b/src/3rdparty/libethash/fnv.h
@ -0,0 +1,42 @@
+/*
+  This file is part of cpp-ethereum.
+
+  cpp-ethereum is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  cpp-ethereum is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with cpp-ethereum.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/** @file fnv.h
+* @author Matthew Wampler-Doty <negacthulhu@gmail.com>
+* @date 2015
+*/
+
+#pragma once
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define FNV_PRIME 0x01000193
+
+/* Ethash flavour of FNV-1: the prime is multiplied with the whole 32-bit
+   input x at once (canonical FNV-1 processes one octet at a time), then the
+   product is XORed with y. The result therefore differs from a canonical
+   FNV-1 implementation.
+*/
+static inline uint32_t fnv_hash(uint32_t const x, uint32_t const y)
+{
+	uint32_t const scaled = x * FNV_PRIME;
+	return scaled ^ y;
+}
+
+#ifdef __cplusplus
+}
+#endif
--- a/src/3rdparty/libethash/keccakf800.c
+++ b/src/3rdparty/libethash/keccakf800.c
@ -0,0 +1,253 @@
+/* ethash: C/C++ implementation of Ethash, the Ethereum Proof of Work algorithm.
+ * Copyright 2018-2019 Pawel Bylica.
+ * Licensed under the Apache License, Version 2.0.
+ */
+
+#include <stdint.h>
+
+/**
+ * Rotate the 32-bit value x left by s bits.
+ *
+ * Both shift counts are reduced modulo 32, so s == 0 (or any multiple of 32)
+ * returns x unchanged instead of performing an undefined 32-bit shift.
+ * For s in 1..31 the result equals (x << s) | (x >> (32 - s)).
+ */
+static uint32_t rol(uint32_t x, unsigned s)
+{
+    return (x << (s & 31u)) | (x >> ((32u - s) & 31u));
+}
+
+/* Per-round constants, one for each of the 22 rounds of ethash_keccakf800().
+ * NOTE(review): these appear to be the low 32 bits of the Keccak-f[1600]
+ * round constants — confirm against the Keccak reference. */
+static const uint32_t round_constants[22] = {
+    0x00000001,
+    0x00008082,
+    0x0000808A,
+    0x80008000,
+    0x0000808B,
+    0x80000001,
+    0x80008081,
+    0x00008009,
+    0x0000008A,
+    0x00000088,
+    0x80008009,
+    0x8000000A,
+    0x8000808B,
+    0x0000008B,
+    0x00008089,
+    0x00008003,
+    0x00008002,
+    0x00000080,
+    0x0000800A,
+    0x8000000A,
+    0x80008081,
+    0x00008080,
+};
+
+/**
+ * Apply the 22-round Keccak-f[800] permutation to state in place.
+ *
+ * The 25 32-bit lanes are loaded into locals, transformed two rounds per
+ * loop iteration (A lanes -> E lanes, then E lanes -> A lanes) and written
+ * back at the end.
+ *
+ * @param state  25 lanes of 32 bits each; read on entry and overwritten
+ *               with the permuted state.
+ */
+void ethash_keccakf800(uint32_t state[25])
+{
+    /* The implementation directly translated from ethash_keccakf1600. */
+
+    int round;
+
+    /* Current state lanes (input of even rounds, output of odd rounds). */
+    uint32_t Aba, Abe, Abi, Abo, Abu;
+    uint32_t Aga, Age, Agi, Ago, Agu;
+    uint32_t Aka, Ake, Aki, Ako, Aku;
+    uint32_t Ama, Ame, Ami, Amo, Amu;
+    uint32_t Asa, Ase, Asi, Aso, Asu;
+
+    /* Intermediate state lanes (output of even rounds, input of odd rounds). */
+    uint32_t Eba, Ebe, Ebi, Ebo, Ebu;
+    uint32_t Ega, Ege, Egi, Ego, Egu;
+    uint32_t Eka, Eke, Eki, Eko, Eku;
+    uint32_t Ema, Eme, Emi, Emo, Emu;
+    uint32_t Esa, Ese, Esi, Eso, Esu;
+
+    /* Scratch: column parities first, then the five lanes of the row being built. */
+    uint32_t Ba, Be, Bi, Bo, Bu;
+
+    /* Per-column mixing values derived from the parities. */
+    uint32_t Da, De, Di, Do, Du;
+
+    Aba = state[0];
+    Abe = state[1];
+    Abi = state[2];
+    Abo = state[3];
+    Abu = state[4];
+    Aga = state[5];
+    Age = state[6];
+    Agi = state[7];
+    Ago = state[8];
+    Agu = state[9];
+    Aka = state[10];
+    Ake = state[11];
+    Aki = state[12];
+    Ako = state[13];
+    Aku = state[14];
+    Ama = state[15];
+    Ame = state[16];
+    Ami = state[17];
+    Amo = state[18];
+    Amu = state[19];
+    Asa = state[20];
+    Ase = state[21];
+    Asi = state[22];
+    Aso = state[23];
+    Asu = state[24];
+
+    for (round = 0; round < 22; round += 2)
+    {
+        /* Round (round + 0): Axx -> Exx */
+
+        /* XOR of the five lanes in each column. */
+        Ba = Aba ^ Aga ^ Aka ^ Ama ^ Asa;
+        Be = Abe ^ Age ^ Ake ^ Ame ^ Ase;
+        Bi = Abi ^ Agi ^ Aki ^ Ami ^ Asi;
+        Bo = Abo ^ Ago ^ Ako ^ Amo ^ Aso;
+        Bu = Abu ^ Agu ^ Aku ^ Amu ^ Asu;
+
+        Da = Bu ^ rol(Be, 1);
+        De = Ba ^ rol(Bi, 1);
+        Di = Be ^ rol(Bo, 1);
+        Do = Bi ^ rol(Bu, 1);
+        Du = Bo ^ rol(Ba, 1);
+
+        /* First output row; only this row receives the round constant. */
+        Ba = Aba ^ Da;
+        Be = rol(Age ^ De, 12);
+        Bi = rol(Aki ^ Di, 11);
+        Bo = rol(Amo ^ Do, 21);
+        Bu = rol(Asu ^ Du, 14);
+        Eba = Ba ^ (~Be & Bi) ^ round_constants[round];
+        Ebe = Be ^ (~Bi & Bo);
+        Ebi = Bi ^ (~Bo & Bu);
+        Ebo = Bo ^ (~Bu & Ba);
+        Ebu = Bu ^ (~Ba & Be);
+
+        Ba = rol(Abo ^ Do, 28);
+        Be = rol(Agu ^ Du, 20);
+        Bi = rol(Aka ^ Da, 3);
+        Bo = rol(Ame ^ De, 13);
+        Bu = rol(Asi ^ Di, 29);
+        Ega = Ba ^ (~Be & Bi);
+        Ege = Be ^ (~Bi & Bo);
+        Egi = Bi ^ (~Bo & Bu);
+        Ego = Bo ^ (~Bu & Ba);
+        Egu = Bu ^ (~Ba & Be);
+
+        Ba = rol(Abe ^ De, 1);
+        Be = rol(Agi ^ Di, 6);
+        Bi = rol(Ako ^ Do, 25);
+        Bo = rol(Amu ^ Du, 8);
+        Bu = rol(Asa ^ Da, 18);
+        Eka = Ba ^ (~Be & Bi);
+        Eke = Be ^ (~Bi & Bo);
+        Eki = Bi ^ (~Bo & Bu);
+        Eko = Bo ^ (~Bu & Ba);
+        Eku = Bu ^ (~Ba & Be);
+
+        Ba = rol(Abu ^ Du, 27);
+        Be = rol(Aga ^ Da, 4);
+        Bi = rol(Ake ^ De, 10);
+        Bo = rol(Ami ^ Di, 15);
+        Bu = rol(Aso ^ Do, 24);
+        Ema = Ba ^ (~Be & Bi);
+        Eme = Be ^ (~Bi & Bo);
+        Emi = Bi ^ (~Bo & Bu);
+        Emo = Bo ^ (~Bu & Ba);
+        Emu = Bu ^ (~Ba & Be);
+
+        Ba = rol(Abi ^ Di, 30);
+        Be = rol(Ago ^ Do, 23);
+        Bi = rol(Aku ^ Du, 7);
+        Bo = rol(Ama ^ Da, 9);
+        Bu = rol(Ase ^ De, 2);
+        Esa = Ba ^ (~Be & Bi);
+        Ese = Be ^ (~Bi & Bo);
+        Esi = Bi ^ (~Bo & Bu);
+        Eso = Bo ^ (~Bu & Ba);
+        Esu = Bu ^ (~Ba & Be);
+
+
+        /* Round (round + 1): Exx -> Axx */
+
+        /* Same sequence as above with the roles of the A and E lanes swapped. */
+        Ba = Eba ^ Ega ^ Eka ^ Ema ^ Esa;
+        Be = Ebe ^ Ege ^ Eke ^ Eme ^ Ese;
+        Bi = Ebi ^ Egi ^ Eki ^ Emi ^ Esi;
+        Bo = Ebo ^ Ego ^ Eko ^ Emo ^ Eso;
+        Bu = Ebu ^ Egu ^ Eku ^ Emu ^ Esu;
+
+        Da = Bu ^ rol(Be, 1);
+        De = Ba ^ rol(Bi, 1);
+        Di = Be ^ rol(Bo, 1);
+        Do = Bi ^ rol(Bu, 1);
+        Du = Bo ^ rol(Ba, 1);
+
+        Ba = Eba ^ Da;
+        Be = rol(Ege ^ De, 12);
+        Bi = rol(Eki ^ Di, 11);
+        Bo = rol(Emo ^ Do, 21);
+        Bu = rol(Esu ^ Du, 14);
+        Aba = Ba ^ (~Be & Bi) ^ round_constants[round + 1];
+        Abe = Be ^ (~Bi & Bo);
+        Abi = Bi ^ (~Bo & Bu);
+        Abo = Bo ^ (~Bu & Ba);
+        Abu = Bu ^ (~Ba & Be);
+
+        Ba = rol(Ebo ^ Do, 28);
+        Be = rol(Egu ^ Du, 20);
+        Bi = rol(Eka ^ Da, 3);
+        Bo = rol(Eme ^ De, 13);
+        Bu = rol(Esi ^ Di, 29);
+        Aga = Ba ^ (~Be & Bi);
+        Age = Be ^ (~Bi & Bo);
+        Agi = Bi ^ (~Bo & Bu);
+        Ago = Bo ^ (~Bu & Ba);
+        Agu = Bu ^ (~Ba & Be);
+
+        Ba = rol(Ebe ^ De, 1);
+        Be = rol(Egi ^ Di, 6);
+        Bi = rol(Eko ^ Do, 25);
+        Bo = rol(Emu ^ Du, 8);
+        Bu = rol(Esa ^ Da, 18);
+        Aka = Ba ^ (~Be & Bi);
+        Ake = Be ^ (~Bi & Bo);
+        Aki = Bi ^ (~Bo & Bu);
+        Ako = Bo ^ (~Bu & Ba);
+        Aku = Bu ^ (~Ba & Be);
+
+        Ba = rol(Ebu ^ Du, 27);
+        Be = rol(Ega ^ Da, 4);
+        Bi = rol(Eke ^ De, 10);
+        Bo = rol(Emi ^ Di, 15);
+        Bu = rol(Eso ^ Do, 24);
+        Ama = Ba ^ (~Be & Bi);
+        Ame = Be ^ (~Bi & Bo);
+        Ami = Bi ^ (~Bo & Bu);
+        Amo = Bo ^ (~Bu & Ba);
+        Amu = Bu ^ (~Ba & Be);
+
+        Ba = rol(Ebi ^ Di, 30);
+        Be = rol(Ego ^ Do, 23);
+        Bi = rol(Eku ^ Du, 7);
+        Bo = rol(Ema ^ Da, 9);
+        Bu = rol(Ese ^ De, 2);
+        Asa = Ba ^ (~Be & Bi);
+        Ase = Be ^ (~Bi & Bo);
+        Asi = Bi ^ (~Bo & Bu);
+        Aso = Bo ^ (~Bu & Ba);
+        Asu = Bu ^ (~Ba & Be);
+    }
+
+    /* Write the permuted lanes back in the order they were loaded. */
+    state[0] = Aba;
+    state[1] = Abe;
+    state[2] = Abi;
+    state[3] = Abo;
+    state[4] = Abu;
+    state[5] = Aga;
+    state[6] = Age;
+    state[7] = Agi;
+    state[8] = Ago;
+    state[9] = Agu;
+    state[10] = Aka;
+    state[11] = Ake;
+    state[12] = Aki;
+    state[13] = Ako;
+    state[14] = Aku;
+    state[15] = Ama;
+    state[16] = Ame;
+    state[17] = Ami;
+    state[18] = Amo;
+    state[19] = Amu;
+    state[20] = Asa;
+    state[21] = Ase;
+    state[22] = Asi;
+    state[23] = Aso;
+    state[24] = Asu;
+}
--- a/src/backend/common/WorkerJob.h
+++ b/src/backend/common/WorkerJob.h
@ -41,7 +41,7 @@ class WorkerJob
 {
 public:
    inline const Job &currentJob() const    { return m_jobs[index()]; }
-    inline uint32_t *nonce(size_t i = 0)    { return reinterpret_cast<uint32_t*>(blob() + (i * currentJob().size()) + 39); }
+    inline uint32_t *nonce(size_t i = 0)    { return reinterpret_cast<uint32_t*>(blob() + (i * currentJob().size()) + nonce_offset()); }
    inline uint64_t sequence() const        { return m_sequence; }
    inline uint8_t *blob()                  { return m_blobs[index()]; }
    inline uint8_t index() const            { return m_index; }
@ -88,6 +88,9 @@ public:


 private:
+    // Byte offset of the nonce within a blob, as reported by the current job.
+    inline int32_t nonce_offset() const { return currentJob().nonce_offset(); }
+    // Nonce width in bytes, as reported by the current job; nextRound() compares it with sizeof(uint64_t).
+    inline size_t nonce_size() const { return currentJob().nonce_size(); }
+
    inline void save(const Job &job, uint32_t reserveCount, Nonce::Backend backend)
    {
        m_index           = job.index();
@ -115,7 +118,7 @@ private:
 template<>
 inline uint32_t *xmrig::WorkerJob<1>::nonce(size_t)
 {
-    return reinterpret_cast<uint32_t*>(blob() + 39);
+    return reinterpret_cast<uint32_t*>(blob() + nonce_offset());
 }


@ -125,11 +128,22 @@ inline bool xmrig::WorkerJob<1>::nextRound(uint32_t rounds, uint32_t roundSize)
    bool ok = true;
    m_rounds[index()]++;

+    uint32_t* n = nonce();
+    const uint32_t prev_nonce = *n;
+
    if ((m_rounds[index()] % rounds) == 0) {
-        *nonce() = Nonce::next(index(), *nonce(), rounds * roundSize, currentJob().isNicehash(), &ok);
+        *n = Nonce::next(index(), *n, rounds * roundSize, currentJob().isNicehash(), &ok);
    }
    else {
-        *nonce() += roundSize;
+        *n += roundSize;
+    }
+
+    // Increment higher 32 bits of a 64-bit nonce when lower 32 bits overflow
+    if (!currentJob().isNicehash() && (nonce_size() == sizeof(uint64_t)) && (*n < prev_nonce)) {
+        ++n[1];
+
+        Job& job = m_jobs[index()];
+        memcpy(job.blob(), blob(), job.size());
    }

    return ok;
--- a/src/backend/cpu/CpuConfig_gen.h
+++ b/src/backend/cpu/CpuConfig_gen.h
@ -60,10 +60,6 @@ size_t inline generate<Algorithm::CN>(Threads<CpuThreads> &threads, uint32_t lim
        ++count;
    }

-#   ifdef XMRIG_ALGO_CN_GPU
-    count += generate("cn/gpu", threads, Algorithm::CN_GPU, limit);
-#   endif
-
    return count;
 }

--- a/src/backend/cpu/CpuWorker.cpp
+++ b/src/backend/cpu/CpuWorker.cpp
@ -148,15 +148,7 @@ bool xmrig::CpuWorker<N>::selfTest()
                        verify(Algorithm::CN_ZLS,    test_output_zls)  &&
                        verify(Algorithm::CN_DOUBLE, test_output_double);

-#       ifdef XMRIG_ALGO_CN_GPU
-        if (!rc || N > 1) {
-            return rc;
-        }
-
-        return verify(Algorithm::CN_GPU, test_output_gpu);
-#       else
        return rc;
-#       endif
    }

 #   ifdef XMRIG_ALGO_CN_LITE
--- a/src/backend/cpu/platform/AdvancedCpuInfo.cpp
+++ b/src/backend/cpu/platform/AdvancedCpuInfo.cpp
@ -76,12 +76,6 @@ xmrig::CpuThreads xmrig::AdvancedCpuInfo::threads(const Algorithm &algorithm, ui
        return 1;
    }

-#   ifdef XMRIG_ALGO_CN_GPU
-    if (algorithm == Algorithm::CN_GPU) {
-        return CpuThreads(threads());
-    }
-#   endif
-
    size_t cache = 0;
    size_t count = 0;

--- a/src/backend/cpu/platform/BasicCpuInfo.cpp
+++ b/src/backend/cpu/platform/BasicCpuInfo.cpp
@ -212,12 +212,6 @@ xmrig::CpuThreads xmrig::BasicCpuInfo::threads(const Algorithm &algorithm, uint3
        return 1;
    }

-#   ifdef XMRIG_ALGO_CN_GPU
-    if (algorithm == Algorithm::CN_GPU) {
-        return count;
-    }
-#   endif
-
 #   ifdef XMRIG_ALGO_CN_LITE
    if (algorithm.family() == Algorithm::CN_LITE) {
        return CpuThreads(count, 1);
--- a/src/backend/cpu/platform/HwlocCpuInfo.cpp
+++ b/src/backend/cpu/platform/HwlocCpuInfo.cpp
@ -318,12 +318,6 @@ void xmrig::HwlocCpuInfo::processTopLevelCache(hwloc_obj_t cache, const Algorith
    }
 #   endif

-#   ifdef XMRIG_ALGO_CN_GPU
-    if (algorithm == Algorithm::CN_GPU) {
-        cacheHashes = PUs;
-    }
-#   endif
-
 #   ifdef XMRIG_ALGO_RANDOMX
    if (extra == 0 && algorithm.l2() > 0) {
        cacheHashes = std::min<size_t>(std::max<size_t>(L2 / algorithm.l2(), cores.size()), cacheHashes);
--- a/src/backend/cuda/CudaConfig.cpp
+++ b/src/backend/cuda/CudaConfig.cpp
@ -181,6 +181,7 @@ void xmrig::CudaConfig::generate()
    count += xmrig::generate<Algorithm::CN_PICO>(m_threads, devices);
    count += xmrig::generate<Algorithm::RANDOM_X>(m_threads, devices);
    count += xmrig::generate<Algorithm::ASTROBWT>(m_threads, devices);
+    count += xmrig::generate<Algorithm::KAWPOW>(m_threads, devices);

    generated    = true;
    m_shouldSave = count > 0;
--- a/src/backend/cuda/CudaConfig_gen.h
+++ b/src/backend/cuda/CudaConfig_gen.h
@ -64,10 +64,6 @@ size_t inline generate<Algorithm::CN>(Threads<CudaThreads> &threads, const std::
        count++;
    }

-#   ifdef XMRIG_ALGO_CN_GPU
-    count += generate("cn/gpu", threads, Algorithm::CN_GPU, devices);
-#   endif
-
    return count;
 }

@ -145,6 +141,15 @@ size_t inline generate<Algorithm::ASTROBWT>(Threads<CudaThreads> &threads, const
 #endif


+#ifdef XMRIG_ALGO_KAWPOW
+// KawPow family: produces the "kawpow" thread profile for the given CUDA
+// devices using Algorithm::KAWPOW_RVN, and reports what the generic
+// generate() overload returned for it.
+template<>
+size_t inline generate<Algorithm::KAWPOW>(Threads<CudaThreads> &threads, const std::vector<CudaDevice> &devices)
+{
+    const size_t added = generate("kawpow", threads, Algorithm::KAWPOW_RVN, devices);
+
+    return added;
+}
+#endif
+
+
 } /* namespace xmrig */


--- a/src/backend/cuda/CudaWorker.cpp
+++ b/src/backend/cuda/CudaWorker.cpp
@ -44,6 +44,11 @@
 #endif


+#ifdef XMRIG_ALGO_KAWPOW
+#   include "backend/cuda/runners/CudaKawPowRunner.h"
+#endif
+
+
 #include <cassert>
 #include <thread>

@ -84,6 +89,12 @@ xmrig::CudaWorker::CudaWorker(size_t id, const CudaLaunchData &data) :
 #       endif
        break;

+    case Algorithm::KAWPOW:
+#       ifdef XMRIG_ALGO_KAWPOW
+        m_runner = new CudaKawPowRunner(id, data);
+#       endif
+        break;
+
    default:
        m_runner = new CudaCnRunner(id, data);
        break;
@ -138,7 +149,7 @@ void xmrig::CudaWorker::start()
        }

        while (!Nonce::isOutdated(Nonce::CUDA, m_job.sequence())) {
-            uint32_t foundNonce[10] = { 0 };
+            uint32_t foundNonce[16] = { 0 };
            uint32_t foundCount     = 0;

            if (!m_runner->run(*m_job.nonce(), &foundCount, foundNonce)) {
@ -150,7 +161,7 @@ void xmrig::CudaWorker::start()
            }

            const size_t batch_size = intensity();
-            if (!m_job.nextRound(roundSize(batch_size), batch_size)) {
+            if (!Nonce::isOutdated(Nonce::CUDA, m_job.sequence()) && !m_job.nextRound(roundSize(batch_size), batch_size)) {
                JobResults::done(m_job.currentJob());
            }

@ -174,7 +185,7 @@ bool xmrig::CudaWorker::consumeJob()
    const size_t batch_size = intensity();
    m_job.add(m_miner->job(), roundSize(batch_size) * batch_size, Nonce::CUDA);

-    return m_runner->set(m_job.currentJob(), m_job.blob());;
+    return m_runner->set(m_job.currentJob(), m_job.blob());
 }


--- a/src/backend/cuda/cuda.cmake
+++ b/src/backend/cuda/cuda.cmake
@ -52,6 +52,11 @@ if (WITH_CUDA)
       list(APPEND HEADERS_BACKEND_CUDA src/backend/cuda/runners/CudaAstroBWTRunner.h)
       list(APPEND SOURCES_BACKEND_CUDA src/backend/cuda/runners/CudaAstroBWTRunner.cpp)
   endif()
+
+   if (WITH_KAWPOW)
+       list(APPEND HEADERS_BACKEND_CUDA src/backend/cuda/runners/CudaKawPowRunner.h)
+       list(APPEND SOURCES_BACKEND_CUDA src/backend/cuda/runners/CudaKawPowRunner.cpp)
+   endif()
 else()
    remove_definitions(/DXMRIG_FEATURE_CUDA)
    remove_definitions(/DXMRIG_FEATURE_NVML)
--- a/src/backend/cuda/runners/CudaAstroBWTRunner.cpp
+++ b/src/backend/cuda/runners/CudaAstroBWTRunner.cpp
@ -27,8 +27,6 @@
 #include "backend/cuda/CudaLaunchData.h"
 #include "backend/cuda/wrappers/CudaLib.h"
 #include "base/net/stratum/Job.h"
-#include "crypto/rx/Rx.h"
-#include "crypto/rx/RxDataset.h"


 constexpr uint32_t xmrig::CudaAstroBWTRunner::BWT_DATA_STRIDE;
--- a/src/backend/cuda/runners/CudaKawPowRunner.cpp
+++ b/src/backend/cuda/runners/CudaKawPowRunner.cpp
@ -0,0 +1,78 @@
+/* XMRig
+ * Copyright 2010      Jeff Garzik <jgarzik@pobox.com>
+ * Copyright 2012-2014 pooler      <pooler@litecoinpool.org>
+ * Copyright 2014      Lucas Jones <https://github.com/lucasjones>
+ * Copyright 2014-2016 Wolf9466    <https://github.com/OhGodAPet>
+ * Copyright 2016      Jay D Dee   <jayddee246@gmail.com>
+ * Copyright 2017-2018 XMR-Stak    <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
+ * Copyright 2018-2020 SChernykh   <https://github.com/SChernykh>
+ * Copyright 2016-2020 XMRig       <https://github.com/xmrig>, <support@xmrig.com>
+ *
+ *   This program is free software: you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, either version 3 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+
+#include "backend/cuda/runners/CudaKawPowRunner.h"
+#include "backend/cuda/CudaLaunchData.h"
+#include "backend/cuda/wrappers/CudaLib.h"
+#include "base/io/log/Log.h"
+#include "base/net/stratum/Job.h"
+#include "base/tools/Chrono.h"
+
+#include "crypto/kawpow/KPCache.h"
+#include "crypto/kawpow/KPHash.h"
+
+#include "3rdparty/libethash/data_sizes.h"
+
+
+// Creates a KawPow runner for the launch slot described by `index` / `data`;
+// both arguments are forwarded unchanged to CudaBaseRunner.
+//
+// m_jobBlob starts out as nullptr so that run() never hands an indeterminate
+// pointer to the CUDA plugin if it is reached before set() has stored a blob.
+xmrig::CudaKawPowRunner::CudaKawPowRunner(size_t index, const CudaLaunchData &data) :
+    CudaBaseRunner(index, data),
+    m_jobBlob(nullptr)
+{
+}
+
+
+// Runs one KawPow hashing pass through the CUDA plugin.
+//
+// The startNonce argument is intentionally unused: the plugin is given the job
+// blob stored by set() together with m_target instead.
+// NOTE(review): presumably the plugin reads the nonce from the blob - confirm.
+//
+// rescount / resnonce are output buffers passed straight to the plugin.
+// Returns whatever callWrapper() makes of the plugin's success flag.
+bool xmrig::CudaKawPowRunner::run(uint32_t /*startNonce*/, uint32_t *rescount, uint32_t *resnonce)
+{
+    const bool hashed = CudaLib::KawPowHash(m_ctx, m_jobBlob, m_target, rescount, resnonce);
+
+    return callWrapper(hashed);
+}
+
+
+// Binds a new job to this runner and prepares the KawPow DAG for its epoch.
+//
+// Steps:
+//   1. Let CudaBaseRunner::set() accept the job; bail out if it refuses.
+//   2. Remember the blob pointer so run() can pass it to the plugin.
+//   3. Initialize the shared KPCache for epoch = height / KPHash::EPOCH_LENGTH
+//      while holding KPCache::s_cacheMutex.
+//   4. Hand the cache to the plugin via CudaLib::KawPowPrepare().
+//
+// Returns false when the base class rejects the job or when the plugin fails
+// to prepare; true otherwise.
+bool xmrig::CudaKawPowRunner::set(const Job &job, uint8_t *blob)
+{
+    if (!CudaBaseRunner::set(job, blob)) {
+        return false;
+    }
+
+    m_jobBlob = blob;
+
+    const uint64_t height = job.height();
+    const uint32_t epoch = height / KPHash::EPOCH_LENGTH;
+
+    KPCache& cache = KPCache::s_cache;
+    {
+        std::lock_guard<std::mutex> lock(KPCache::s_cacheMutex);
+        cache.init(epoch);
+    }
+
+    const uint64_t start_ms = Chrono::steadyMSecs();
+
+    // NOTE(review): the cache is read here after s_cacheMutex has been released;
+    // confirm no other thread can re-init it for a different epoch meanwhile.
+    const bool result = CudaLib::KawPowPrepare(m_ctx, cache.data(), cache.size(), cache.dag_size(epoch), height, dag_sizes);
+
+    // Kept unsigned so the value matches the PRIu64 conversion used below.
+    const uint64_t dt = Chrono::steadyMSecs() - start_ms;
+
+    // Only report the DAG as calculated when the plugin actually succeeded, and
+    // only when the preparation took long enough (> 500 ms) to be worth a line.
+    if (result && (dt > 500)) {
+        LOG_INFO("KawPow DAG for epoch %u calculated (%" PRIu64 " ms)", epoch, dt);
+    }
+
+    return result;
+}
--- a/src/backend/cuda/runners/CudaKawPowRunner.h
+++ b/src/backend/cuda/runners/CudaKawPowRunner.h
@ -5,8 +5,8 @@
 * Copyright 2014-2016 Wolf9466    <https://github.com/OhGodAPet>
 * Copyright 2016      Jay D Dee   <jayddee246@gmail.com>
 * Copyright 2017-2018 XMR-Stak    <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
- * Copyright 2018-2019 SChernykh   <https://github.com/SChernykh>
- * Copyright 2016-2019 XMRig       <https://github.com/xmrig>, <support@xmrig.com>
+ * Copyright 2018-2020 SChernykh   <https://github.com/SChernykh>
+ * Copyright 2016-2020 XMRig       <https://github.com/xmrig>, <support@xmrig.com>
 *
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
@ -22,28 +22,31 @@
 *   along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

-#ifndef XMRIG_CN2RYOKERNEL_H
-#define XMRIG_CN2RYOKERNEL_H
+#ifndef XMRIG_CUDAKAWPOWRUNNER_H
+#define XMRIG_CUDAKAWPOWRUNNER_H


-#include "backend/opencl/wrappers/OclKernel.h"
+#include "backend/cuda/runners/CudaBaseRunner.h"


 namespace xmrig {


-class Cn2RyoKernel : public OclKernel
+// CUDA runner for the KawPow algorithm family; specializes CudaBaseRunner by
+// overriding run() and set().
+class CudaKawPowRunner : public CudaBaseRunner
 {
 public:
-    inline Cn2RyoKernel(cl_program program) : OclKernel(program, "cn2") {}
+    // Both arguments are forwarded to the CudaBaseRunner constructor.
+    CudaKawPowRunner(size_t index, const CudaLaunchData &data);

-    void enqueue(cl_command_queue queue, uint32_t nonce, size_t threads);
-    void setArgs(cl_mem scratchpads, cl_mem states, cl_mem output, uint32_t threads);
-    void setTarget(uint64_t target);
+protected:
+    // Runs one hashing pass; startNonce is ignored by this runner.
+    bool run(uint32_t startNonce, uint32_t *rescount, uint32_t *resnonce) override;
+    // Accepts a new job, stores its blob pointer and prepares the plugin.
+    bool set(const Job &job, uint8_t *blob) override;
+
+private:
+    // Blob pointer stored by set() and forwarded to the plugin by run().
+    // NOTE(review): presumably not owned by this class - confirm with the caller.
+    uint8_t* m_jobBlob;
 };


-} // namespace xmrig
+} /* namespace xmrig */


-#endif /* XMRIG_CN2RYOKERNEL_H */
+#endif // XMRIG_CUDAKAWPOWRUNNER_H
--- a/src/backend/cuda/wrappers/CudaLib.cpp
+++ b/src/backend/cuda/wrappers/CudaLib.cpp
@ -29,6 +29,7 @@

 #include "backend/cuda/wrappers/CudaLib.h"
 #include "base/io/Env.h"
+#include "base/io/log/Log.h"
 #include "crypto/rx/RxAlgo.h"


@ -64,9 +65,10 @@ static const char *kPluginVersion                       = "pluginVersion";
 static const char *kRelease                             = "release";
 static const char *kRxHash                              = "rxHash";
 static const char *kRxPrepare                           = "rxPrepare";
+static const char *kKawPowHash                          = "KawPowHash";
+static const char *kKawPowPrepare                       = "KawPowPrepare";
 static const char *kSetJob                              = "setJob";
 static const char *kSetJob_v2                           = "setJob_v2";
-static const char *kSymbolNotFound                      = "symbol not found";
 static const char *kVersion                             = "version";


@ -88,6 +90,8 @@ using pluginVersion_t                                   = const char * (*)();
 using release_t                                         = void (*)(nvid_ctx *);
 using rxHash_t                                          = bool (*)(nvid_ctx *, uint32_t, uint64_t, uint32_t *, uint32_t *);
 using rxPrepare_t                                       = bool (*)(nvid_ctx *, const void *, size_t, bool, uint32_t);
+using KawPowHash_t                                      = bool (*)(nvid_ctx *, uint8_t*, uint64_t, uint32_t *, uint32_t *);
+using KawPowPrepare_t                                   = bool (*)(nvid_ctx *, const void *, size_t, size_t, uint32_t, const uint64_t*);
 using setJob_t                                          = bool (*)(nvid_ctx *, const void *, size_t, int32_t);
 using setJob_v2_t                                       = bool (*)(nvid_ctx *, const void *, size_t, const char *);
 using version_t                                         = uint32_t (*)(Version);
@ -111,12 +115,14 @@ static pluginVersion_t pPluginVersion                   = nullptr;
 static release_t pRelease                               = nullptr;
 static rxHash_t pRxHash                                 = nullptr;
 static rxPrepare_t pRxPrepare                           = nullptr;
+static KawPowHash_t pKawPowHash                         = nullptr;
+static KawPowPrepare_t pKawPowPrepare                   = nullptr;
 static setJob_t pSetJob                                 = nullptr;
 static setJob_v2_t pSetJob_v2                           = nullptr;
 static version_t pVersion                               = nullptr;


-#define DLSYM(x) if (uv_dlsym(&cudaLib, k##x, reinterpret_cast<void**>(&p##x)) == -1) { throw std::runtime_error(kSymbolNotFound); }
+// Looks up the plugin symbol named by k<x> in cudaLib and stores it in p<x>.
+// Throws std::runtime_error when uv_dlsym() returns -1; the message includes
+// the stringized macro argument so the failing lookup can be identified.
+#define DLSYM(x) if (uv_dlsym(&cudaLib, k##x, reinterpret_cast<void**>(&p##x)) == -1) { throw std::runtime_error("symbol not found (" #x ")"); }


 bool CudaLib::m_initialized = false;
@ -199,6 +205,18 @@ bool xmrig::CudaLib::rxPrepare(nvid_ctx *ctx, const void *dataset, size_t datase
 }


+// Thin forwarder to the plugin's KawPowHash entry point (pKawPowHash).
+// All arguments are passed through unchanged and the plugin's boolean result
+// is returned as-is.
+bool xmrig::CudaLib::KawPowHash(nvid_ctx *ctx, uint8_t* job_blob, uint64_t target, uint32_t *rescount, uint32_t *resnonce) noexcept
+{
+    const bool ok = pKawPowHash(ctx, job_blob, target, rescount, resnonce);
+
+    return ok;
+}
+
+
+// Thin forwarder to the plugin's KawPowPrepare entry point (pKawPowPrepare).
+// All arguments are passed through unchanged and the plugin's boolean result
+// is returned as-is.
+// NOTE(review): `height` is 32-bit here while CudaKawPowRunner::set() passes a
+// 64-bit job height - confirm the narrowing is intended.
+bool xmrig::CudaLib::KawPowPrepare(nvid_ctx *ctx, const void* cache, size_t cache_size, size_t dag_size, uint32_t height, const uint64_t* dag_sizes) noexcept
+{
+    const bool prepared = pKawPowPrepare(ctx, cache, cache_size, dag_size, height, dag_sizes);
+
+    return prepared;
+}
+
+
 bool xmrig::CudaLib::setJob(nvid_ctx *ctx, const void *data, size_t size, const Algorithm &algorithm) noexcept
 {
    const Algorithm algo = RxAlgo::id(algorithm);
@ -323,7 +341,7 @@ bool xmrig::CudaLib::load()
        return false;
    }

-    if (pVersion(ApiVersion) != 3u) {
+    if (pVersion(ApiVersion) != 6u) {
        return false;
    }

@ -347,6 +365,8 @@ bool xmrig::CudaLib::load()
        DLSYM(RxPrepare);
        DLSYM(AstroBWTHash);
        DLSYM(AstroBWTPrepare);
+        DLSYM(KawPowHash);
+        DLSYM(KawPowPrepare);
        DLSYM(Version);

        if (!pDeviceInfo_v2) {
@ -357,6 +377,7 @@ bool xmrig::CudaLib::load()
            DLSYM(SetJob);
        }
    } catch (std::exception &ex) {
+        LOG_ERR("Error loading CUDA library: %s", ex.what());
        return false;
    }

--- a/src/backend/cuda/wrappers/CudaLib.h
+++ b/src/backend/cuda/wrappers/CudaLib.h
@ -80,6 +80,8 @@ public:
    static bool deviceInit(nvid_ctx *ctx) noexcept;
    static bool rxHash(nvid_ctx *ctx, uint32_t startNonce, uint64_t target, uint32_t *rescount, uint32_t *resnonce) noexcept;
    static bool rxPrepare(nvid_ctx *ctx, const void *dataset, size_t datasetSize, bool dataset_host, uint32_t batchSize) noexcept;
+    static bool KawPowHash(nvid_ctx *ctx, uint8_t* job_blob, uint64_t target, uint32_t *rescount, uint32_t *resnonce) noexcept;
+    static bool KawPowPrepare(nvid_ctx *ctx, const void* cache, size_t cache_size, size_t dag_size, uint32_t height, const uint64_t* dag_sizes) noexcept;
    static bool setJob(nvid_ctx *ctx, const void *data, size_t size, const Algorithm &algorithm) noexcept;
    static const char *deviceName(nvid_ctx *ctx) noexcept;
    static const char *lastError(nvid_ctx *ctx) noexcept;
--- a/Show More
+++ b/Show More