Merge branch 'classic-dev' into classic
This commit is contained in:
@ -11,6 +11,7 @@ set(HEADERS
|
|||||||
algo/cryptonight/cryptonight_monero.h
|
algo/cryptonight/cryptonight_monero.h
|
||||||
algo/cryptonight/cryptonight_softaes.h
|
algo/cryptonight/cryptonight_softaes.h
|
||||||
algo/cryptonight/cryptonight_test.h
|
algo/cryptonight/cryptonight_test.h
|
||||||
|
algo/cryptonight/variant4_random_math.h
|
||||||
compat.h
|
compat.h
|
||||||
cpu.h
|
cpu.h
|
||||||
donate.h
|
donate.h
|
||||||
@ -29,6 +30,7 @@ set(HEADERS_CRYPTO
|
|||||||
crypto/c_blake256.h
|
crypto/c_blake256.h
|
||||||
crypto/c_jh.h
|
crypto/c_jh.h
|
||||||
crypto/c_skein.h
|
crypto/c_skein.h
|
||||||
|
crypto/soft_aes.h
|
||||||
)
|
)
|
||||||
|
|
||||||
set(HEADERS_COMPAT
|
set(HEADERS_COMPAT
|
||||||
@ -48,6 +50,10 @@ set(SOURCES
|
|||||||
algo/cryptonight/cryptonight_av2.c
|
algo/cryptonight/cryptonight_av2.c
|
||||||
algo/cryptonight/cryptonight_av3.c
|
algo/cryptonight/cryptonight_av3.c
|
||||||
algo/cryptonight/cryptonight_av4.c
|
algo/cryptonight/cryptonight_av4.c
|
||||||
|
algo/cryptonight/cryptonight_r_av1.c
|
||||||
|
algo/cryptonight/cryptonight_r_av2.c
|
||||||
|
algo/cryptonight/cryptonight_r_av3.c
|
||||||
|
algo/cryptonight/cryptonight_r_av4.c
|
||||||
util.c
|
util.c
|
||||||
options.c
|
options.c
|
||||||
stratum.c
|
stratum.c
|
||||||
@ -61,7 +67,6 @@ set(SOURCES_CRYPTO
|
|||||||
crypto/c_blake256.c
|
crypto/c_blake256.c
|
||||||
crypto/c_jh.c
|
crypto/c_jh.c
|
||||||
crypto/c_skein.c
|
crypto/c_skein.c
|
||||||
crypto/soft_aes.c
|
|
||||||
)
|
)
|
||||||
|
|
||||||
set(SOURCES_UTILS
|
set(SOURCES_UTILS
|
||||||
|
@ -4,9 +4,9 @@
|
|||||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||||
* Copyright 2017 fireice-uk <https://github.com/fireice-uk>
|
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||||
* Copyright 2016-2017 XMRig <support@xmrig.com>
|
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
|
||||||
*
|
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
@ -22,16 +22,15 @@
|
|||||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef __CRYPTONIGHT_LITE_SOFTAES_H__
|
#ifndef XMRIG_CRYPTONIGHT_LITE_SOFTAES_H
|
||||||
#define __CRYPTONIGHT_LITE_SOFTAES_H__
|
#define XMRIG_CRYPTONIGHT_LITE_SOFTAES_H
|
||||||
|
|
||||||
|
|
||||||
#include <x86intrin.h>
|
#include <x86intrin.h>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
|
||||||
|
|
||||||
extern __m128i soft_aesenc(__m128i in, __m128i key);
|
#include "crypto/soft_aes.h"
|
||||||
extern __m128i soft_aeskeygenassist(__m128i key, uint8_t rcon);
|
|
||||||
|
|
||||||
|
|
||||||
// This will shift and xor tmp1 into itself as 4 32-bit vals such as
|
// This will shift and xor tmp1 into itself as 4 32-bit vals such as
|
||||||
@ -253,4 +252,4 @@ static inline void cryptonight_monero_tweak(uint64_t* mem_out, __m128i tmp)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
#endif /* __CRYPTONIGHT_LITE_SOFTAES_H__ */
|
#endif /* XMRIG_CRYPTONIGHT_LITE_SOFTAES_H */
|
||||||
|
@ -6,7 +6,8 @@
|
|||||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||||
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||||
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
|
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
|
||||||
* Copyright 2016-2018 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
|
||||||
|
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
@ -38,9 +39,13 @@
|
|||||||
#include "crypto/c_groestl.h"
|
#include "crypto/c_groestl.h"
|
||||||
#include "crypto/c_jh.h"
|
#include "crypto/c_jh.h"
|
||||||
#include "crypto/c_skein.h"
|
#include "crypto/c_skein.h"
|
||||||
#include "cryptonight.h"
|
|
||||||
#include "cryptonight_test.h"
|
#include "cryptonight_test.h"
|
||||||
|
#include "cryptonight.h"
|
||||||
#include "options.h"
|
#include "options.h"
|
||||||
|
#include "persistent_memory.h"
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Assembly-optimized hash routines, indexed as [algorithm variation][variant][assembly].
 * The table has static storage duration, so every slot starts out as a null
 * pointer without an explicit initializer (an empty "= {}" is not valid ISO C
 * before C23). cryptonight_init() fills in the supported combinations.
 */
static cn_hash_fun asm_func_map[AV_MAX][VARIANT_MAX][ASM_MAX];
|
||||||
|
|
||||||
|
|
||||||
void cryptonight_av1_v0(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
void cryptonight_av1_v0(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
||||||
@ -56,6 +61,11 @@ void cryptonight_av4_v0(const uint8_t *input, size_t size, uint8_t *output, stru
|
|||||||
void cryptonight_av4_v1(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
void cryptonight_av4_v1(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
||||||
void cryptonight_av4_v2(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
void cryptonight_av4_v2(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
||||||
|
|
||||||
|
void cryptonight_r_av1(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
||||||
|
void cryptonight_r_av2(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
||||||
|
void cryptonight_r_av3(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
||||||
|
void cryptonight_r_av4(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
||||||
|
|
||||||
|
|
||||||
#ifndef XMRIG_NO_AEON
|
#ifndef XMRIG_NO_AEON
|
||||||
void cryptonight_lite_av1_v0(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
void cryptonight_lite_av1_v0(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
||||||
@ -72,7 +82,13 @@ void cryptonight_lite_av4_v1(const uint8_t *input, size_t size, uint8_t *output,
|
|||||||
#ifndef XMRIG_NO_ASM
|
#ifndef XMRIG_NO_ASM
|
||||||
void cryptonight_single_hash_asm_intel(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
void cryptonight_single_hash_asm_intel(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
||||||
void cryptonight_single_hash_asm_ryzen(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
void cryptonight_single_hash_asm_ryzen(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
||||||
|
void cryptonight_single_hash_asm_bulldozer(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
||||||
void cryptonight_double_hash_asm(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
void cryptonight_double_hash_asm(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
||||||
|
|
||||||
|
void cryptonight_r_av1_asm_intel(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
||||||
|
void cryptonight_r_av1_asm_bulldozer(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
||||||
|
void cryptonight_r_av2_asm_intel(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
||||||
|
void cryptonight_r_av2_asm_bulldozer(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
@ -89,6 +105,46 @@ static inline bool verify(enum Variant variant, uint8_t *output, struct cryptoni
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static inline bool verify2(enum Variant variant, uint8_t *output, struct cryptonight_ctx **ctx, const uint8_t *referenceValue)
|
||||||
|
{
|
||||||
|
cn_hash_fun func = cryptonight_hash_fn(opt_algo, opt_av, variant);
|
||||||
|
if (func == NULL) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (opt_double_hash) {
|
||||||
|
uint8_t input[128];
|
||||||
|
|
||||||
|
for (size_t i = 0; i < (sizeof(cn_r_test_input) / sizeof(cn_r_test_input[0])); ++i) {
|
||||||
|
const size_t size = cn_r_test_input[i].size;
|
||||||
|
memcpy(input, cn_r_test_input[i].data, size);
|
||||||
|
memcpy(input + size, cn_r_test_input[i].data, size);
|
||||||
|
|
||||||
|
ctx[0]->height = ctx[1]->height = cn_r_test_input[i].height;
|
||||||
|
|
||||||
|
func(input, size, output, ctx);
|
||||||
|
|
||||||
|
if (memcmp(output, referenceValue + i * 32, 32) != 0 || memcmp(output + 32, referenceValue + i * 32, 32) != 0) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
for (size_t i = 0; i < (sizeof(cn_r_test_input) / sizeof(cn_r_test_input[0])); ++i) {
|
||||||
|
ctx[0]->height = cn_r_test_input[i].height;
|
||||||
|
|
||||||
|
func(cn_r_test_input[i].data, cn_r_test_input[i].size, output, ctx);
|
||||||
|
|
||||||
|
if (memcmp(output, referenceValue + i * 32, 32) != 0) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
static bool self_test() {
|
static bool self_test() {
|
||||||
struct cryptonight_ctx *ctx[2];
|
struct cryptonight_ctx *ctx[2];
|
||||||
uint8_t output[64];
|
uint8_t output[64];
|
||||||
@ -97,15 +153,18 @@ static bool self_test() {
|
|||||||
const size_t size = opt_algo == ALGO_CRYPTONIGHT ? MEMORY : MEMORY_LITE;
|
const size_t size = opt_algo == ALGO_CRYPTONIGHT ? MEMORY : MEMORY_LITE;
|
||||||
bool result = false;
|
bool result = false;
|
||||||
|
|
||||||
for (int i = 0; i < count; ++i) {
|
for (size_t i = 0; i < count; ++i) {
|
||||||
ctx[i] = _mm_malloc(sizeof(struct cryptonight_ctx), 16);
|
ctx[i] = _mm_malloc(sizeof(struct cryptonight_ctx), 16);
|
||||||
ctx[i]->memory = _mm_malloc(size, 16);
|
ctx[i]->memory = _mm_malloc(size, 16);
|
||||||
|
|
||||||
|
init_cn_r(ctx[i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (opt_algo == ALGO_CRYPTONIGHT) {
|
if (opt_algo == ALGO_CRYPTONIGHT) {
|
||||||
result = verify(VARIANT_0, output, ctx, test_output_v0) &&
|
result = verify(VARIANT_0, output, ctx, test_output_v0) &&
|
||||||
verify(VARIANT_1, output, ctx, test_output_v1) &&
|
verify(VARIANT_1, output, ctx, test_output_v1) &&
|
||||||
verify(VARIANT_2, output, ctx, test_output_v2);
|
verify(VARIANT_2, output, ctx, test_output_v2) &&
|
||||||
|
verify2(VARIANT_4, output, ctx, test_output_r);
|
||||||
}
|
}
|
||||||
# ifndef XMRIG_NO_AEON
|
# ifndef XMRIG_NO_AEON
|
||||||
else {
|
else {
|
||||||
@ -115,7 +174,7 @@ static bool self_test() {
|
|||||||
# endif
|
# endif
|
||||||
|
|
||||||
|
|
||||||
for (int i = 0; i < count; ++i) {
|
for (size_t i = 0; i < count; ++i) {
|
||||||
_mm_free(ctx[i]->memory);
|
_mm_free(ctx[i]->memory);
|
||||||
_mm_free(ctx[i]);
|
_mm_free(ctx[i]);
|
||||||
}
|
}
|
||||||
@ -124,34 +183,20 @@ static bool self_test() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
size_t fn_index(enum Algo algorithm, enum AlgoVariant av, enum Variant variant, enum Assembly assembly)
|
#ifndef XMRIG_NO_ASM
|
||||||
|
cn_hash_fun cryptonight_hash_asm_fn(enum AlgoVariant av, enum Variant variant, enum Assembly assembly)
|
||||||
{
|
{
|
||||||
const size_t index = VARIANT_MAX * 4 * algorithm + 4 * variant + av - 1;
|
|
||||||
|
|
||||||
# ifndef XMRIG_NO_ASM
|
|
||||||
if (assembly == ASM_AUTO) {
|
if (assembly == ASM_AUTO) {
|
||||||
assembly = cpu_info.assembly;
|
assembly = (enum Assembly) cpu_info.assembly;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (assembly == ASM_NONE) {
|
if (assembly == ASM_NONE) {
|
||||||
return index;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
const size_t offset = VARIANT_MAX * 4 * 2;
|
return asm_func_map[av][variant][assembly];
|
||||||
|
|
||||||
if (algorithm == ALGO_CRYPTONIGHT && variant == VARIANT_2) {
|
|
||||||
if (av == AV_SINGLE) {
|
|
||||||
return offset + assembly - 2;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (av == AV_DOUBLE) {
|
|
||||||
return offset + 2;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
# endif
|
|
||||||
|
|
||||||
return index;
|
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
cn_hash_fun cryptonight_hash_fn(enum Algo algorithm, enum AlgoVariant av, enum Variant variant)
|
cn_hash_fun cryptonight_hash_fn(enum Algo algorithm, enum AlgoVariant av, enum Variant variant)
|
||||||
@ -160,10 +205,15 @@ cn_hash_fun cryptonight_hash_fn(enum Algo algorithm, enum AlgoVariant av, enum V
|
|||||||
assert(variant > VARIANT_AUTO && variant < VARIANT_MAX);
|
assert(variant > VARIANT_AUTO && variant < VARIANT_MAX);
|
||||||
|
|
||||||
# ifndef XMRIG_NO_ASM
|
# ifndef XMRIG_NO_ASM
|
||||||
static const cn_hash_fun func_table[VARIANT_MAX * 4 * 2 + 3] = {
|
if (algorithm == ALGO_CRYPTONIGHT) {
|
||||||
# else
|
cn_hash_fun fun = cryptonight_hash_asm_fn(av, variant, opt_assembly);
|
||||||
static const cn_hash_fun func_table[VARIANT_MAX * 4 * 2] = {
|
if (fun) {
|
||||||
|
return fun;
|
||||||
|
}
|
||||||
|
}
|
||||||
# endif
|
# endif
|
||||||
|
|
||||||
|
static const cn_hash_fun func_table[VARIANT_MAX * 4 * 2] = {
|
||||||
cryptonight_av1_v0,
|
cryptonight_av1_v0,
|
||||||
cryptonight_av2_v0,
|
cryptonight_av2_v0,
|
||||||
cryptonight_av3_v0,
|
cryptonight_av3_v0,
|
||||||
@ -177,6 +227,11 @@ cn_hash_fun cryptonight_hash_fn(enum Algo algorithm, enum AlgoVariant av, enum V
|
|||||||
cryptonight_av3_v2,
|
cryptonight_av3_v2,
|
||||||
cryptonight_av4_v2,
|
cryptonight_av4_v2,
|
||||||
|
|
||||||
|
cryptonight_r_av1,
|
||||||
|
cryptonight_r_av2,
|
||||||
|
cryptonight_r_av3,
|
||||||
|
cryptonight_r_av4,
|
||||||
|
|
||||||
# ifndef XMRIG_NO_AEON
|
# ifndef XMRIG_NO_AEON
|
||||||
cryptonight_lite_av1_v0,
|
cryptonight_lite_av1_v0,
|
||||||
cryptonight_lite_av2_v0,
|
cryptonight_lite_av2_v0,
|
||||||
@ -190,6 +245,10 @@ cn_hash_fun cryptonight_hash_fn(enum Algo algorithm, enum AlgoVariant av, enum V
|
|||||||
NULL,
|
NULL,
|
||||||
NULL,
|
NULL,
|
||||||
NULL,
|
NULL,
|
||||||
|
NULL,
|
||||||
|
NULL,
|
||||||
|
NULL,
|
||||||
|
NULL,
|
||||||
# else
|
# else
|
||||||
NULL,
|
NULL,
|
||||||
NULL,
|
NULL,
|
||||||
@ -203,16 +262,15 @@ cn_hash_fun cryptonight_hash_fn(enum Algo algorithm, enum AlgoVariant av, enum V
|
|||||||
NULL,
|
NULL,
|
||||||
NULL,
|
NULL,
|
||||||
NULL,
|
NULL,
|
||||||
# endif
|
NULL,
|
||||||
# ifndef XMRIG_NO_ASM
|
NULL,
|
||||||
cryptonight_single_hash_asm_intel,
|
NULL,
|
||||||
cryptonight_single_hash_asm_ryzen,
|
NULL,
|
||||||
cryptonight_double_hash_asm
|
|
||||||
# endif
|
# endif
|
||||||
};
|
};
|
||||||
|
|
||||||
# ifndef NDEBUG
|
# ifndef NDEBUG
|
||||||
const size_t index = fn_index(algorithm, av, variant, opt_assembly);
|
const size_t index = VARIANT_MAX * 4 * algorithm + 4 * variant + av - 1;
|
||||||
|
|
||||||
cn_hash_fun func = func_table[index];
|
cn_hash_fun func = func_table[index];
|
||||||
|
|
||||||
@ -221,7 +279,7 @@ cn_hash_fun cryptonight_hash_fn(enum Algo algorithm, enum AlgoVariant av, enum V
|
|||||||
|
|
||||||
return func;
|
return func;
|
||||||
# else
|
# else
|
||||||
return func_table[fn_index(algorithm, av, variant, opt_assembly)];
|
return func_table[VARIANT_MAX * 4 * algorithm + 4 * variant + av - 1];
|
||||||
# endif
|
# endif
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -230,6 +288,24 @@ bool cryptonight_init(int av)
|
|||||||
{
|
{
|
||||||
opt_double_hash = av == AV_DOUBLE || av == AV_DOUBLE_SOFT;
|
opt_double_hash = av == AV_DOUBLE || av == AV_DOUBLE_SOFT;
|
||||||
|
|
||||||
|
# ifndef XMRIG_NO_ASM
|
||||||
|
asm_func_map[AV_SINGLE][VARIANT_2][ASM_INTEL] = cryptonight_single_hash_asm_intel;
|
||||||
|
asm_func_map[AV_SINGLE][VARIANT_2][ASM_RYZEN] = cryptonight_single_hash_asm_intel;
|
||||||
|
asm_func_map[AV_SINGLE][VARIANT_2][ASM_BULLDOZER] = cryptonight_single_hash_asm_bulldozer;
|
||||||
|
|
||||||
|
asm_func_map[AV_DOUBLE][VARIANT_2][ASM_INTEL] = cryptonight_double_hash_asm;
|
||||||
|
asm_func_map[AV_DOUBLE][VARIANT_2][ASM_RYZEN] = cryptonight_double_hash_asm;
|
||||||
|
asm_func_map[AV_DOUBLE][VARIANT_2][ASM_BULLDOZER] = cryptonight_double_hash_asm;
|
||||||
|
|
||||||
|
asm_func_map[AV_SINGLE][VARIANT_4][ASM_INTEL] = cryptonight_r_av1_asm_intel;
|
||||||
|
asm_func_map[AV_SINGLE][VARIANT_4][ASM_RYZEN] = cryptonight_r_av1_asm_intel;
|
||||||
|
asm_func_map[AV_SINGLE][VARIANT_4][ASM_BULLDOZER] = cryptonight_r_av1_asm_bulldozer;
|
||||||
|
|
||||||
|
asm_func_map[AV_DOUBLE][VARIANT_4][ASM_INTEL] = cryptonight_r_av2_asm_intel;
|
||||||
|
asm_func_map[AV_DOUBLE][VARIANT_4][ASM_RYZEN] = cryptonight_r_av2_asm_intel;
|
||||||
|
asm_func_map[AV_DOUBLE][VARIANT_4][ASM_BULLDOZER] = cryptonight_r_av2_asm_bulldozer;
|
||||||
|
# endif
|
||||||
|
|
||||||
return self_test();
|
return self_test();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -267,6 +343,10 @@ static inline enum Variant cryptonight_variant(uint8_t version)
|
|||||||
return VARIANT_1;
|
return VARIANT_1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (version >= 10) {
|
||||||
|
return VARIANT_4;
|
||||||
|
}
|
||||||
|
|
||||||
if (version >= 8) {
|
if (version >= 8) {
|
||||||
return VARIANT_2;
|
return VARIANT_2;
|
||||||
}
|
}
|
||||||
@ -276,7 +356,7 @@ static inline enum Variant cryptonight_variant(uint8_t version)
|
|||||||
|
|
||||||
|
|
||||||
#ifndef BUILD_TEST
|
#ifndef BUILD_TEST
|
||||||
int scanhash_cryptonight(int thr_id, uint32_t *hash, const uint8_t *restrict blob, size_t blob_size, uint32_t target, uint32_t max_nonce, unsigned long *restrict hashes_done, struct cryptonight_ctx **restrict ctx) {
|
int scanhash_cryptonight(int thr_id, uint32_t *hash, uint8_t *restrict blob, size_t blob_size, uint32_t target, uint32_t max_nonce, unsigned long *restrict hashes_done, struct cryptonight_ctx **restrict ctx) {
|
||||||
uint32_t *nonceptr = (uint32_t*) (((char*) blob) + 39);
|
uint32_t *nonceptr = (uint32_t*) (((char*) blob) + 39);
|
||||||
enum Variant variant = cryptonight_variant(blob[0]);
|
enum Variant variant = cryptonight_variant(blob[0]);
|
||||||
|
|
||||||
@ -296,7 +376,7 @@ int scanhash_cryptonight(int thr_id, uint32_t *hash, const uint8_t *restrict blo
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
int scanhash_cryptonight_double(int thr_id, uint32_t *hash, const uint8_t *restrict blob, size_t blob_size, uint32_t target, uint32_t max_nonce, unsigned long *restrict hashes_done, struct cryptonight_ctx **restrict ctx) {
|
int scanhash_cryptonight_double(int thr_id, uint32_t *hash, uint8_t *restrict blob, size_t blob_size, uint32_t target, uint32_t max_nonce, unsigned long *restrict hashes_done, struct cryptonight_ctx **restrict ctx) {
|
||||||
int rc = 0;
|
int rc = 0;
|
||||||
uint32_t *nonceptr0 = (uint32_t*) (((char*) blob) + 39);
|
uint32_t *nonceptr0 = (uint32_t*) (((char*) blob) + 39);
|
||||||
uint32_t *nonceptr1 = (uint32_t*) (((char*) blob) + 39 + blob_size);
|
uint32_t *nonceptr1 = (uint32_t*) (((char*) blob) + 39 + blob_size);
|
||||||
|
@ -6,7 +6,8 @@
|
|||||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||||
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||||
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
|
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
|
||||||
* Copyright 2016-2018 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
|
||||||
|
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
@ -38,9 +39,30 @@
|
|||||||
#define MEMORY_LITE 1048576 /* 1 MiB */
|
#define MEMORY_LITE 1048576 /* 1 MiB */
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * ABI_ATTRIBUTE is appended to the function-pointer typedefs below.
 * It expands to nothing when _MSC_VER or XMRIG_ARM is defined and to
 * __attribute__((ms_abi)) on every other build.
 */
#if defined _MSC_VER || defined XMRIG_ARM
|
||||||
|
#define ABI_ATTRIBUTE
|
||||||
|
#else
|
||||||
|
#define ABI_ATTRIBUTE __attribute__((ms_abi))
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Forward declaration so the typedefs can mention the context type before the
 * struct is defined. The typedefs describe pointers to main-loop routines
 * taking one context (single hash) or two contexts (double hash).
 */
struct cryptonight_ctx;
|
||||||
|
typedef void(*cn_mainloop_fun_ms_abi)(struct cryptonight_ctx*) ABI_ATTRIBUTE;
|
||||||
|
typedef void(*cn_mainloop_double_fun_ms_abi)(struct cryptonight_ctx*, struct cryptonight_ctx*) ABI_ATTRIBUTE;
|
||||||
|
|
||||||
|
|
||||||
struct cryptonight_ctx {
|
struct cryptonight_ctx {
|
||||||
uint8_t state[224] __attribute__((aligned(16)));
|
uint8_t state[224] __attribute__((aligned(16)));
|
||||||
uint8_t* memory __attribute__((aligned(16)));
|
uint8_t *memory __attribute__((aligned(16)));
|
||||||
|
|
||||||
|
uint8_t unused[40];
|
||||||
|
const uint32_t *saes_table;
|
||||||
|
|
||||||
|
cn_mainloop_fun_ms_abi generated_code;
|
||||||
|
cn_mainloop_double_fun_ms_abi generated_code_double;
|
||||||
|
uint64_t generated_code_height;
|
||||||
|
uint64_t generated_code_double_height;
|
||||||
|
uint64_t height;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
@ -52,7 +74,8 @@ extern void (* const extra_hashes[4])(const void *, size_t, char *);
|
|||||||
cn_hash_fun cryptonight_hash_fn(enum Algo algorithm, enum AlgoVariant av, enum Variant variant);
|
cn_hash_fun cryptonight_hash_fn(enum Algo algorithm, enum AlgoVariant av, enum Variant variant);
|
||||||
|
|
||||||
bool cryptonight_init(int av);
|
bool cryptonight_init(int av);
|
||||||
int scanhash_cryptonight(int thr_id, uint32_t *hash, const uint8_t *restrict blob, size_t blob_size, uint32_t target, uint32_t max_nonce, unsigned long *restrict hashes_done, struct cryptonight_ctx **restrict ctx);
|
int scanhash_cryptonight(int thr_id, uint32_t *hash, uint8_t *blob, size_t blob_size, uint32_t target, uint32_t max_nonce, unsigned long *hashes_done, struct cryptonight_ctx **ctx);
|
||||||
int scanhash_cryptonight_double(int thr_id, uint32_t *hash, const uint8_t *restrict blob, size_t blob_size, uint32_t target, uint32_t max_nonce, unsigned long *restrict hashes_done, struct cryptonight_ctx **restrict ctx);
|
int scanhash_cryptonight_double(int thr_id, uint32_t *hash, uint8_t *blob, size_t blob_size, uint32_t target, uint32_t max_nonce, unsigned long *hashes_done, struct cryptonight_ctx **ctx);
|
||||||
|
|
||||||
|
|
||||||
#endif /* XMRIG_CRYPTONIGHT_H */
|
#endif /* XMRIG_CRYPTONIGHT_H */
|
||||||
|
@ -196,6 +196,7 @@ void cryptonight_av1_v2(const uint8_t *restrict input, size_t size, uint8_t *res
|
|||||||
#ifndef XMRIG_NO_ASM
|
#ifndef XMRIG_NO_ASM
|
||||||
extern void cnv2_mainloop_ivybridge_asm(struct cryptonight_ctx *ctx);
|
extern void cnv2_mainloop_ivybridge_asm(struct cryptonight_ctx *ctx);
|
||||||
extern void cnv2_mainloop_ryzen_asm(struct cryptonight_ctx *ctx);
|
extern void cnv2_mainloop_ryzen_asm(struct cryptonight_ctx *ctx);
|
||||||
|
extern void cnv2_mainloop_bulldozer_asm(struct cryptonight_ctx *ctx);
|
||||||
extern void cnv2_double_mainloop_sandybridge_asm(struct cryptonight_ctx* ctx0, struct cryptonight_ctx* ctx1);
|
extern void cnv2_double_mainloop_sandybridge_asm(struct cryptonight_ctx* ctx0, struct cryptonight_ctx* ctx1);
|
||||||
|
|
||||||
|
|
||||||
@ -225,6 +226,19 @@ void cryptonight_single_hash_asm_ryzen(const uint8_t *restrict input, size_t siz
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void cryptonight_single_hash_asm_bulldozer(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
|
||||||
|
{
|
||||||
|
keccak(input, size, ctx[0]->state, 200);
|
||||||
|
cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
|
||||||
|
|
||||||
|
cnv2_mainloop_bulldozer_asm(ctx[0]);
|
||||||
|
|
||||||
|
cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
|
||||||
|
keccakf((uint64_t*) ctx[0]->state, 24);
|
||||||
|
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
void cryptonight_double_hash_asm(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
|
void cryptonight_double_hash_asm(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
|
||||||
{
|
{
|
||||||
keccak(input, size, ctx[0]->state, 200);
|
keccak(input, size, ctx[0]->state, 200);
|
||||||
|
@ -6,8 +6,8 @@
|
|||||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||||
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||||
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
|
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
|
||||||
* Copyright 2018 SChernykh <https://github.com/SChernykh>
|
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
|
||||||
* Copyright 2016-2018 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
@ -29,6 +29,8 @@
|
|||||||
|
|
||||||
#include <fenv.h>
|
#include <fenv.h>
|
||||||
#include <math.h>
|
#include <math.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <x86intrin.h>
|
||||||
|
|
||||||
|
|
||||||
static inline __m128i int_sqrt_v2(const uint64_t n0)
|
static inline __m128i int_sqrt_v2(const uint64_t n0)
|
||||||
@ -87,6 +89,17 @@ static inline __m128i int_sqrt_v2(const uint64_t n0)
|
|||||||
_mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x30)), _mm_add_epi64(chunk2, _a)); \
|
_mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x30)), _mm_add_epi64(chunk2, _a)); \
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * VARIANT4_SHUFFLE(base_ptr, offset, _a, _b, _b1, _c)
 *
 * Reads the three 16-byte chunks located at (offset ^ 0x10), (offset ^ 0x20)
 * and (offset ^ 0x30) from base_ptr, then writes them back rotated and with a
 * 64-bit lane-wise addition applied:
 *   offset ^ 0x10  <-  chunk3 + _b1
 *   offset ^ 0x20  <-  chunk1 + _b
 *   offset ^ 0x30  <-  chunk2 + _a
 * Finally the three chunks as originally loaded are XORed into _c, so _c must
 * be a modifiable __m128i lvalue. base_ptr and offset are expanded six times
 * each, so they should be free of side effects. The load/store intrinsics
 * used here are the aligned variants.
 */
# define VARIANT4_SHUFFLE(base_ptr, offset, _a, _b, _b1, _c) \
|
||||||
|
{ \
|
||||||
|
const __m128i chunk1 = _mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ 0x10))); \
|
||||||
|
const __m128i chunk2 = _mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ 0x20))); \
|
||||||
|
const __m128i chunk3 = _mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ 0x30))); \
|
||||||
|
_mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x10)), _mm_add_epi64(chunk3, _b1)); \
|
||||||
|
_mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x20)), _mm_add_epi64(chunk1, _b)); \
|
||||||
|
_mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x30)), _mm_add_epi64(chunk2, _a)); \
|
||||||
|
_c = _mm_xor_si128(_mm_xor_si128(_c, chunk3), _mm_xor_si128(chunk1, chunk2)); \
|
||||||
|
}
|
||||||
|
|
||||||
# define VARIANT2_SHUFFLE2(base_ptr, offset, _a, _b, _b1, hi, lo) \
|
# define VARIANT2_SHUFFLE2(base_ptr, offset, _a, _b, _b1, hi, lo) \
|
||||||
{ \
|
{ \
|
||||||
const __m128i chunk1 = _mm_xor_si128(_mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ 0x10))), _mm_set_epi64x(lo, hi)); \
|
const __m128i chunk1 = _mm_xor_si128(_mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ 0x10))), _mm_set_epi64x(lo, hi)); \
|
||||||
@ -99,4 +112,39 @@ static inline __m128i int_sqrt_v2(const uint64_t n0)
|
|||||||
_mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x30)), _mm_add_epi64(chunk2, _a)); \
|
_mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x30)), _mm_add_epi64(chunk2, _a)); \
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * NOINLINE: function attribute that asks the compiler not to inline.
 * Left alone if something already defined it; otherwise it maps to the GCC
 * style attribute when __GNUC__ is defined, to __declspec(noinline) when
 * _MSC_VER is non-zero, and to nothing on any other compiler.
 */
#ifndef NOINLINE
|
||||||
|
#ifdef __GNUC__
|
||||||
|
#define NOINLINE __attribute__ ((noinline))
|
||||||
|
#elif _MSC_VER
|
||||||
|
#define NOINLINE __declspec(noinline)
|
||||||
|
#else
|
||||||
|
#define NOINLINE
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include "variant4_random_math.h"
|
||||||
|
|
||||||
|
/*
 * VARIANT4_RANDOM_MATH_INIT(part)
 *
 * Declares, in the enclosing scope, the register file r<part>[9] and the
 * instruction buffer code<part>[256] for hash lane <part>.
 * r<part>[0..3] are seeded with the low and high 32-bit halves of
 * h<part>[12] and h<part>[13]; r<part>[4..8] are not written here.
 * The instruction buffer is then filled by v4_random_math_init() from
 * ctx[part]->height.
 *
 * Expects h<part> and ctx to be visible at the expansion site. Because it
 * declares variables, it can be expanded only once per lane in a given scope.
 */
#define VARIANT4_RANDOM_MATH_INIT(part) \
|
||||||
|
uint32_t r##part[9]; \
|
||||||
|
struct V4_Instruction code##part[256]; \
|
||||||
|
{ \
|
||||||
|
r##part[0] = (uint32_t)(h##part[12]); \
|
||||||
|
r##part[1] = (uint32_t)(h##part[12] >> 32); \
|
||||||
|
r##part[2] = (uint32_t)(h##part[13]); \
|
||||||
|
r##part[3] = (uint32_t)(h##part[13] >> 32); \
|
||||||
|
} \
|
||||||
|
v4_random_math_init(code##part, ctx[part]->height);
|
||||||
|
|
||||||
|
/*
 * VARIANT4_RANDOM_MATH(part, al, ah, cl, bx0, bx1)
 *
 * One random-math step for hash lane <part>; uses the r<part> / code<part>
 * variables declared by VARIANT4_RANDOM_MATH_INIT(part).
 *
 *  1. cl is modified in place: it is XORed with (r0 + r1) in its low 32 bits
 *     and (r2 + r3) in its high 32 bits (each sum is a 32-bit unsigned add).
 *  2. r4..r8 are loaded from: al, ah (both truncated to 32 bits), the lowest
 *     32 bits of bx0, the lowest 32 bits of bx1, and bits 64..95 of bx1
 *     (bx1 shifted right by 8 bytes before extraction).
 *  3. v4_random_math() runs code<part> over the register file r<part>.
 *
 * cl must be a modifiable lvalue.
 */
#define VARIANT4_RANDOM_MATH(part, al, ah, cl, bx0, bx1) \
|
||||||
|
{ \
|
||||||
|
cl ^= (r##part[0] + r##part[1]) | ((uint64_t)(r##part[2] + r##part[3]) << 32); \
|
||||||
|
r##part[4] = (uint32_t)(al); \
|
||||||
|
r##part[5] = (uint32_t)(ah); \
|
||||||
|
r##part[6] = (uint32_t)(_mm_cvtsi128_si32(bx0)); \
|
||||||
|
r##part[7] = (uint32_t)(_mm_cvtsi128_si32(bx1)); \
|
||||||
|
r##part[8] = (uint32_t)(_mm_cvtsi128_si32(_mm_srli_si128(bx1, 8))); \
|
||||||
|
v4_random_math(code##part, r##part); \
|
||||||
|
}
|
||||||
|
|
||||||
#endif /* XMRIG_CRYPTONIGHT_MONERO_H */
|
#endif /* XMRIG_CRYPTONIGHT_MONERO_H */
|
||||||
|
143
algo/cryptonight/cryptonight_r_av1.c
Normal file
143
algo/cryptonight/cryptonight_r_av1.c
Normal file
@ -0,0 +1,143 @@
|
|||||||
|
/* XMRig
|
||||||
|
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||||
|
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||||
|
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||||
|
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||||
|
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||||
|
* Copyright 2017 fireice-uk <https://github.com/fireice-uk>
|
||||||
|
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||||
|
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
|
||||||
|
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
|
||||||
|
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||||
|
*
|
||||||
|
* This program is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation, either version 3 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <x86intrin.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
#include "crypto/c_keccak.h"
|
||||||
|
#include "cryptonight.h"
|
||||||
|
#include "cryptonight_aesni.h"
|
||||||
|
#include "cryptonight_monero.h"
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Single-hash cn/r routine using the hardware AES round (_mm_aesenc_si128).
 *
 * input/size : message to hash.
 * output     : receives the result of the selected extra_hashes[] finalizer.
 * ctx        : only ctx[0] is used (its state, memory and, via the macros, height).
 *
 * Sequence as written: keccak() -> cn_explode_scratchpad() -> 0x80000 mixing
 * iterations over ctx[0]->memory -> cn_implode_scratchpad() -> keccakf() ->
 * extra_hashes[].
 *
 * NOTE(review): the VARIANT2_* / VARIANT4_* macros live in cryptonight_monero.h and
 * paste the part number onto identifiers (r##part, code##part), so the locals
 * h0 / r0 / code0 are presumably required by name - confirm before renaming anything.
 */
void cryptonight_r_av1(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
|
||||||
|
{
|
||||||
|
// Run keccak over the input; the result lands in ctx[0]->state.
keccak(input, size, ctx[0]->state, 200);
|
||||||
|
|
||||||
|
// Presumably expands the state into the scratchpad (ctx[0]->memory) - confirm in cryptonight_aesni.h.
cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
|
||||||
|
|
||||||
|
// l0: scratchpad bytes (written below through casts despite the const qualifier).
const uint8_t* l0 = ctx[0]->memory;
|
||||||
|
// h0: the state viewed as 64-bit words.
uint64_t* h0 = (uint64_t*) ctx[0]->state;
|
||||||
|
|
||||||
|
VARIANT2_INIT(0);
|
||||||
|
VARIANT2_SET_ROUNDING_MODE();
|
||||||
|
// r0[] used further down is not declared in this function; presumably this macro introduces it.
VARIANT4_RANDOM_MATH_INIT(0);
|
||||||
|
|
||||||
|
// Initial registers are XOR combinations of fixed state words.
uint64_t al0 = h0[0] ^ h0[4];
|
||||||
|
uint64_t ah0 = h0[1] ^ h0[5];
|
||||||
|
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
|
||||||
|
__m128i bx1 = _mm_set_epi64x(h0[9] ^ h0[11], h0[8] ^ h0[10]);
|
||||||
|
|
||||||
|
uint64_t idx0 = al0;
|
||||||
|
|
||||||
|
// 0x80000 iterations; the mask 0x1FFFF0 keeps every offset 16-byte aligned and below 0x200000.
for (size_t i = 0; __builtin_expect(i < 0x80000, 1); i++) {
|
||||||
|
__m128i cx = _mm_load_si128((__m128i *) &l0[idx0 & 0x1FFFF0]);
|
||||||
|
// ax0 packs ah0 into the high and al0 into the low 64 bits.
const __m128i ax0 = _mm_set_epi64x(ah0, al0);
|
||||||
|
|
||||||
|
// One AES round on the loaded block, with ax0 as the round key.
cx = _mm_aesenc_si128(cx, ax0);
|
||||||
|
|
||||||
|
VARIANT4_SHUFFLE(l0, idx0 & 0x1FFFF0, ax0, bx0, bx1, cx);
|
||||||
|
// Write bx0 ^ cx back to the slot that was just read.
_mm_store_si128((__m128i *) &l0[idx0 & 0x1FFFF0], _mm_xor_si128(bx0, cx));
|
||||||
|
|
||||||
|
// The low 64 bits of cx select the second slot of this iteration.
idx0 = _mm_cvtsi128_si64(cx);
|
||||||
|
|
||||||
|
uint64_t hi, lo, cl, ch;
|
||||||
|
// cl/ch: the two 64-bit halves of the second slot, read before it is overwritten below.
cl = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[0];
|
||||||
|
ch = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[1];
|
||||||
|
|
||||||
|
VARIANT4_RANDOM_MATH(0, al0, ah0, cl, bx0, bx1);
|
||||||
|
// Fold r0[2..3] into al0 and r0[0..1] into ah0, each pair combined into one 64-bit value.
al0 ^= r0[2] | ((uint64_t)(r0[3]) << 32);
|
||||||
|
ah0 ^= r0[0] | ((uint64_t)(r0[1]) << 32);
|
||||||
|
|
||||||
|
// 64x64 -> 128-bit product of idx0 and cl: low half returned in lo, high half stored in hi.
lo = _umul128(idx0, cl, &hi);
|
||||||
|
VARIANT4_SHUFFLE(l0, idx0 & 0x1FFFF0, ax0, bx0, bx1, cx);
|
||||||
|
|
||||||
|
// Note the cross-over: the high half goes to al0, the low half to ah0.
al0 += hi;
|
||||||
|
ah0 += lo;
|
||||||
|
|
||||||
|
((uint64_t*)&l0[idx0 & 0x1FFFF0])[0] = al0;
|
||||||
|
((uint64_t*)&l0[idx0 & 0x1FFFF0])[1] = ah0;
|
||||||
|
|
||||||
|
// Mix in the values that were in the slot before the store above.
al0 ^= cl;
|
||||||
|
ah0 ^= ch;
|
||||||
|
idx0 = al0;
|
||||||
|
|
||||||
|
// Age the history registers: bx1 <- bx0 <- cx.
bx1 = bx0;
|
||||||
|
bx0 = cx;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Presumably folds the scratchpad back into the state - confirm in cryptonight_aesni.h.
cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
|
||||||
|
|
||||||
|
keccakf(h0, 24);
|
||||||
|
// The low two bits of the first state byte pick one of four final hash functions.
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#ifndef XMRIG_NO_ASM
|
||||||
|
/* Declared here, defined outside this file. NOTE(review): presumably translates the code_size instructions in `code` into machine code written at `machine_code`, tuned for the given ASM flavour - confirm against the implementation. */
void v4_compile_code(const struct V4_Instruction* code, int code_size, void* machine_code, enum Assembly ASM);
|
||||||
|
|
||||||
|
|
||||||
|
void cryptonight_r_av1_asm_intel(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
|
||||||
|
{
|
||||||
|
if (ctx[0]->generated_code_height != ctx[0]->height) {
|
||||||
|
struct V4_Instruction code[256];
|
||||||
|
const int code_size = v4_random_math_init(code, ctx[0]->height);
|
||||||
|
|
||||||
|
v4_compile_code(code, code_size, (void*)(ctx[0]->generated_code), ASM_INTEL);
|
||||||
|
ctx[0]->generated_code_height = ctx[0]->height;
|
||||||
|
}
|
||||||
|
|
||||||
|
keccak(input, size, ctx[0]->state, 200);
|
||||||
|
cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
|
||||||
|
|
||||||
|
ctx[0]->generated_code(ctx[0]);
|
||||||
|
|
||||||
|
cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
|
||||||
|
keccakf((uint64_t*) ctx[0]->state, 24);
|
||||||
|
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void cryptonight_r_av1_asm_bulldozer(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
|
||||||
|
{
|
||||||
|
if (ctx[0]->generated_code_height != ctx[0]->height) {
|
||||||
|
struct V4_Instruction code[256];
|
||||||
|
const int code_size = v4_random_math_init(code, ctx[0]->height);
|
||||||
|
|
||||||
|
v4_compile_code(code, code_size, (void*)(ctx[0]->generated_code), ASM_BULLDOZER);
|
||||||
|
ctx[0]->generated_code_height = ctx[0]->height;
|
||||||
|
}
|
||||||
|
|
||||||
|
keccak(input, size, ctx[0]->state, 200);
|
||||||
|
cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
|
||||||
|
|
||||||
|
ctx[0]->generated_code(ctx[0]);
|
||||||
|
|
||||||
|
cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
|
||||||
|
keccakf((uint64_t*) ctx[0]->state, 24);
|
||||||
|
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
|
||||||
|
}
|
||||||
|
#endif
|
202
algo/cryptonight/cryptonight_r_av2.c
Normal file
202
algo/cryptonight/cryptonight_r_av2.c
Normal file
@ -0,0 +1,202 @@
|
|||||||
|
/* XMRig
|
||||||
|
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||||
|
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||||
|
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||||
|
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||||
|
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||||
|
* Copyright 2017 fireice-uk <https://github.com/fireice-uk>
|
||||||
|
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||||
|
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
|
||||||
|
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
|
||||||
|
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||||
|
*
|
||||||
|
* This program is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation, either version 3 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <x86intrin.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
#include "crypto/c_keccak.h"
|
||||||
|
#include "cryptonight.h"
|
||||||
|
#include "cryptonight_aesni.h"
|
||||||
|
#include "cryptonight_monero.h"
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Double-hash cn/r routine using the hardware AES round (_mm_aesenc_si128).
 * Two independent hashes are computed with their steps interleaved.
 *
 * input/size : two messages of `size` bytes each, the second starting at input + size.
 * output     : first result at output, second result at output + 32.
 * ctx        : ctx[0] and ctx[1] hold the state/memory for lane 0 and lane 1.
 *
 * NOTE(review): the VARIANT2_* / VARIANT4_* macros live in cryptonight_monero.h and
 * paste the part number onto identifiers (r##part, code##part), so the locals
 * h0/h1, r0/r1, code0/code1 are presumably required by name - confirm before renaming.
 */
void cryptonight_r_av2(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
|
||||||
|
{
|
||||||
|
keccak(input, size, ctx[0]->state, 200);
|
||||||
|
// Second message follows the first one directly in the input buffer.
keccak(input + size, size, ctx[1]->state, 200);
|
||||||
|
|
||||||
|
// l0/l1: scratchpad bytes per lane (written below through casts despite the const qualifier).
const uint8_t* l0 = ctx[0]->memory;
|
||||||
|
const uint8_t* l1 = ctx[1]->memory;
|
||||||
|
// h0/h1: each lane's state viewed as 64-bit words.
uint64_t* h0 = (uint64_t*) ctx[0]->state;
|
||||||
|
uint64_t* h1 = (uint64_t*) ctx[1]->state;
|
||||||
|
|
||||||
|
VARIANT2_INIT(0);
|
||||||
|
VARIANT2_INIT(1);
|
||||||
|
VARIANT2_SET_ROUNDING_MODE();
|
||||||
|
// r0[] / r1[] used further down are not declared in this function; presumably these macros introduce them.
VARIANT4_RANDOM_MATH_INIT(0);
|
||||||
|
VARIANT4_RANDOM_MATH_INIT(1);
|
||||||
|
|
||||||
|
// Presumably expands each state into its scratchpad - confirm in cryptonight_aesni.h.
cn_explode_scratchpad((__m128i*) h0, (__m128i*) l0);
|
||||||
|
cn_explode_scratchpad((__m128i*) h1, (__m128i*) l1);
|
||||||
|
|
||||||
|
// Initial registers are XOR combinations of fixed state words.
uint64_t al0 = h0[0] ^ h0[4];
|
||||||
|
uint64_t al1 = h1[0] ^ h1[4];
|
||||||
|
uint64_t ah0 = h0[1] ^ h0[5];
|
||||||
|
uint64_t ah1 = h1[1] ^ h1[5];
|
||||||
|
|
||||||
|
// bxN0 / bxN1: the two history registers of lane N.
__m128i bx00 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
|
||||||
|
__m128i bx01 = _mm_set_epi64x(h0[9] ^ h0[11], h0[8] ^ h0[10]);
|
||||||
|
__m128i bx10 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
|
||||||
|
__m128i bx11 = _mm_set_epi64x(h1[9] ^ h1[11], h1[8] ^ h1[10]);
|
||||||
|
|
||||||
|
uint64_t idx0 = al0;
|
||||||
|
uint64_t idx1 = al1;
|
||||||
|
|
||||||
|
// 0x80000 iterations; the mask 0x1FFFF0 keeps every offset 16-byte aligned and below 0x200000.
for (size_t i = 0; __builtin_expect(i < 0x80000, 1); i++) {
|
||||||
|
__m128i cx0 = _mm_load_si128((__m128i *) &l0[idx0 & 0x1FFFF0]);
|
||||||
|
__m128i cx1 = _mm_load_si128((__m128i *) &l1[idx1 & 0x1FFFF0]);
|
||||||
|
|
||||||
|
// axN packs ahN into the high and alN into the low 64 bits.
const __m128i ax0 = _mm_set_epi64x(ah0, al0);
|
||||||
|
const __m128i ax1 = _mm_set_epi64x(ah1, al1);
|
||||||
|
|
||||||
|
// One AES round per lane, with axN as the round key.
cx0 = _mm_aesenc_si128(cx0, ax0);
|
||||||
|
cx1 = _mm_aesenc_si128(cx1, ax1);
|
||||||
|
|
||||||
|
VARIANT4_SHUFFLE(l0, idx0 & 0x1FFFF0, ax0, bx00, bx01, cx0);
|
||||||
|
// Write bx00 ^ cx0 back to the slot that was just read.
_mm_store_si128((__m128i *) &l0[idx0 & 0x1FFFF0], _mm_xor_si128(bx00, cx0));
|
||||||
|
|
||||||
|
VARIANT4_SHUFFLE(l1, idx1 & 0x1FFFF0, ax1, bx10, bx11, cx1);
|
||||||
|
_mm_store_si128((__m128i *) &l1[idx1 & 0x1FFFF0], _mm_xor_si128(bx10, cx1));
|
||||||
|
|
||||||
|
// The low 64 bits of cxN select the second slot of this iteration.
idx0 = _mm_cvtsi128_si64(cx0);
|
||||||
|
idx1 = _mm_cvtsi128_si64(cx1);
|
||||||
|
|
||||||
|
// hi/lo/cl/ch are shared scratch variables: lane 0 uses them first, then lane 1.
uint64_t hi, lo, cl, ch;
|
||||||
|
cl = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[0];
|
||||||
|
ch = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[1];
|
||||||
|
|
||||||
|
VARIANT4_RANDOM_MATH(0, al0, ah0, cl, bx00, bx01);
|
||||||
|
// Fold r0[2..3] into al0 and r0[0..1] into ah0, each pair combined into one 64-bit value.
al0 ^= r0[2] | ((uint64_t)(r0[3]) << 32);
|
||||||
|
ah0 ^= r0[0] | ((uint64_t)(r0[1]) << 32);
|
||||||
|
|
||||||
|
// 64x64 -> 128-bit product of idx0 and cl: low half returned in lo, high half stored in hi.
lo = _umul128(idx0, cl, &hi);
|
||||||
|
VARIANT4_SHUFFLE(l0, idx0 & 0x1FFFF0, ax0, bx00, bx01, cx0);
|
||||||
|
|
||||||
|
// Note the cross-over: the high half goes to al0, the low half to ah0.
al0 += hi;
|
||||||
|
ah0 += lo;
|
||||||
|
|
||||||
|
((uint64_t*)&l0[idx0 & 0x1FFFF0])[0] = al0;
|
||||||
|
((uint64_t*)&l0[idx0 & 0x1FFFF0])[1] = ah0;
|
||||||
|
|
||||||
|
// Mix in the values that were in the slot before the store above.
al0 ^= cl;
|
||||||
|
ah0 ^= ch;
|
||||||
|
idx0 = al0;
|
||||||
|
|
||||||
|
// Same second half of the iteration, now for lane 1.
cl = ((uint64_t*) &l1[idx1 & 0x1FFFF0])[0];
|
||||||
|
ch = ((uint64_t*) &l1[idx1 & 0x1FFFF0])[1];
|
||||||
|
|
||||||
|
VARIANT4_RANDOM_MATH(1, al1, ah1, cl, bx10, bx11);
|
||||||
|
al1 ^= r1[2] | ((uint64_t)(r1[3]) << 32);
|
||||||
|
ah1 ^= r1[0] | ((uint64_t)(r1[1]) << 32);
|
||||||
|
|
||||||
|
lo = _umul128(idx1, cl, &hi);
|
||||||
|
VARIANT4_SHUFFLE(l1, idx1 & 0x1FFFF0, ax1, bx10, bx11, cx1);
|
||||||
|
|
||||||
|
al1 += hi;
|
||||||
|
ah1 += lo;
|
||||||
|
|
||||||
|
((uint64_t*)&l1[idx1 & 0x1FFFF0])[0] = al1;
|
||||||
|
((uint64_t*)&l1[idx1 & 0x1FFFF0])[1] = ah1;
|
||||||
|
|
||||||
|
al1 ^= cl;
|
||||||
|
ah1 ^= ch;
|
||||||
|
idx1 = al1;
|
||||||
|
|
||||||
|
// Age the history registers of both lanes: bxN1 <- bxN0 <- cxN.
bx01 = bx00;
|
||||||
|
bx11 = bx10;
|
||||||
|
|
||||||
|
bx00 = cx0;
|
||||||
|
bx10 = cx1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Presumably folds each scratchpad back into its state - confirm in cryptonight_aesni.h.
cn_implode_scratchpad((__m128i*) l0, (__m128i*) h0);
|
||||||
|
cn_implode_scratchpad((__m128i*) l1, (__m128i*) h1);
|
||||||
|
|
||||||
|
keccakf(h0, 24);
|
||||||
|
keccakf(h1, 24);
|
||||||
|
|
||||||
|
// Per lane, the low two bits of the first state byte pick one of four final hash functions.
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
|
||||||
|
extra_hashes[ctx[1]->state[0] & 3](ctx[1]->state, 200, output + 32);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#ifndef XMRIG_NO_ASM
|
||||||
|
/* Declared here, defined outside this file. NOTE(review): presumably the two-hash counterpart of v4_compile_code(), writing machine code for `code` at `machine_code` - confirm against the implementation. */
void v4_compile_code_double(const struct V4_Instruction* code, int code_size, void* machine_code, enum Assembly ASM);
|
||||||
|
|
||||||
|
|
||||||
|
void cryptonight_r_av2_asm_intel(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
|
||||||
|
{
|
||||||
|
if (ctx[0]->generated_code_height != ctx[0]->height) {
|
||||||
|
struct V4_Instruction code[256];
|
||||||
|
const int code_size = v4_random_math_init(code, ctx[0]->height);
|
||||||
|
v4_compile_code_double(code, code_size, (void*)(ctx[0]->generated_code_double), ASM_INTEL);
|
||||||
|
ctx[0]->generated_code_height = ctx[0]->height;
|
||||||
|
}
|
||||||
|
|
||||||
|
keccak(input, size, ctx[0]->state, 200);
|
||||||
|
keccak(input + size, size, ctx[1]->state, 200);
|
||||||
|
cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
|
||||||
|
cn_explode_scratchpad((__m128i*) ctx[1]->state, (__m128i*) ctx[1]->memory);
|
||||||
|
|
||||||
|
ctx[0]->generated_code_double(ctx[0], ctx[1]);
|
||||||
|
|
||||||
|
cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
|
||||||
|
cn_implode_scratchpad((__m128i*) ctx[1]->memory, (__m128i*) ctx[1]->state);
|
||||||
|
|
||||||
|
keccakf((uint64_t *) ctx[0]->state, 24);
|
||||||
|
keccakf((uint64_t *) ctx[1]->state, 24);
|
||||||
|
|
||||||
|
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
|
||||||
|
extra_hashes[ctx[1]->state[0] & 3](ctx[1]->state, 200, output + 32);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void cryptonight_r_av2_asm_bulldozer(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
|
||||||
|
{
|
||||||
|
if (ctx[0]->generated_code_height != ctx[0]->height) {
|
||||||
|
struct V4_Instruction code[256];
|
||||||
|
const int code_size = v4_random_math_init(code, ctx[0]->height);
|
||||||
|
v4_compile_code_double(code, code_size, (void*)(ctx[0]->generated_code_double), ASM_BULLDOZER);
|
||||||
|
ctx[0]->generated_code_height = ctx[0]->height;
|
||||||
|
}
|
||||||
|
|
||||||
|
keccak(input, size, ctx[0]->state, 200);
|
||||||
|
keccak(input + size, size, ctx[1]->state, 200);
|
||||||
|
cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
|
||||||
|
cn_explode_scratchpad((__m128i*) ctx[1]->state, (__m128i*) ctx[1]->memory);
|
||||||
|
|
||||||
|
ctx[0]->generated_code_double(ctx[0], ctx[1]);
|
||||||
|
|
||||||
|
cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
|
||||||
|
cn_implode_scratchpad((__m128i*) ctx[1]->memory, (__m128i*) ctx[1]->state);
|
||||||
|
|
||||||
|
keccakf((uint64_t *) ctx[0]->state, 24);
|
||||||
|
keccakf((uint64_t *) ctx[1]->state, 24);
|
||||||
|
|
||||||
|
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
|
||||||
|
extra_hashes[ctx[1]->state[0] & 3](ctx[1]->state, 200, output + 32);
|
||||||
|
}
|
||||||
|
#endif
|
112
algo/cryptonight/cryptonight_r_av3.c
Normal file
112
algo/cryptonight/cryptonight_r_av3.c
Normal file
@ -0,0 +1,112 @@
|
|||||||
|
/* XMRig
|
||||||
|
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||||
|
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||||
|
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||||
|
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||||
|
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||||
|
* Copyright 2017 fireice-uk <https://github.com/fireice-uk>
|
||||||
|
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||||
|
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
|
||||||
|
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
|
||||||
|
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||||
|
*
|
||||||
|
* This program is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation, either version 3 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <x86intrin.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
#include "crypto/c_keccak.h"
|
||||||
|
#include "cryptonight.h"
|
||||||
|
#include "cryptonight_monero.h"
|
||||||
|
#include "cryptonight_softaes.h"
|
||||||
|
|
||||||
|
|
||||||
|
#ifndef XMRIG_NO_ASM
|
||||||
|
/* Declared here, defined outside this file. NOTE(review): presumably the software-AES counterpart of v4_compile_code(), writing machine code for `code` at `machine_code` - confirm against the implementation. */
void v4_soft_aes_compile_code(const struct V4_Instruction* code, int code_size, void* machine_code, enum Assembly ASM);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Single-hash cn/r routine for the software-AES path.
 *
 * input/size : message to hash.
 * output     : receives the result of the selected extra_hashes[] finalizer.
 * ctx        : only ctx[0] is used.
 *
 * The main loop has two compile-time forms:
 *  - XMRIG_NO_ASM not defined: code built by v4_soft_aes_compile_code() is cached in
 *    ctx[0]->generated_code and called instead of the C loop.
 *  - XMRIG_NO_ASM defined: the C loop below runs, using soft_aesenc() for the AES round.
 *
 * NOTE(review): the VARIANT2_* / VARIANT4_* macros live in cryptonight_monero.h and
 * paste the part number onto identifiers (r##part, code##part), so the locals
 * h0 / r0 / code0 are presumably required by name - confirm before renaming anything.
 */
void cryptonight_r_av3(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
|
||||||
|
{
|
||||||
|
keccak(input, size, ctx[0]->state, 200);
|
||||||
|
// Presumably expands the state into the scratchpad (ctx[0]->memory) - confirm in cryptonight_softaes.h.
cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
|
||||||
|
|
||||||
|
# ifndef XMRIG_NO_ASM
|
||||||
|
// Recompile only when the height differs from the one the cached code was built for.
if (ctx[0]->generated_code_height != ctx[0]->height) {
|
||||||
|
struct V4_Instruction code[256];
|
||||||
|
const int code_size = v4_random_math_init(code, ctx[0]->height);
|
||||||
|
|
||||||
|
v4_soft_aes_compile_code(code, code_size, (void*)(ctx[0]->generated_code), ASM_NONE);
|
||||||
|
ctx[0]->generated_code_height = ctx[0]->height;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Publish the AES lookup table through the context before calling the generated routine.
ctx[0]->saes_table = (const uint32_t*)saes_table;
|
||||||
|
ctx[0]->generated_code(ctx[0]);
|
||||||
|
# else
|
||||||
|
// l0: scratchpad bytes (written below through casts despite the const qualifier).
const uint8_t* l0 = ctx[0]->memory;
|
||||||
|
// h0: the state viewed as 64-bit words.
uint64_t* h0 = (uint64_t*) ctx[0]->state;
|
||||||
|
|
||||||
|
VARIANT2_INIT(0);
|
||||||
|
VARIANT2_SET_ROUNDING_MODE();
|
||||||
|
// r0[] used further down is not declared in this function; presumably this macro introduces it.
VARIANT4_RANDOM_MATH_INIT(0);
|
||||||
|
|
||||||
|
// Initial registers are XOR combinations of fixed state words.
uint64_t al0 = h0[0] ^ h0[4];
|
||||||
|
uint64_t ah0 = h0[1] ^ h0[5];
|
||||||
|
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
|
||||||
|
__m128i bx1 = _mm_set_epi64x(h0[9] ^ h0[11], h0[8] ^ h0[10]);
|
||||||
|
|
||||||
|
uint64_t idx0 = al0;
|
||||||
|
|
||||||
|
// 0x80000 iterations; the mask 0x1FFFF0 keeps every offset 16-byte aligned and below 0x200000.
for (size_t i = 0; __builtin_expect(i < 0x80000, 1); i++) {
|
||||||
|
__m128i cx = _mm_load_si128((__m128i *) &l0[idx0 & 0x1FFFF0]);
|
||||||
|
// ax0 packs ah0 into the high and al0 into the low 64 bits.
const __m128i ax0 = _mm_set_epi64x(ah0, al0);
|
||||||
|
|
||||||
|
// Software replacement for the AES round, with ax0 as the key argument.
cx = soft_aesenc(cx, ax0);
|
||||||
|
|
||||||
|
VARIANT4_SHUFFLE(l0, idx0 & 0x1FFFF0, ax0, bx0, bx1, cx);
|
||||||
|
// Write bx0 ^ cx back to the slot that was just read.
_mm_store_si128((__m128i *) &l0[idx0 & 0x1FFFF0], _mm_xor_si128(bx0, cx));
|
||||||
|
|
||||||
|
// The low 64 bits of cx select the second slot of this iteration.
idx0 = _mm_cvtsi128_si64(cx);
|
||||||
|
|
||||||
|
uint64_t hi, lo, cl, ch;
|
||||||
|
// cl/ch: the two 64-bit halves of the second slot, read before it is overwritten below.
cl = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[0];
|
||||||
|
ch = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[1];
|
||||||
|
|
||||||
|
VARIANT4_RANDOM_MATH(0, al0, ah0, cl, bx0, bx1);
|
||||||
|
// Fold r0[2..3] into al0 and r0[0..1] into ah0, each pair combined into one 64-bit value.
al0 ^= r0[2] | ((uint64_t)(r0[3]) << 32);
|
||||||
|
ah0 ^= r0[0] | ((uint64_t)(r0[1]) << 32);
|
||||||
|
|
||||||
|
// 64x64 -> 128-bit product of idx0 and cl: low half returned in lo, high half stored in hi.
lo = _umul128(idx0, cl, &hi);
|
||||||
|
VARIANT4_SHUFFLE(l0, idx0 & 0x1FFFF0, ax0, bx0, bx1, cx);
|
||||||
|
|
||||||
|
// Note the cross-over: the high half goes to al0, the low half to ah0.
al0 += hi;
|
||||||
|
ah0 += lo;
|
||||||
|
|
||||||
|
((uint64_t*)&l0[idx0 & 0x1FFFF0])[0] = al0;
|
||||||
|
((uint64_t*)&l0[idx0 & 0x1FFFF0])[1] = ah0;
|
||||||
|
|
||||||
|
// Mix in the values that were in the slot before the store above.
al0 ^= cl;
|
||||||
|
ah0 ^= ch;
|
||||||
|
idx0 = al0;
|
||||||
|
|
||||||
|
// Age the history registers: bx1 <- bx0 <- cx.
bx1 = bx0;
|
||||||
|
bx0 = cx;
|
||||||
|
}
|
||||||
|
# endif
|
||||||
|
|
||||||
|
// Presumably folds the scratchpad back into the state - confirm in cryptonight_softaes.h.
cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
|
||||||
|
keccakf((uint64_t *) ctx[0]->state, 24);
|
||||||
|
// The low two bits of the first state byte pick one of four final hash functions.
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
|
||||||
|
}
|
143
algo/cryptonight/cryptonight_r_av4.c
Normal file
143
algo/cryptonight/cryptonight_r_av4.c
Normal file
@ -0,0 +1,143 @@
|
|||||||
|
/* XMRig
|
||||||
|
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||||
|
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||||
|
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||||
|
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||||
|
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||||
|
* Copyright 2017 fireice-uk <https://github.com/fireice-uk>
|
||||||
|
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||||
|
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
|
||||||
|
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
|
||||||
|
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||||
|
*
|
||||||
|
* This program is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation, either version 3 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <x86intrin.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
#include "crypto/c_keccak.h"
|
||||||
|
#include "cryptonight.h"
|
||||||
|
#include "cryptonight_monero.h"
|
||||||
|
#include "cryptonight_softaes.h"
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Double-hash cn/r routine for the software-AES path (soft_aesenc()).
 * Two independent hashes are computed with their steps interleaved.
 *
 * input/size : two messages of `size` bytes each, the second starting at input + size.
 * output     : first result at output, second result at output + 32.
 * ctx        : ctx[0] and ctx[1] hold the state/memory for lane 0 and lane 1.
 *
 * NOTE(review): the VARIANT2_* / VARIANT4_* macros live in cryptonight_monero.h and
 * paste the part number onto identifiers (r##part, code##part), so the locals
 * h0/h1, r0/r1, code0/code1 are presumably required by name - confirm before renaming.
 */
void cryptonight_r_av4(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
|
||||||
|
{
|
||||||
|
keccak(input, size, ctx[0]->state, 200);
|
||||||
|
// Second message follows the first one directly in the input buffer.
keccak(input + size, size, ctx[1]->state, 200);
|
||||||
|
|
||||||
|
// l0/l1: scratchpad bytes per lane (written below through casts despite the const qualifier).
const uint8_t* l0 = ctx[0]->memory;
|
||||||
|
const uint8_t* l1 = ctx[1]->memory;
|
||||||
|
// h0/h1: each lane's state viewed as 64-bit words.
uint64_t* h0 = (uint64_t*) ctx[0]->state;
|
||||||
|
uint64_t* h1 = (uint64_t*) ctx[1]->state;
|
||||||
|
|
||||||
|
VARIANT2_INIT(0);
|
||||||
|
VARIANT2_INIT(1);
|
||||||
|
VARIANT2_SET_ROUNDING_MODE();
|
||||||
|
// r0[] / r1[] used further down are not declared in this function; presumably these macros introduce them.
VARIANT4_RANDOM_MATH_INIT(0);
|
||||||
|
VARIANT4_RANDOM_MATH_INIT(1);
|
||||||
|
|
||||||
|
// Presumably expands each state into its scratchpad - confirm in cryptonight_softaes.h.
cn_explode_scratchpad((__m128i*) h0, (__m128i*) l0);
|
||||||
|
cn_explode_scratchpad((__m128i*) h1, (__m128i*) l1);
|
||||||
|
|
||||||
|
// Initial registers are XOR combinations of fixed state words.
uint64_t al0 = h0[0] ^ h0[4];
|
||||||
|
uint64_t al1 = h1[0] ^ h1[4];
|
||||||
|
uint64_t ah0 = h0[1] ^ h0[5];
|
||||||
|
uint64_t ah1 = h1[1] ^ h1[5];
|
||||||
|
|
||||||
|
// bxN0 / bxN1: the two history registers of lane N.
__m128i bx00 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
|
||||||
|
__m128i bx01 = _mm_set_epi64x(h0[9] ^ h0[11], h0[8] ^ h0[10]);
|
||||||
|
__m128i bx10 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
|
||||||
|
__m128i bx11 = _mm_set_epi64x(h1[9] ^ h1[11], h1[8] ^ h1[10]);
|
||||||
|
|
||||||
|
uint64_t idx0 = al0;
|
||||||
|
uint64_t idx1 = al1;
|
||||||
|
|
||||||
|
// 0x80000 iterations; the mask 0x1FFFF0 keeps every offset 16-byte aligned and below 0x200000.
for (size_t i = 0; __builtin_expect(i < 0x80000, 1); i++) {
|
||||||
|
__m128i cx0 = _mm_load_si128((__m128i *) &l0[idx0 & 0x1FFFF0]);
|
||||||
|
__m128i cx1 = _mm_load_si128((__m128i *) &l1[idx1 & 0x1FFFF0]);
|
||||||
|
|
||||||
|
// axN packs ahN into the high and alN into the low 64 bits.
const __m128i ax0 = _mm_set_epi64x(ah0, al0);
|
||||||
|
const __m128i ax1 = _mm_set_epi64x(ah1, al1);
|
||||||
|
|
||||||
|
// Software replacement for the AES round, one call per lane, with axN as the key argument.
cx0 = soft_aesenc(cx0, ax0);
|
||||||
|
cx1 = soft_aesenc(cx1, ax1);
|
||||||
|
|
||||||
|
VARIANT4_SHUFFLE(l0, idx0 & 0x1FFFF0, ax0, bx00, bx01, cx0);
|
||||||
|
// Write bx00 ^ cx0 back to the slot that was just read.
_mm_store_si128((__m128i *) &l0[idx0 & 0x1FFFF0], _mm_xor_si128(bx00, cx0));
|
||||||
|
|
||||||
|
VARIANT4_SHUFFLE(l1, idx1 & 0x1FFFF0, ax1, bx10, bx11, cx1);
|
||||||
|
_mm_store_si128((__m128i *) &l1[idx1 & 0x1FFFF0], _mm_xor_si128(bx10, cx1));
|
||||||
|
|
||||||
|
// The low 64 bits of cxN select the second slot of this iteration.
idx0 = _mm_cvtsi128_si64(cx0);
|
||||||
|
idx1 = _mm_cvtsi128_si64(cx1);
|
||||||
|
|
||||||
|
// hi/lo/cl/ch are shared scratch variables: lane 0 uses them first, then lane 1.
uint64_t hi, lo, cl, ch;
|
||||||
|
cl = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[0];
|
||||||
|
ch = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[1];
|
||||||
|
|
||||||
|
VARIANT4_RANDOM_MATH(0, al0, ah0, cl, bx00, bx01);
|
||||||
|
// Fold r0[2..3] into al0 and r0[0..1] into ah0, each pair combined into one 64-bit value.
al0 ^= r0[2] | ((uint64_t)(r0[3]) << 32);
|
||||||
|
ah0 ^= r0[0] | ((uint64_t)(r0[1]) << 32);
|
||||||
|
|
||||||
|
// 64x64 -> 128-bit product of idx0 and cl: low half returned in lo, high half stored in hi.
lo = _umul128(idx0, cl, &hi);
|
||||||
|
VARIANT4_SHUFFLE(l0, idx0 & 0x1FFFF0, ax0, bx00, bx01, cx0);
|
||||||
|
|
||||||
|
// Note the cross-over: the high half goes to al0, the low half to ah0.
al0 += hi;
|
||||||
|
ah0 += lo;
|
||||||
|
|
||||||
|
((uint64_t*)&l0[idx0 & 0x1FFFF0])[0] = al0;
|
||||||
|
((uint64_t*)&l0[idx0 & 0x1FFFF0])[1] = ah0;
|
||||||
|
|
||||||
|
// Mix in the values that were in the slot before the store above.
al0 ^= cl;
|
||||||
|
ah0 ^= ch;
|
||||||
|
idx0 = al0;
|
||||||
|
|
||||||
|
// Same second half of the iteration, now for lane 1.
cl = ((uint64_t*) &l1[idx1 & 0x1FFFF0])[0];
|
||||||
|
ch = ((uint64_t*) &l1[idx1 & 0x1FFFF0])[1];
|
||||||
|
|
||||||
|
VARIANT4_RANDOM_MATH(1, al1, ah1, cl, bx10, bx11);
|
||||||
|
al1 ^= r1[2] | ((uint64_t)(r1[3]) << 32);
|
||||||
|
ah1 ^= r1[0] | ((uint64_t)(r1[1]) << 32);
|
||||||
|
|
||||||
|
lo = _umul128(idx1, cl, &hi);
|
||||||
|
VARIANT4_SHUFFLE(l1, idx1 & 0x1FFFF0, ax1, bx10, bx11, cx1);
|
||||||
|
|
||||||
|
al1 += hi;
|
||||||
|
ah1 += lo;
|
||||||
|
|
||||||
|
((uint64_t*)&l1[idx1 & 0x1FFFF0])[0] = al1;
|
||||||
|
((uint64_t*)&l1[idx1 & 0x1FFFF0])[1] = ah1;
|
||||||
|
|
||||||
|
al1 ^= cl;
|
||||||
|
ah1 ^= ch;
|
||||||
|
idx1 = al1;
|
||||||
|
|
||||||
|
// Age the history registers of both lanes: bxN1 <- bxN0 <- cxN.
bx01 = bx00;
|
||||||
|
bx11 = bx10;
|
||||||
|
|
||||||
|
bx00 = cx0;
|
||||||
|
bx10 = cx1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Presumably folds each scratchpad back into its state - confirm in cryptonight_softaes.h.
cn_implode_scratchpad((__m128i*) l0, (__m128i*) h0);
|
||||||
|
cn_implode_scratchpad((__m128i*) l1, (__m128i*) h1);
|
||||||
|
|
||||||
|
keccakf(h0, 24);
|
||||||
|
keccakf(h1, 24);
|
||||||
|
|
||||||
|
// Per lane, the low two bits of the first state byte pick one of four final hash functions.
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
|
||||||
|
extra_hashes[ctx[1]->state[0] & 3](ctx[1]->state, 200, output + 32);
|
||||||
|
}
|
@ -4,9 +4,9 @@
|
|||||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||||
* Copyright 2017 fireice-uk <https://github.com/fireice-uk>
|
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||||
* Copyright 2016-2017 XMRig <support@xmrig.com>
|
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
|
||||||
*
|
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
@ -30,8 +30,7 @@
|
|||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
|
||||||
|
|
||||||
extern __m128i soft_aesenc(__m128i in, __m128i key);
|
#include "crypto/soft_aes.h"
|
||||||
extern __m128i soft_aeskeygenassist(__m128i key, uint8_t rcon);
|
|
||||||
|
|
||||||
|
|
||||||
// This will shift and xor tmp1 into itself as 4 32-bit vals such as
|
// This will shift and xor tmp1 into itself as 4 32-bit vals such as
|
||||||
|
@ -6,8 +6,8 @@
|
|||||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||||
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||||
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
|
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
|
||||||
* Copyright 2018 SChernykh <https://github.com/SChernykh>
|
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
|
||||||
* Copyright 2016-2018 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
@ -27,6 +27,9 @@
|
|||||||
#define XMRIG_CRYPTONIGHT_TEST_H
|
#define XMRIG_CRYPTONIGHT_TEST_H
|
||||||
|
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
|
||||||
const static uint8_t test_input[152] = {
|
const static uint8_t test_input[152] = {
|
||||||
0x03, 0x05, 0xA0, 0xDB, 0xD6, 0xBF, 0x05, 0xCF, 0x16, 0xE5, 0x03, 0xF3, 0xA6, 0x6F, 0x78, 0x00,
|
0x03, 0x05, 0xA0, 0xDB, 0xD6, 0xBF, 0x05, 0xCF, 0x16, 0xE5, 0x03, 0xF3, 0xA6, 0x6F, 0x78, 0x00,
|
||||||
0x7C, 0xBF, 0x34, 0x14, 0x43, 0x32, 0xEC, 0xBF, 0xC2, 0x2E, 0xD9, 0x5C, 0x87, 0x00, 0x38, 0x3B,
|
0x7C, 0xBF, 0x34, 0x14, 0x43, 0x32, 0xEC, 0xBF, 0xC2, 0x2E, 0xD9, 0x5C, 0x87, 0x00, 0x38, 0x3B,
|
||||||
@ -67,6 +70,42 @@ const static uint8_t test_output_v2[64] = {
|
|||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
// One self-test input record for "cn/r": a height, a payload length and the payload bytes.
struct cn_r_test_input_data
|
||||||
|
{
|
||||||
|
// NOTE(review): presumably the block height that selects the cn/r random program - confirm against the self-test code.
uint64_t height;
|
||||||
|
// Number of meaningful bytes in data[]; the initializers in cn_r_test_input appear to list exactly this many.
size_t size;
|
||||||
|
// Payload storage; bytes past `size` are left zero by the aggregate initializers.
uint8_t data[64];
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
// Ten cn/r test inputs with consecutive heights 1806260..1806269.
// The payload bytes are ASCII text: the first entry is "This is a test" repeated three times,
// the following entries appear to be consecutive fragments of the "Lorem ipsum" placeholder text.
const static struct cn_r_test_input_data cn_r_test_input[] = {
|
||||||
|
{ 1806260, 44, { 0x54, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73, 0x20, 0x61, 0x20, 0x74, 0x65, 0x73, 0x74, 0x20, 0x54, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73, 0x20, 0x61, 0x20, 0x74, 0x65, 0x73, 0x74, 0x20, 0x54, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73, 0x20, 0x61, 0x20, 0x74, 0x65, 0x73, 0x74 } },
|
||||||
|
{ 1806261, 50, { 0x4c, 0x6f, 0x72, 0x65, 0x6d, 0x20, 0x69, 0x70, 0x73, 0x75, 0x6d, 0x20, 0x64, 0x6f, 0x6c, 0x6f, 0x72, 0x20, 0x73, 0x69, 0x74, 0x20, 0x61, 0x6d, 0x65, 0x74, 0x2c, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x65, 0x63, 0x74, 0x65, 0x74, 0x75, 0x72, 0x20, 0x61, 0x64, 0x69, 0x70, 0x69, 0x73, 0x63, 0x69, 0x6e, 0x67 } },
|
||||||
|
{ 1806262, 48, { 0x65, 0x6c, 0x69, 0x74, 0x2c, 0x20, 0x73, 0x65, 0x64, 0x20, 0x64, 0x6f, 0x20, 0x65, 0x69, 0x75, 0x73, 0x6d, 0x6f, 0x64, 0x20, 0x74, 0x65, 0x6d, 0x70, 0x6f, 0x72, 0x20, 0x69, 0x6e, 0x63, 0x69, 0x64, 0x69, 0x64, 0x75, 0x6e, 0x74, 0x20, 0x75, 0x74, 0x20, 0x6c, 0x61, 0x62, 0x6f, 0x72, 0x65 } },
|
||||||
|
{ 1806263, 48, { 0x65, 0x74, 0x20, 0x64, 0x6f, 0x6c, 0x6f, 0x72, 0x65, 0x20, 0x6d, 0x61, 0x67, 0x6e, 0x61, 0x20, 0x61, 0x6c, 0x69, 0x71, 0x75, 0x61, 0x2e, 0x20, 0x55, 0x74, 0x20, 0x65, 0x6e, 0x69, 0x6d, 0x20, 0x61, 0x64, 0x20, 0x6d, 0x69, 0x6e, 0x69, 0x6d, 0x20, 0x76, 0x65, 0x6e, 0x69, 0x61, 0x6d, 0x2c } },
|
||||||
|
{ 1806264, 46, { 0x71, 0x75, 0x69, 0x73, 0x20, 0x6e, 0x6f, 0x73, 0x74, 0x72, 0x75, 0x64, 0x20, 0x65, 0x78, 0x65, 0x72, 0x63, 0x69, 0x74, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x20, 0x75, 0x6c, 0x6c, 0x61, 0x6d, 0x63, 0x6f, 0x20, 0x6c, 0x61, 0x62, 0x6f, 0x72, 0x69, 0x73, 0x20, 0x6e, 0x69, 0x73, 0x69 } },
|
||||||
|
{ 1806265, 45, { 0x75, 0x74, 0x20, 0x61, 0x6c, 0x69, 0x71, 0x75, 0x69, 0x70, 0x20, 0x65, 0x78, 0x20, 0x65, 0x61, 0x20, 0x63, 0x6f, 0x6d, 0x6d, 0x6f, 0x64, 0x6f, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x65, 0x71, 0x75, 0x61, 0x74, 0x2e, 0x20, 0x44, 0x75, 0x69, 0x73, 0x20, 0x61, 0x75, 0x74, 0x65 } },
|
||||||
|
{ 1806266, 47, { 0x69, 0x72, 0x75, 0x72, 0x65, 0x20, 0x64, 0x6f, 0x6c, 0x6f, 0x72, 0x20, 0x69, 0x6e, 0x20, 0x72, 0x65, 0x70, 0x72, 0x65, 0x68, 0x65, 0x6e, 0x64, 0x65, 0x72, 0x69, 0x74, 0x20, 0x69, 0x6e, 0x20, 0x76, 0x6f, 0x6c, 0x75, 0x70, 0x74, 0x61, 0x74, 0x65, 0x20, 0x76, 0x65, 0x6c, 0x69, 0x74 } },
|
||||||
|
{ 1806267, 44, { 0x65, 0x73, 0x73, 0x65, 0x20, 0x63, 0x69, 0x6c, 0x6c, 0x75, 0x6d, 0x20, 0x64, 0x6f, 0x6c, 0x6f, 0x72, 0x65, 0x20, 0x65, 0x75, 0x20, 0x66, 0x75, 0x67, 0x69, 0x61, 0x74, 0x20, 0x6e, 0x75, 0x6c, 0x6c, 0x61, 0x20, 0x70, 0x61, 0x72, 0x69, 0x61, 0x74, 0x75, 0x72, 0x2e } },
|
||||||
|
{ 1806268, 47, { 0x45, 0x78, 0x63, 0x65, 0x70, 0x74, 0x65, 0x75, 0x72, 0x20, 0x73, 0x69, 0x6e, 0x74, 0x20, 0x6f, 0x63, 0x63, 0x61, 0x65, 0x63, 0x61, 0x74, 0x20, 0x63, 0x75, 0x70, 0x69, 0x64, 0x61, 0x74, 0x61, 0x74, 0x20, 0x6e, 0x6f, 0x6e, 0x20, 0x70, 0x72, 0x6f, 0x69, 0x64, 0x65, 0x6e, 0x74, 0x2c } },
|
||||||
|
{ 1806269, 62, { 0x73, 0x75, 0x6e, 0x74, 0x20, 0x69, 0x6e, 0x20, 0x63, 0x75, 0x6c, 0x70, 0x61, 0x20, 0x71, 0x75, 0x69, 0x20, 0x6f, 0x66, 0x66, 0x69, 0x63, 0x69, 0x61, 0x20, 0x64, 0x65, 0x73, 0x65, 0x72, 0x75, 0x6e, 0x74, 0x20, 0x6d, 0x6f, 0x6c, 0x6c, 0x69, 0x74, 0x20, 0x61, 0x6e, 0x69, 0x6d, 0x20, 0x69, 0x64, 0x20, 0x65, 0x73, 0x74, 0x20, 0x6c, 0x61, 0x62, 0x6f, 0x72, 0x75, 0x6d, 0x2e } },
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * "cn/r" reference output: 320 bytes laid out as 10 rows of 32 bytes.
 * NOTE(review): presumably one 32-byte hash per self-test input - confirm
 * against the code that consumes this table.
 */
static const uint8_t test_output_r[] = {
    /* row 0 */
    0xf7, 0x59, 0x58, 0x8a, 0xd5, 0x7e, 0x75, 0x84, 0x67, 0x29, 0x54, 0x43, 0xa9, 0xbd, 0x71, 0x49, 0x0a, 0xbf, 0xf8, 0xe9, 0xda, 0xd1, 0xb9, 0x5b, 0x6b, 0xf2, 0xf5, 0xd0, 0xd7, 0x83, 0x87, 0xbc,
    /* row 1 */
    0x5b, 0xb8, 0x33, 0xde, 0xca, 0x2b, 0xdd, 0x72, 0x52, 0xa9, 0xcc, 0xd7, 0xb4, 0xce, 0x0b, 0x6a, 0x48, 0x54, 0x51, 0x57, 0x94, 0xb5, 0x6c, 0x20, 0x72, 0x62, 0xf7, 0xa5, 0xb9, 0xbd, 0xb5, 0x66,
    /* row 2 */
    0x1e, 0xe6, 0x72, 0x8d, 0xa6, 0x0f, 0xbd, 0x8d, 0x7d, 0x55, 0xb2, 0xb1, 0xad, 0xe4, 0x87, 0xa3, 0xcf, 0x52, 0xa2, 0xc3, 0xac, 0x6f, 0x52, 0x0d, 0xb1, 0x2c, 0x27, 0xd8, 0x92, 0x1f, 0x6c, 0xab,
    /* row 3 */
    0x69, 0x69, 0xfe, 0x2d, 0xdf, 0xb7, 0x58, 0x43, 0x8d, 0x48, 0x04, 0x9f, 0x30, 0x2f, 0xc2, 0x10, 0x8a, 0x4f, 0xcc, 0x93, 0xe3, 0x76, 0x69, 0x17, 0x0e, 0x6d, 0xb4, 0xb0, 0xb9, 0xb4, 0xc4, 0xcb,
    /* row 4 */
    0x7f, 0x30, 0x48, 0xb4, 0xe9, 0x0d, 0x0c, 0xbe, 0x7a, 0x57, 0xc0, 0x39, 0x4f, 0x37, 0x33, 0x8a, 0x01, 0xfa, 0xe3, 0xad, 0xfd, 0xc0, 0xe5, 0x12, 0x6d, 0x86, 0x3a, 0x89, 0x5e, 0xb0, 0x4e, 0x02,
    /* row 5 */
    0x1d, 0x29, 0x04, 0x43, 0xa4, 0xb5, 0x42, 0xaf, 0x04, 0xa8, 0x2f, 0x6b, 0x24, 0x94, 0xa6, 0xee, 0x7f, 0x20, 0xf2, 0x75, 0x4c, 0x58, 0xe0, 0x84, 0x90, 0x32, 0x48, 0x3a, 0x56, 0xe8, 0xe2, 0xef,
    /* row 6 */
    0xc4, 0x3c, 0xc6, 0x56, 0x74, 0x36, 0xa8, 0x6a, 0xfb, 0xd6, 0xaa, 0x9e, 0xaa, 0x7c, 0x27, 0x6e, 0x98, 0x06, 0x83, 0x03, 0x34, 0xb6, 0x14, 0xb2, 0xbe, 0xe2, 0x3c, 0xc7, 0x66, 0x34, 0xf6, 0xfd,
    /* row 7 */
    0x87, 0xbe, 0x24, 0x79, 0xc0, 0xc4, 0xe8, 0xed, 0xfd, 0xfa, 0xa5, 0x60, 0x3e, 0x93, 0xf4, 0x26, 0x5b, 0x3f, 0x82, 0x24, 0xc1, 0xc5, 0x94, 0x6f, 0xeb, 0x42, 0x48, 0x19, 0xd1, 0x89, 0x90, 0xa4,
    /* row 8 */
    0xdd, 0x9d, 0x6a, 0x6d, 0x8e, 0x47, 0x46, 0x5c, 0xce, 0xac, 0x08, 0x77, 0xef, 0x88, 0x9b, 0x93, 0xe7, 0xeb, 0xa9, 0x79, 0x55, 0x7e, 0x39, 0x35, 0xd7, 0xf8, 0x6d, 0xce, 0x11, 0xb0, 0x70, 0xf3,
    /* row 9 */
    0x75, 0xc6, 0xf2, 0xae, 0x49, 0xa2, 0x05, 0x21, 0xde, 0x97, 0x28, 0x5b, 0x43, 0x1e, 0x71, 0x71, 0x25, 0x84, 0x7f, 0xb8, 0x93, 0x5e, 0xd8, 0x4a, 0x61, 0xe7, 0xf8, 0xd3, 0x6a, 0x2c, 0x3d, 0x8e,
};
|
||||||
|
|
||||||
|
|
||||||
#ifndef XMRIG_NO_AEON
|
#ifndef XMRIG_NO_AEON
|
||||||
const static uint8_t test_output_v0_lite[64] = {
|
const static uint8_t test_output_v0_lite[64] = {
|
||||||
|
449
algo/cryptonight/variant4_random_math.h
Normal file
449
algo/cryptonight/variant4_random_math.h
Normal file
File diff suppressed because it is too large
Load Diff
@ -1,30 +1,24 @@
|
|||||||
if (WITH_ASM AND NOT XMRIG_ARM AND CMAKE_SIZEOF_VOID_P EQUAL 8)
|
if (WITH_ASM AND NOT XMRIG_ARM AND CMAKE_SIZEOF_VOID_P EQUAL 8)
|
||||||
set(XMRIG_ASM_LIBRARY "xmrig-asm")
|
set(XMRIG_ASM_LIBRARY "xmrig-asm")
|
||||||
|
|
||||||
if (CMAKE_C_COMPILER_ID MATCHES MSVC)
|
enable_language(ASM)
|
||||||
enable_language(ASM_MASM)
|
|
||||||
|
|
||||||
if (MSVC_TOOLSET_VERSION GREATER_EQUAL 141)
|
if (WIN32 AND CMAKE_C_COMPILER_ID MATCHES GNU)
|
||||||
set(XMRIG_ASM_FILE "crypto/asm/cnv2_main_loop.asm")
|
set(XMRIG_ASM_FILES
|
||||||
else()
|
"crypto/asm/win64/cn_main_loop.S"
|
||||||
set(XMRIG_ASM_FILE "crypto/asm/win64/cnv2_main_loop.asm")
|
"crypto/asm/CryptonightR_template.S"
|
||||||
endif()
|
)
|
||||||
|
|
||||||
set_property(SOURCE ${XMRIG_ASM_FILE} PROPERTY ASM_MASM)
|
|
||||||
else()
|
else()
|
||||||
enable_language(ASM)
|
set(XMRIG_ASM_FILES
|
||||||
|
"crypto/asm/cn_main_loop.S"
|
||||||
if (WIN32 AND CMAKE_C_COMPILER_ID MATCHES GNU)
|
"crypto/asm/CryptonightR_template.S"
|
||||||
set(XMRIG_ASM_FILE "crypto/asm/win64/cnv2_main_loop.S")
|
)
|
||||||
else()
|
|
||||||
set(XMRIG_ASM_FILE "crypto/asm/cnv2_main_loop.S")
|
|
||||||
endif()
|
|
||||||
|
|
||||||
set_property(SOURCE ${XMRIG_ASM_FILE} PROPERTY C)
|
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
add_library(${XMRIG_ASM_LIBRARY} STATIC ${XMRIG_ASM_FILE})
|
set_property(SOURCE ${XMRIG_ASM_FILES} PROPERTY C)
|
||||||
set(XMRIG_ASM_SOURCES "")
|
|
||||||
|
add_library(${XMRIG_ASM_LIBRARY} STATIC ${XMRIG_ASM_FILES})
|
||||||
|
set(XMRIG_ASM_SOURCES "crypto/CryptonightR_gen.c")
|
||||||
set_property(TARGET ${XMRIG_ASM_LIBRARY} PROPERTY LINKER_LANGUAGE C)
|
set_property(TARGET ${XMRIG_ASM_LIBRARY} PROPERTY LINKER_LANGUAGE C)
|
||||||
else()
|
else()
|
||||||
set(XMRIG_ASM_SOURCES "")
|
set(XMRIG_ASM_SOURCES "")
|
||||||
|
23
cpu.c
23
cpu.c
@ -4,8 +4,9 @@
|
|||||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||||
* Copyright 2016-2017 XMRig <support@xmrig.com>
|
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||||
*
|
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
|
||||||
|
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
@ -64,20 +65,20 @@ void cpu_init_common() {
|
|||||||
|
|
||||||
if (data.flags[CPU_FEATURE_AES]) {
|
if (data.flags[CPU_FEATURE_AES]) {
|
||||||
cpu_info.flags |= CPU_FLAG_AES;
|
cpu_info.flags |= CPU_FLAG_AES;
|
||||||
|
|
||||||
# ifndef XMRIG_NO_ASM
|
|
||||||
if (data.vendor == VENDOR_AMD) {
|
|
||||||
cpu_info.assembly = ASM_RYZEN;
|
|
||||||
}
|
|
||||||
else if (data.vendor == VENDOR_INTEL) {
|
|
||||||
cpu_info.assembly = ASM_INTEL;
|
|
||||||
}
|
|
||||||
# endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (data.flags[CPU_FEATURE_BMI2]) {
|
if (data.flags[CPU_FEATURE_BMI2]) {
|
||||||
cpu_info.flags |= CPU_FLAG_BMI2;
|
cpu_info.flags |= CPU_FLAG_BMI2;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# ifndef XMRIG_NO_ASM
|
||||||
|
if (data.vendor == VENDOR_AMD) {
|
||||||
|
cpu_info.assembly = (data.ext_family >= 23) ? ASM_RYZEN : ASM_BULLDOZER;
|
||||||
|
}
|
||||||
|
else if (data.vendor == VENDOR_INTEL) {
|
||||||
|
cpu_info.assembly = ASM_INTEL;
|
||||||
|
}
|
||||||
|
# endif
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
5
cpu.h
5
cpu.h
@ -4,8 +4,9 @@
|
|||||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||||
* Copyright 2016-2017 XMRig <support@xmrig.com>
|
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||||
*
|
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
|
||||||
|
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
146
crypto/CryptonightR_gen.c
Normal file
146
crypto/CryptonightR_gen.c
Normal file
@ -0,0 +1,146 @@
|
|||||||
|
/* XMRig
|
||||||
|
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||||
|
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||||
|
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||||
|
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||||
|
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||||
|
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||||
|
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
|
||||||
|
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
|
||||||
|
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||||
|
*
|
||||||
|
* This program is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation, either version 3 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
#include "algo/cryptonight/cryptonight_monero.h"
|
||||||
|
#include "crypto/asm/CryptonightR_template.h"
|
||||||
|
#include "persistent_memory.h"
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Append the bytes lying between two code addresses to the output buffer.
 *
 * p  - in/out write cursor; advanced by the number of bytes copied.
 * p1 - address of the first byte to copy.
 * p2 - address one past the last byte to copy.
 *
 * When p2 does not lie after p1 nothing is copied and the cursor stays put.
 */
static inline void add_code(uint8_t **p, void (*p1)(), void (*p2)())
{
    const uint8_t *first = (const uint8_t *) p1;
    const uint8_t *last  = (const uint8_t *) p2;
    const ptrdiff_t length = last - first;

    // Empty or reversed range: leave buffer and cursor untouched.
    if (length <= 0) {
        return;
    }

    memcpy(*p, first, length);
    *p += length;
}
|
||||||
|
|
||||||
|
|
||||||
|
static inline void add_random_math(uint8_t **p, const struct V4_Instruction* code, int code_size, const void_func* instructions, const void_func* instructions_mov, bool is_64_bit, enum Assembly ASM)
|
||||||
|
{
|
||||||
|
uint32_t prev_rot_src = (uint32_t)(-1);
|
||||||
|
|
||||||
|
for (int i = 0;; ++i) {
|
||||||
|
const struct V4_Instruction inst = code[i];
|
||||||
|
if (inst.opcode == RET) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
uint8_t opcode = (inst.opcode == MUL) ? inst.opcode : (inst.opcode + 2);
|
||||||
|
uint8_t dst_index = inst.dst_index;
|
||||||
|
uint8_t src_index = inst.src_index;
|
||||||
|
|
||||||
|
const uint32_t a = inst.dst_index;
|
||||||
|
const uint32_t b = inst.src_index;
|
||||||
|
const uint8_t c = opcode | (dst_index << V4_OPCODE_BITS) | (((src_index == 8) ? dst_index : src_index) << (V4_OPCODE_BITS + V4_DST_INDEX_BITS));
|
||||||
|
|
||||||
|
switch (inst.opcode) {
|
||||||
|
case ROR:
|
||||||
|
case ROL:
|
||||||
|
if (b != prev_rot_src) {
|
||||||
|
prev_rot_src = b;
|
||||||
|
add_code(p, instructions_mov[c], instructions_mov[c + 1]);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (a == prev_rot_src) {
|
||||||
|
prev_rot_src = (uint32_t)(-1);
|
||||||
|
}
|
||||||
|
|
||||||
|
void_func begin = instructions[c];
|
||||||
|
|
||||||
|
if ((ASM = ASM_BULLDOZER) && (inst.opcode == MUL) && !is_64_bit) {
|
||||||
|
// AMD Bulldozer has latency 4 for 32-bit IMUL and 6 for 64-bit IMUL
|
||||||
|
// Always use 32-bit IMUL for AMD Bulldozer in 32-bit mode - skip prefix 0x48 and change 0x49 to 0x41
|
||||||
|
uint8_t* prefix = (uint8_t*) begin;
|
||||||
|
|
||||||
|
if (*prefix == 0x49) {
|
||||||
|
**p = 0x41;
|
||||||
|
*p += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
begin = (void_func)(prefix + 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
add_code(p, begin, instructions[c + 1]);
|
||||||
|
|
||||||
|
if (inst.opcode == ADD) {
|
||||||
|
*(uint32_t*)(*p - sizeof(uint32_t) - (is_64_bit ? 3 : 0)) = inst.C;
|
||||||
|
if (is_64_bit) {
|
||||||
|
prev_rot_src = (uint32_t)(-1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void v4_compile_code(const struct V4_Instruction* code, int code_size, void* machine_code, enum Assembly ASM)
|
||||||
|
{
|
||||||
|
uint8_t* p0 = machine_code;
|
||||||
|
uint8_t* p = p0;
|
||||||
|
|
||||||
|
add_code(&p, CryptonightR_template_part1, CryptonightR_template_part2);
|
||||||
|
add_random_math(&p, code, code_size, instructions, instructions_mov, false, ASM);
|
||||||
|
add_code(&p, CryptonightR_template_part2, CryptonightR_template_part3);
|
||||||
|
*(int*)(p - 4) = (int)((((const uint8_t*)CryptonightR_template_mainloop) - ((const uint8_t*)CryptonightR_template_part1)) - (p - p0));
|
||||||
|
add_code(&p, CryptonightR_template_part3, CryptonightR_template_end);
|
||||||
|
|
||||||
|
flush_instruction_cache(machine_code, p - p0);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void v4_compile_code_double(const struct V4_Instruction* code, int code_size, void* machine_code, enum Assembly ASM)
|
||||||
|
{
|
||||||
|
uint8_t* p0 = (uint8_t*) machine_code;
|
||||||
|
uint8_t* p = p0;
|
||||||
|
|
||||||
|
add_code(&p, CryptonightR_template_double_part1, CryptonightR_template_double_part2);
|
||||||
|
add_random_math(&p, code, code_size, instructions, instructions_mov, false, ASM);
|
||||||
|
add_code(&p, CryptonightR_template_double_part2, CryptonightR_template_double_part3);
|
||||||
|
add_random_math(&p, code, code_size, instructions, instructions_mov, false, ASM);
|
||||||
|
add_code(&p, CryptonightR_template_double_part3, CryptonightR_template_double_part4);
|
||||||
|
*(int*)(p - 4) = (int)((((const uint8_t*)CryptonightR_template_double_mainloop) - ((const uint8_t*)CryptonightR_template_double_part1)) - (p - p0));
|
||||||
|
add_code(&p, CryptonightR_template_double_part4, CryptonightR_template_double_end);
|
||||||
|
|
||||||
|
flush_instruction_cache(machine_code, p - p0);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void v4_soft_aes_compile_code(const struct V4_Instruction* code, int code_size, void* machine_code, enum Assembly ASM)
|
||||||
|
{
|
||||||
|
uint8_t* p0 = machine_code;
|
||||||
|
uint8_t* p = p0;
|
||||||
|
|
||||||
|
add_code(&p, CryptonightR_soft_aes_template_part1, CryptonightR_soft_aes_template_part2);
|
||||||
|
add_random_math(&p, code, code_size, instructions, instructions_mov, false, ASM);
|
||||||
|
add_code(&p, CryptonightR_soft_aes_template_part2, CryptonightR_soft_aes_template_part3);
|
||||||
|
*(int*)(p - 4) = (int)((((const uint8_t*)CryptonightR_soft_aes_template_mainloop) - ((const uint8_t*)CryptonightR_soft_aes_template_part1)) - (p - p0));
|
||||||
|
add_code(&p, CryptonightR_soft_aes_template_part3, CryptonightR_soft_aes_template_end);
|
||||||
|
|
||||||
|
flush_instruction_cache(machine_code, p - p0);
|
||||||
|
}
|
279
crypto/asm/CryptonightR_soft_aes_template.inc
Normal file
279
crypto/asm/CryptonightR_soft_aes_template.inc
Normal file
@ -0,0 +1,279 @@
|
|||||||
|
PUBLIC FN_PREFIX(CryptonightR_soft_aes_template_part1)
|
||||||
|
PUBLIC FN_PREFIX(CryptonightR_soft_aes_template_mainloop)
|
||||||
|
PUBLIC FN_PREFIX(CryptonightR_soft_aes_template_part2)
|
||||||
|
PUBLIC FN_PREFIX(CryptonightR_soft_aes_template_part3)
|
||||||
|
PUBLIC FN_PREFIX(CryptonightR_soft_aes_template_end)
|
||||||
|
|
||||||
|
ALIGN(64)
|
||||||
|
FN_PREFIX(CryptonightR_soft_aes_template_part1):
|
||||||
|
mov QWORD PTR [rsp+8], rcx
|
||||||
|
push rbx
|
||||||
|
push rbp
|
||||||
|
push rsi
|
||||||
|
push rdi
|
||||||
|
push r12
|
||||||
|
push r13
|
||||||
|
push r14
|
||||||
|
push r15
|
||||||
|
sub rsp, 232
|
||||||
|
|
||||||
|
mov eax, [rcx+96]
|
||||||
|
mov ebx, [rcx+100]
|
||||||
|
mov esi, [rcx+104]
|
||||||
|
mov edx, [rcx+108]
|
||||||
|
mov [rsp+144], eax
|
||||||
|
mov [rsp+148], ebx
|
||||||
|
mov [rsp+152], esi
|
||||||
|
mov [rsp+156], edx
|
||||||
|
|
||||||
|
mov rax, QWORD PTR [rcx+48]
|
||||||
|
mov r10, rcx
|
||||||
|
xor rax, QWORD PTR [rcx+16]
|
||||||
|
mov r8, QWORD PTR [rcx+32]
|
||||||
|
xor r8, QWORD PTR [rcx]
|
||||||
|
mov r9, QWORD PTR [rcx+40]
|
||||||
|
xor r9, QWORD PTR [rcx+8]
|
||||||
|
movq xmm4, rax
|
||||||
|
mov rdx, QWORD PTR [rcx+56]
|
||||||
|
xor rdx, QWORD PTR [rcx+24]
|
||||||
|
mov r11, QWORD PTR [rcx+224]
|
||||||
|
mov rcx, QWORD PTR [rcx+88]
|
||||||
|
xor rcx, QWORD PTR [r10+72]
|
||||||
|
mov rax, QWORD PTR [r10+80]
|
||||||
|
movq xmm0, rdx
|
||||||
|
xor rax, QWORD PTR [r10+64]
|
||||||
|
|
||||||
|
movaps XMMWORD PTR [rsp+16], xmm6
|
||||||
|
movaps XMMWORD PTR [rsp+32], xmm7
|
||||||
|
movaps XMMWORD PTR [rsp+48], xmm8
|
||||||
|
movaps XMMWORD PTR [rsp+64], xmm9
|
||||||
|
movaps XMMWORD PTR [rsp+80], xmm10
|
||||||
|
movaps XMMWORD PTR [rsp+96], xmm11
|
||||||
|
movaps XMMWORD PTR [rsp+112], xmm12
|
||||||
|
movaps XMMWORD PTR [rsp+128], xmm13
|
||||||
|
|
||||||
|
movq xmm5, rax
|
||||||
|
|
||||||
|
mov rax, r8
|
||||||
|
punpcklqdq xmm4, xmm0
|
||||||
|
and eax, 2097136
|
||||||
|
movq xmm10, QWORD PTR [r10+96]
|
||||||
|
movq xmm0, rcx
|
||||||
|
mov rcx, QWORD PTR [r10+104]
|
||||||
|
xorps xmm9, xmm9
|
||||||
|
mov QWORD PTR [rsp+328], rax
|
||||||
|
movq xmm12, r11
|
||||||
|
mov QWORD PTR [rsp+320], r9
|
||||||
|
punpcklqdq xmm5, xmm0
|
||||||
|
movq xmm13, rcx
|
||||||
|
mov r12d, 524288
|
||||||
|
|
||||||
|
ALIGN(64)
|
||||||
|
FN_PREFIX(CryptonightR_soft_aes_template_mainloop):
|
||||||
|
movd xmm11, r12d
|
||||||
|
mov r12, QWORD PTR [r10+272]
|
||||||
|
lea r13, QWORD PTR [rax+r11]
|
||||||
|
mov esi, DWORD PTR [r13]
|
||||||
|
movq xmm0, r9
|
||||||
|
mov r10d, DWORD PTR [r13+4]
|
||||||
|
movq xmm7, r8
|
||||||
|
mov ebp, DWORD PTR [r13+12]
|
||||||
|
mov r14d, DWORD PTR [r13+8]
|
||||||
|
mov rdx, QWORD PTR [rsp+328]
|
||||||
|
movzx ecx, sil
|
||||||
|
shr esi, 8
|
||||||
|
punpcklqdq xmm7, xmm0
|
||||||
|
mov r15d, DWORD PTR [r12+rcx*4]
|
||||||
|
movzx ecx, r10b
|
||||||
|
shr r10d, 8
|
||||||
|
mov edi, DWORD PTR [r12+rcx*4]
|
||||||
|
movzx ecx, r14b
|
||||||
|
shr r14d, 8
|
||||||
|
mov ebx, DWORD PTR [r12+rcx*4]
|
||||||
|
movzx ecx, bpl
|
||||||
|
shr ebp, 8
|
||||||
|
mov r9d, DWORD PTR [r12+rcx*4]
|
||||||
|
movzx ecx, r10b
|
||||||
|
shr r10d, 8
|
||||||
|
xor r15d, DWORD PTR [r12+rcx*4+1024]
|
||||||
|
movzx ecx, r14b
|
||||||
|
shr r14d, 8
|
||||||
|
mov eax, r14d
|
||||||
|
shr eax, 8
|
||||||
|
xor edi, DWORD PTR [r12+rcx*4+1024]
|
||||||
|
add eax, 256
|
||||||
|
movzx ecx, bpl
|
||||||
|
shr ebp, 8
|
||||||
|
xor ebx, DWORD PTR [r12+rcx*4+1024]
|
||||||
|
movzx ecx, sil
|
||||||
|
shr esi, 8
|
||||||
|
xor r9d, DWORD PTR [r12+rcx*4+1024]
|
||||||
|
add r12, 2048
|
||||||
|
movzx ecx, r10b
|
||||||
|
shr r10d, 8
|
||||||
|
add r10d, 256
|
||||||
|
mov r11d, DWORD PTR [r12+rax*4]
|
||||||
|
xor r11d, DWORD PTR [r12+rcx*4]
|
||||||
|
xor r11d, r9d
|
||||||
|
movzx ecx, sil
|
||||||
|
mov r10d, DWORD PTR [r12+r10*4]
|
||||||
|
shr esi, 8
|
||||||
|
add esi, 256
|
||||||
|
xor r10d, DWORD PTR [r12+rcx*4]
|
||||||
|
movzx ecx, bpl
|
||||||
|
xor r10d, ebx
|
||||||
|
shr ebp, 8
|
||||||
|
movd xmm1, r11d
|
||||||
|
add ebp, 256
|
||||||
|
movq r11, xmm12
|
||||||
|
mov r9d, DWORD PTR [r12+rcx*4]
|
||||||
|
xor r9d, DWORD PTR [r12+rsi*4]
|
||||||
|
mov eax, DWORD PTR [r12+rbp*4]
|
||||||
|
xor r9d, edi
|
||||||
|
movzx ecx, r14b
|
||||||
|
movd xmm0, r10d
|
||||||
|
movd xmm2, r9d
|
||||||
|
xor eax, DWORD PTR [r12+rcx*4]
|
||||||
|
mov rcx, rdx
|
||||||
|
xor eax, r15d
|
||||||
|
punpckldq xmm2, xmm1
|
||||||
|
xor rcx, 16
|
||||||
|
movd xmm6, eax
|
||||||
|
mov rax, rdx
|
||||||
|
punpckldq xmm6, xmm0
|
||||||
|
xor rax, 32
|
||||||
|
punpckldq xmm6, xmm2
|
||||||
|
xor rdx, 48
|
||||||
|
movdqu xmm2, XMMWORD PTR [rcx+r11]
|
||||||
|
pxor xmm6, xmm2
|
||||||
|
pxor xmm6, xmm7
|
||||||
|
paddq xmm2, xmm4
|
||||||
|
movdqu xmm1, XMMWORD PTR [rax+r11]
|
||||||
|
movdqu xmm0, XMMWORD PTR [rdx+r11]
|
||||||
|
pxor xmm6, xmm1
|
||||||
|
pxor xmm6, xmm0
|
||||||
|
paddq xmm0, xmm5
|
||||||
|
movdqu XMMWORD PTR [rcx+r11], xmm0
|
||||||
|
movdqu XMMWORD PTR [rax+r11], xmm2
|
||||||
|
movq rcx, xmm13
|
||||||
|
paddq xmm1, xmm7
|
||||||
|
movdqu XMMWORD PTR [rdx+r11], xmm1
|
||||||
|
movq rdi, xmm6
|
||||||
|
mov r10, rdi
|
||||||
|
and r10d, 2097136
|
||||||
|
movdqa xmm0, xmm6
|
||||||
|
pxor xmm0, xmm4
|
||||||
|
movdqu XMMWORD PTR [r13], xmm0
|
||||||
|
|
||||||
|
mov ebx, [rsp+144]
|
||||||
|
mov ebp, [rsp+152]
|
||||||
|
add ebx, [rsp+148]
|
||||||
|
add ebp, [rsp+156]
|
||||||
|
shl rbp, 32
|
||||||
|
or rbx, rbp
|
||||||
|
|
||||||
|
xor rbx, QWORD PTR [r10+r11]
|
||||||
|
lea r14, QWORD PTR [r10+r11]
|
||||||
|
mov rbp, QWORD PTR [r14+8]
|
||||||
|
|
||||||
|
mov [rsp+160], rbx
|
||||||
|
mov [rsp+168], rdi
|
||||||
|
mov [rsp+176], rbp
|
||||||
|
mov [rsp+184], r10
|
||||||
|
mov r10, rsp
|
||||||
|
|
||||||
|
mov ebx, [rsp+144]
|
||||||
|
mov esi, [rsp+148]
|
||||||
|
mov edi, [rsp+152]
|
||||||
|
mov ebp, [rsp+156]
|
||||||
|
|
||||||
|
movd esp, xmm7
|
||||||
|
movaps xmm0, xmm7
|
||||||
|
psrldq xmm0, 8
|
||||||
|
movd r15d, xmm0
|
||||||
|
movd eax, xmm4
|
||||||
|
movd edx, xmm5
|
||||||
|
movaps xmm0, xmm5
|
||||||
|
psrldq xmm0, 8
|
||||||
|
movd r9d, xmm0
|
||||||
|
|
||||||
|
FN_PREFIX(CryptonightR_soft_aes_template_part2):
|
||||||
|
mov rsp, r10
|
||||||
|
mov [rsp+144], ebx
|
||||||
|
mov [rsp+148], esi
|
||||||
|
mov [rsp+152], edi
|
||||||
|
mov [rsp+156], ebp
|
||||||
|
|
||||||
|
mov edi, edi
|
||||||
|
shl rbp, 32
|
||||||
|
or rbp, rdi
|
||||||
|
xor r8, rbp
|
||||||
|
|
||||||
|
mov ebx, ebx
|
||||||
|
shl rsi, 32
|
||||||
|
or rsi, rbx
|
||||||
|
xor QWORD PTR [rsp+320], rsi
|
||||||
|
|
||||||
|
mov rbx, [rsp+160]
|
||||||
|
mov rdi, [rsp+168]
|
||||||
|
mov rbp, [rsp+176]
|
||||||
|
mov r10, [rsp+184]
|
||||||
|
|
||||||
|
mov r9, r10
|
||||||
|
xor r9, 16
|
||||||
|
mov rcx, r10
|
||||||
|
xor rcx, 32
|
||||||
|
xor r10, 48
|
||||||
|
mov rax, rbx
|
||||||
|
mul rdi
|
||||||
|
movdqu xmm2, XMMWORD PTR [r9+r11]
|
||||||
|
movdqu xmm1, XMMWORD PTR [rcx+r11]
|
||||||
|
pxor xmm6, xmm2
|
||||||
|
pxor xmm6, xmm1
|
||||||
|
paddq xmm1, xmm7
|
||||||
|
add r8, rdx
|
||||||
|
movdqu xmm0, XMMWORD PTR [r10+r11]
|
||||||
|
pxor xmm6, xmm0
|
||||||
|
paddq xmm0, xmm5
|
||||||
|
paddq xmm2, xmm4
|
||||||
|
movdqu XMMWORD PTR [r9+r11], xmm0
|
||||||
|
movdqa xmm5, xmm4
|
||||||
|
mov r9, QWORD PTR [rsp+320]
|
||||||
|
movdqa xmm4, xmm6
|
||||||
|
add r9, rax
|
||||||
|
movdqu XMMWORD PTR [rcx+r11], xmm2
|
||||||
|
movdqu XMMWORD PTR [r10+r11], xmm1
|
||||||
|
mov r10, QWORD PTR [rsp+304]
|
||||||
|
movd r12d, xmm11
|
||||||
|
mov QWORD PTR [r14], r8
|
||||||
|
xor r8, rbx
|
||||||
|
mov rax, r8
|
||||||
|
mov QWORD PTR [r14+8], r9
|
||||||
|
and eax, 2097136
|
||||||
|
xor r9, rbp
|
||||||
|
mov QWORD PTR [rsp+320], r9
|
||||||
|
mov QWORD PTR [rsp+328], rax
|
||||||
|
sub r12d, 1
|
||||||
|
jne FN_PREFIX(CryptonightR_soft_aes_template_mainloop)
|
||||||
|
|
||||||
|
FN_PREFIX(CryptonightR_soft_aes_template_part3):
|
||||||
|
movaps xmm6, XMMWORD PTR [rsp+16]
|
||||||
|
movaps xmm7, XMMWORD PTR [rsp+32]
|
||||||
|
movaps xmm8, XMMWORD PTR [rsp+48]
|
||||||
|
movaps xmm9, XMMWORD PTR [rsp+64]
|
||||||
|
movaps xmm10, XMMWORD PTR [rsp+80]
|
||||||
|
movaps xmm11, XMMWORD PTR [rsp+96]
|
||||||
|
movaps xmm12, XMMWORD PTR [rsp+112]
|
||||||
|
movaps xmm13, XMMWORD PTR [rsp+128]
|
||||||
|
|
||||||
|
add rsp, 232
|
||||||
|
pop r15
|
||||||
|
pop r14
|
||||||
|
pop r13
|
||||||
|
pop r12
|
||||||
|
pop rdi
|
||||||
|
pop rsi
|
||||||
|
pop rbp
|
||||||
|
pop rbx
|
||||||
|
ret
|
||||||
|
FN_PREFIX(CryptonightR_soft_aes_template_end):
|
1593
crypto/asm/CryptonightR_template.S
Normal file
1593
crypto/asm/CryptonightR_template.S
Normal file
File diff suppressed because it is too large
Load Diff
1060
crypto/asm/CryptonightR_template.h
Normal file
1060
crypto/asm/CryptonightR_template.h
Normal file
File diff suppressed because it is too large
Load Diff
531
crypto/asm/CryptonightR_template.inc
Normal file
531
crypto/asm/CryptonightR_template.inc
Normal file
File diff suppressed because it is too large
Load Diff
@ -94,7 +94,7 @@
|
|||||||
lea r9, QWORD PTR [rdx+r13]
|
lea r9, QWORD PTR [rdx+r13]
|
||||||
movdqu xmm15, XMMWORD PTR [r9]
|
movdqu xmm15, XMMWORD PTR [r9]
|
||||||
|
|
||||||
ALIGN 16
|
ALIGN(64)
|
||||||
main_loop_double_sandybridge:
|
main_loop_double_sandybridge:
|
||||||
movdqu xmm9, xmm15
|
movdqu xmm9, xmm15
|
||||||
mov eax, edx
|
mov eax, edx
|
180
crypto/asm/cn2/cnv2_main_loop_bulldozer.inc
Normal file
180
crypto/asm/cn2/cnv2_main_loop_bulldozer.inc
Normal file
@ -0,0 +1,180 @@
|
|||||||
|
mov QWORD PTR [rsp+16], rbx
|
||||||
|
mov QWORD PTR [rsp+24], rbp
|
||||||
|
mov QWORD PTR [rsp+32], rsi
|
||||||
|
push rdi
|
||||||
|
push r12
|
||||||
|
push r13
|
||||||
|
push r14
|
||||||
|
push r15
|
||||||
|
sub rsp, 64
|
||||||
|
|
||||||
|
stmxcsr DWORD PTR [rsp]
|
||||||
|
mov DWORD PTR [rsp+4], 24448
|
||||||
|
ldmxcsr DWORD PTR [rsp+4]
|
||||||
|
|
||||||
|
mov rax, QWORD PTR [rcx+48]
|
||||||
|
mov r9, rcx
|
||||||
|
xor rax, QWORD PTR [rcx+16]
|
||||||
|
mov ebp, 524288
|
||||||
|
mov r8, QWORD PTR [rcx+32]
|
||||||
|
xor r8, QWORD PTR [rcx]
|
||||||
|
mov r11, QWORD PTR [rcx+40]
|
||||||
|
mov r10, r8
|
||||||
|
mov rdx, QWORD PTR [rcx+56]
|
||||||
|
movq xmm3, rax
|
||||||
|
xor rdx, QWORD PTR [rcx+24]
|
||||||
|
xor r11, QWORD PTR [rcx+8]
|
||||||
|
mov rbx, QWORD PTR [rcx+224]
|
||||||
|
mov rax, QWORD PTR [r9+80]
|
||||||
|
xor rax, QWORD PTR [r9+64]
|
||||||
|
movq xmm0, rdx
|
||||||
|
mov rcx, QWORD PTR [rcx+88]
|
||||||
|
xor rcx, QWORD PTR [r9+72]
|
||||||
|
mov rdi, QWORD PTR [r9+104]
|
||||||
|
and r10d, 2097136
|
||||||
|
movaps XMMWORD PTR [rsp+48], xmm6
|
||||||
|
movq xmm4, rax
|
||||||
|
movaps XMMWORD PTR [rsp+32], xmm7
|
||||||
|
movaps XMMWORD PTR [rsp+16], xmm8
|
||||||
|
xorps xmm8, xmm8
|
||||||
|
mov ax, 1023
|
||||||
|
shl rax, 52
|
||||||
|
movq xmm7, rax
|
||||||
|
mov r15, QWORD PTR [r9+96]
|
||||||
|
punpcklqdq xmm3, xmm0
|
||||||
|
movq xmm0, rcx
|
||||||
|
punpcklqdq xmm4, xmm0
|
||||||
|
|
||||||
|
ALIGN(64)
|
||||||
|
cnv2_main_loop_bulldozer:
|
||||||
|
movdqa xmm5, XMMWORD PTR [r10+rbx]
|
||||||
|
movq xmm6, r8
|
||||||
|
pinsrq xmm6, r11, 1
|
||||||
|
lea rdx, QWORD PTR [r10+rbx]
|
||||||
|
lea r9, QWORD PTR [rdi+rdi]
|
||||||
|
shl rdi, 32
|
||||||
|
|
||||||
|
mov ecx, r10d
|
||||||
|
mov eax, r10d
|
||||||
|
xor ecx, 16
|
||||||
|
xor eax, 32
|
||||||
|
xor r10d, 48
|
||||||
|
aesenc xmm5, xmm6
|
||||||
|
movdqa xmm2, XMMWORD PTR [rcx+rbx]
|
||||||
|
movdqa xmm1, XMMWORD PTR [rax+rbx]
|
||||||
|
movdqa xmm0, XMMWORD PTR [r10+rbx]
|
||||||
|
paddq xmm2, xmm3
|
||||||
|
paddq xmm1, xmm6
|
||||||
|
paddq xmm0, xmm4
|
||||||
|
movdqa XMMWORD PTR [rcx+rbx], xmm0
|
||||||
|
movdqa XMMWORD PTR [rax+rbx], xmm2
|
||||||
|
movdqa XMMWORD PTR [r10+rbx], xmm1
|
||||||
|
|
||||||
|
movaps xmm1, xmm8
|
||||||
|
mov rsi, r15
|
||||||
|
xor rsi, rdi
|
||||||
|
|
||||||
|
mov edi, 1023
|
||||||
|
shl rdi, 52
|
||||||
|
|
||||||
|
movq r14, xmm5
|
||||||
|
pextrq rax, xmm5, 1
|
||||||
|
|
||||||
|
movdqa xmm0, xmm5
|
||||||
|
pxor xmm0, xmm3
|
||||||
|
mov r10, r14
|
||||||
|
and r10d, 2097136
|
||||||
|
movdqa XMMWORD PTR [rdx], xmm0
|
||||||
|
xor rsi, QWORD PTR [r10+rbx]
|
||||||
|
lea r12, QWORD PTR [r10+rbx]
|
||||||
|
mov r13, QWORD PTR [r10+rbx+8]
|
||||||
|
|
||||||
|
add r9d, r14d
|
||||||
|
or r9d, -2147483647
|
||||||
|
xor edx, edx
|
||||||
|
div r9
|
||||||
|
mov eax, eax
|
||||||
|
shl rdx, 32
|
||||||
|
lea r15, [rax+rdx]
|
||||||
|
lea rax, [r14+r15]
|
||||||
|
shr rax, 12
|
||||||
|
add rax, rdi
|
||||||
|
movq xmm0, rax
|
||||||
|
sqrtsd xmm1, xmm0
|
||||||
|
movq rdi, xmm1
|
||||||
|
test rdi, 524287
|
||||||
|
je sqrt_fixup_bulldozer
|
||||||
|
shr rdi, 19
|
||||||
|
|
||||||
|
sqrt_fixup_bulldozer_ret:
|
||||||
|
mov rax, rsi
|
||||||
|
mul r14
|
||||||
|
movq xmm1, rax
|
||||||
|
movq xmm0, rdx
|
||||||
|
punpcklqdq xmm0, xmm1
|
||||||
|
|
||||||
|
mov r9d, r10d
|
||||||
|
mov ecx, r10d
|
||||||
|
xor r9d, 16
|
||||||
|
xor ecx, 32
|
||||||
|
xor r10d, 48
|
||||||
|
movdqa xmm1, XMMWORD PTR [rcx+rbx]
|
||||||
|
xor rdx, [rcx+rbx]
|
||||||
|
xor rax, [rcx+rbx+8]
|
||||||
|
movdqa xmm2, XMMWORD PTR [r9+rbx]
|
||||||
|
pxor xmm2, xmm0
|
||||||
|
paddq xmm4, XMMWORD PTR [r10+rbx]
|
||||||
|
paddq xmm2, xmm3
|
||||||
|
paddq xmm1, xmm6
|
||||||
|
movdqa XMMWORD PTR [r9+rbx], xmm4
|
||||||
|
movdqa XMMWORD PTR [rcx+rbx], xmm2
|
||||||
|
movdqa XMMWORD PTR [r10+rbx], xmm1
|
||||||
|
|
||||||
|
movdqa xmm4, xmm3
|
||||||
|
add r8, rdx
|
||||||
|
add r11, rax
|
||||||
|
mov QWORD PTR [r12], r8
|
||||||
|
xor r8, rsi
|
||||||
|
mov QWORD PTR [r12+8], r11
|
||||||
|
mov r10, r8
|
||||||
|
xor r11, r13
|
||||||
|
and r10d, 2097136
|
||||||
|
movdqa xmm3, xmm5
|
||||||
|
dec ebp
|
||||||
|
jne cnv2_main_loop_bulldozer
|
||||||
|
|
||||||
|
ldmxcsr DWORD PTR [rsp]
|
||||||
|
movaps xmm6, XMMWORD PTR [rsp+48]
|
||||||
|
lea r11, QWORD PTR [rsp+64]
|
||||||
|
mov rbx, QWORD PTR [r11+56]
|
||||||
|
mov rbp, QWORD PTR [r11+64]
|
||||||
|
mov rsi, QWORD PTR [r11+72]
|
||||||
|
movaps xmm8, XMMWORD PTR [r11-48]
|
||||||
|
movaps xmm7, XMMWORD PTR [rsp+32]
|
||||||
|
mov rsp, r11
|
||||||
|
pop r15
|
||||||
|
pop r14
|
||||||
|
pop r13
|
||||||
|
pop r12
|
||||||
|
pop rdi
|
||||||
|
jmp cnv2_main_loop_bulldozer_endp
|
||||||
|
|
||||||
|
sqrt_fixup_bulldozer:
|
||||||
|
movq r9, xmm5
|
||||||
|
add r9, r15
|
||||||
|
dec rdi
|
||||||
|
mov edx, -1022
|
||||||
|
shl rdx, 32
|
||||||
|
mov rax, rdi
|
||||||
|
shr rdi, 19
|
||||||
|
shr rax, 20
|
||||||
|
mov rcx, rdi
|
||||||
|
sub rcx, rax
|
||||||
|
lea rcx, [rcx+rdx+1]
|
||||||
|
add rax, rdx
|
||||||
|
imul rcx, rax
|
||||||
|
sub rcx, r9
|
||||||
|
adc rdi, 0
|
||||||
|
jmp sqrt_fixup_bulldozer_ret
|
||||||
|
|
||||||
|
cnv2_main_loop_bulldozer_endp:
|
@ -50,7 +50,7 @@
|
|||||||
punpcklqdq xmm5, xmm0
|
punpcklqdq xmm5, xmm0
|
||||||
movdqu xmm6, XMMWORD PTR [r10+rbx]
|
movdqu xmm6, XMMWORD PTR [r10+rbx]
|
||||||
|
|
||||||
ALIGN 16
|
ALIGN(64)
|
||||||
main_loop_ivybridge:
|
main_loop_ivybridge:
|
||||||
lea rdx, QWORD PTR [r10+rbx]
|
lea rdx, QWORD PTR [r10+rbx]
|
||||||
mov ecx, r10d
|
mov ecx, r10d
|
@ -45,7 +45,7 @@
|
|||||||
movq xmm0, rcx
|
movq xmm0, rcx
|
||||||
punpcklqdq xmm4, xmm0
|
punpcklqdq xmm4, xmm0
|
||||||
|
|
||||||
ALIGN 16
|
ALIGN(64)
|
||||||
main_loop_ryzen:
|
main_loop_ryzen:
|
||||||
movdqa xmm5, XMMWORD PTR [r10+rbx]
|
movdqa xmm5, XMMWORD PTR [r10+rbx]
|
||||||
movq xmm0, r11
|
movq xmm0, r11
|
@ -1,4 +1,8 @@
|
|||||||
#define ALIGN .align
|
#ifdef __APPLE__
|
||||||
|
# define ALIGN(x) .align 6
|
||||||
|
#else
|
||||||
|
# define ALIGN(x) .align 64
|
||||||
|
#endif
|
||||||
.intel_syntax noprefix
|
.intel_syntax noprefix
|
||||||
#ifdef __APPLE__
|
#ifdef __APPLE__
|
||||||
# define FN_PREFIX(fn) _ ## fn
|
# define FN_PREFIX(fn) _ ## fn
|
||||||
@ -9,29 +13,42 @@
|
|||||||
#endif
|
#endif
|
||||||
.global FN_PREFIX(cnv2_mainloop_ivybridge_asm)
|
.global FN_PREFIX(cnv2_mainloop_ivybridge_asm)
|
||||||
.global FN_PREFIX(cnv2_mainloop_ryzen_asm)
|
.global FN_PREFIX(cnv2_mainloop_ryzen_asm)
|
||||||
|
.global FN_PREFIX(cnv2_mainloop_bulldozer_asm)
|
||||||
.global FN_PREFIX(cnv2_double_mainloop_sandybridge_asm)
|
.global FN_PREFIX(cnv2_double_mainloop_sandybridge_asm)
|
||||||
|
|
||||||
ALIGN 16
|
ALIGN(64)
|
||||||
FN_PREFIX(cnv2_mainloop_ivybridge_asm):
|
FN_PREFIX(cnv2_mainloop_ivybridge_asm):
|
||||||
sub rsp, 48
|
sub rsp, 48
|
||||||
mov rcx, rdi
|
mov rcx, rdi
|
||||||
#include "cnv2_main_loop_ivybridge.inc"
|
#include "cn2/cnv2_main_loop_ivybridge.inc"
|
||||||
add rsp, 48
|
add rsp, 48
|
||||||
ret 0
|
ret 0
|
||||||
|
mov eax, 3735929054
|
||||||
|
|
||||||
ALIGN 16
|
ALIGN(64)
|
||||||
FN_PREFIX(cnv2_mainloop_ryzen_asm):
|
FN_PREFIX(cnv2_mainloop_ryzen_asm):
|
||||||
sub rsp, 48
|
sub rsp, 48
|
||||||
mov rcx, rdi
|
mov rcx, rdi
|
||||||
#include "cnv2_main_loop_ryzen.inc"
|
#include "cn2/cnv2_main_loop_ryzen.inc"
|
||||||
add rsp, 48
|
add rsp, 48
|
||||||
ret 0
|
ret 0
|
||||||
|
mov eax, 3735929054
|
||||||
|
|
||||||
ALIGN 16
|
ALIGN(64)
|
||||||
|
FN_PREFIX(cnv2_mainloop_bulldozer_asm):
|
||||||
|
sub rsp, 48
|
||||||
|
mov rcx, rdi
|
||||||
|
#include "cn2/cnv2_main_loop_bulldozer.inc"
|
||||||
|
add rsp, 48
|
||||||
|
ret 0
|
||||||
|
mov eax, 3735929054
|
||||||
|
|
||||||
|
ALIGN(64)
|
||||||
FN_PREFIX(cnv2_double_mainloop_sandybridge_asm):
|
FN_PREFIX(cnv2_double_mainloop_sandybridge_asm):
|
||||||
sub rsp, 48
|
sub rsp, 48
|
||||||
mov rcx, rdi
|
mov rcx, rdi
|
||||||
mov rdx, rsi
|
mov rdx, rsi
|
||||||
#include "cnv2_double_main_loop_sandybridge.inc"
|
#include "cn2/cnv2_double_main_loop_sandybridge.inc"
|
||||||
add rsp, 48
|
add rsp, 48
|
||||||
ret 0
|
ret 0
|
||||||
|
mov eax, 3735929054
|
@ -1,25 +0,0 @@
|
|||||||
_TEXT_CNV2_MAINLOOP SEGMENT PAGE READ EXECUTE
|
|
||||||
PUBLIC cnv2_mainloop_ivybridge_asm
|
|
||||||
PUBLIC cnv2_mainloop_ryzen_asm
|
|
||||||
PUBLIC cnv2_double_mainloop_sandybridge_asm
|
|
||||||
|
|
||||||
ALIGN 64
|
|
||||||
cnv2_mainloop_ivybridge_asm PROC
|
|
||||||
INCLUDE cnv2_main_loop_ivybridge.inc
|
|
||||||
ret 0
|
|
||||||
cnv2_mainloop_ivybridge_asm ENDP
|
|
||||||
|
|
||||||
ALIGN 64
|
|
||||||
cnv2_mainloop_ryzen_asm PROC
|
|
||||||
INCLUDE cnv2_main_loop_ryzen.inc
|
|
||||||
ret 0
|
|
||||||
cnv2_mainloop_ryzen_asm ENDP
|
|
||||||
|
|
||||||
ALIGN 64
|
|
||||||
cnv2_double_mainloop_sandybridge_asm PROC
|
|
||||||
INCLUDE cnv2_double_main_loop_sandybridge.inc
|
|
||||||
ret 0
|
|
||||||
cnv2_double_mainloop_sandybridge_asm ENDP
|
|
||||||
|
|
||||||
_TEXT_CNV2_MAINLOOP ENDS
|
|
||||||
END
|
|
31
crypto/asm/win64/cn_main_loop.S
Normal file
31
crypto/asm/win64/cn_main_loop.S
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
#define ALIGN(x) .align 64
|
||||||
|
.intel_syntax noprefix
|
||||||
|
.section .text
|
||||||
|
.global cnv2_mainloop_ivybridge_asm
|
||||||
|
.global cnv2_mainloop_ryzen_asm
|
||||||
|
.global cnv2_mainloop_bulldozer_asm
|
||||||
|
.global cnv2_double_mainloop_sandybridge_asm
|
||||||
|
|
||||||
|
ALIGN(64)
|
||||||
|
cnv2_mainloop_ivybridge_asm:
|
||||||
|
#include "../cn2/cnv2_main_loop_ivybridge.inc"
|
||||||
|
ret 0
|
||||||
|
mov eax, 3735929054
|
||||||
|
|
||||||
|
ALIGN(64)
|
||||||
|
cnv2_mainloop_ryzen_asm:
|
||||||
|
#include "../cn2/cnv2_main_loop_ryzen.inc"
|
||||||
|
ret 0
|
||||||
|
mov eax, 3735929054
|
||||||
|
|
||||||
|
ALIGN(64)
|
||||||
|
cnv2_mainloop_bulldozer_asm:
|
||||||
|
#include "../cn2/cnv2_main_loop_bulldozer.inc"
|
||||||
|
ret 0
|
||||||
|
mov eax, 3735929054
|
||||||
|
|
||||||
|
ALIGN(64)
|
||||||
|
cnv2_double_mainloop_sandybridge_asm:
|
||||||
|
#include "../cn2/cnv2_double_main_loop_sandybridge.inc"
|
||||||
|
ret 0
|
||||||
|
mov eax, 3735929054
|
@ -1,21 +0,0 @@
|
|||||||
#define ALIGN .align
|
|
||||||
.intel_syntax noprefix
|
|
||||||
.section .text
|
|
||||||
.global cnv2_mainloop_ivybridge_asm
|
|
||||||
.global cnv2_mainloop_ryzen_asm
|
|
||||||
.global cnv2_double_mainloop_sandybridge_asm
|
|
||||||
|
|
||||||
ALIGN 16
|
|
||||||
cnv2_mainloop_ivybridge_asm:
|
|
||||||
#include "../cnv2_main_loop_ivybridge.inc"
|
|
||||||
ret 0
|
|
||||||
|
|
||||||
ALIGN 16
|
|
||||||
cnv2_mainloop_ryzen_asm:
|
|
||||||
#include "../cnv2_main_loop_ryzen.inc"
|
|
||||||
ret 0
|
|
||||||
|
|
||||||
ALIGN 16
|
|
||||||
cnv2_double_mainloop_sandybridge_asm:
|
|
||||||
#include "../cnv2_double_main_loop_sandybridge.inc"
|
|
||||||
ret 0
|
|
@ -1,212 +0,0 @@
|
|||||||
/*
|
|
||||||
* This program is free software: you can redistribute it and/or modify
|
|
||||||
* it under the terms of the GNU General Public License as published by
|
|
||||||
* the Free Software Foundation, either version 3 of the License, or
|
|
||||||
* any later version.
|
|
||||||
*
|
|
||||||
* This program is distributed in the hope that it will be useful,
|
|
||||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
* GNU General Public License for more details.
|
|
||||||
*
|
|
||||||
* You should have received a copy of the GNU General Public License
|
|
||||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
||||||
*
|
|
||||||
* Additional permission under GNU GPL version 3 section 7
|
|
||||||
*
|
|
||||||
* If you modify this Program, or any covered work, by linking or combining
|
|
||||||
* it with OpenSSL (or a modified version of that library), containing parts
|
|
||||||
* covered by the terms of OpenSSL License and SSLeay License, the licensors
|
|
||||||
* of this Program grant you additional permission to convey the resulting work.
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
|
|
||||||
/*
|
|
||||||
* The original author of this AES implementation is Karl Malbrain.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifdef __GNUC__
|
|
||||||
#include <x86intrin.h>
|
|
||||||
#else
|
|
||||||
#include <intrin.h>
|
|
||||||
#endif // __GNUC__
|
|
||||||
|
|
||||||
#include <inttypes.h>
|
|
||||||
|
|
||||||
#define TABLE_ALIGN 32
|
|
||||||
#define WPOLY 0x011b
|
|
||||||
#define N_COLS 4
|
|
||||||
#define AES_BLOCK_SIZE 16
|
|
||||||
#define RC_LENGTH (5 * (AES_BLOCK_SIZE / 4 - 2))
|
|
||||||
|
|
||||||
#if defined(_MSC_VER)
|
|
||||||
#define ALIGN __declspec(align(TABLE_ALIGN))
|
|
||||||
#elif defined(__GNUC__)
|
|
||||||
#define ALIGN __attribute__ ((aligned(16)))
|
|
||||||
#else
|
|
||||||
#define ALIGN
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#define rf1(r,c) (r)
|
|
||||||
#define word_in(x,c) (*((uint32_t*)(x)+(c)))
|
|
||||||
#define word_out(x,c,v) (*((uint32_t*)(x)+(c)) = (v))
|
|
||||||
|
|
||||||
#define s(x,c) x[c]
|
|
||||||
#define si(y,x,c) (s(y,c) = word_in(x, c))
|
|
||||||
#define so(y,x,c) word_out(y, c, s(x,c))
|
|
||||||
#define state_in(y,x) si(y,x,0); si(y,x,1); si(y,x,2); si(y,x,3)
|
|
||||||
#define state_out(y,x) so(y,x,0); so(y,x,1); so(y,x,2); so(y,x,3)
|
|
||||||
#define round(y,x,k) \
|
|
||||||
y[0] = (k)[0] ^ (t_fn[0][x[0] & 0xff] ^ t_fn[1][(x[1] >> 8) & 0xff] ^ t_fn[2][(x[2] >> 16) & 0xff] ^ t_fn[3][x[3] >> 24]); \
|
|
||||||
y[1] = (k)[1] ^ (t_fn[0][x[1] & 0xff] ^ t_fn[1][(x[2] >> 8) & 0xff] ^ t_fn[2][(x[3] >> 16) & 0xff] ^ t_fn[3][x[0] >> 24]); \
|
|
||||||
y[2] = (k)[2] ^ (t_fn[0][x[2] & 0xff] ^ t_fn[1][(x[3] >> 8) & 0xff] ^ t_fn[2][(x[0] >> 16) & 0xff] ^ t_fn[3][x[1] >> 24]); \
|
|
||||||
y[3] = (k)[3] ^ (t_fn[0][x[3] & 0xff] ^ t_fn[1][(x[0] >> 8) & 0xff] ^ t_fn[2][(x[1] >> 16) & 0xff] ^ t_fn[3][x[2] >> 24]);
|
|
||||||
#define to_byte(x) ((x) & 0xff)
|
|
||||||
#define bval(x,n) to_byte((x) >> (8 * (n)))
|
|
||||||
|
|
||||||
#define fwd_var(x,r,c)\
|
|
||||||
( r == 0 ? ( c == 0 ? s(x,0) : c == 1 ? s(x,1) : c == 2 ? s(x,2) : s(x,3))\
|
|
||||||
: r == 1 ? ( c == 0 ? s(x,1) : c == 1 ? s(x,2) : c == 2 ? s(x,3) : s(x,0))\
|
|
||||||
: r == 2 ? ( c == 0 ? s(x,2) : c == 1 ? s(x,3) : c == 2 ? s(x,0) : s(x,1))\
|
|
||||||
: ( c == 0 ? s(x,3) : c == 1 ? s(x,0) : c == 2 ? s(x,1) : s(x,2)))
|
|
||||||
|
|
||||||
#define fwd_rnd(y,x,k,c) (s(y,c) = (k)[c] ^ four_tables(x,t_use(f,n),fwd_var,rf1,c))
|
|
||||||
|
|
||||||
#define sb_data(w) {\
|
|
||||||
w(0x63), w(0x7c), w(0x77), w(0x7b), w(0xf2), w(0x6b), w(0x6f), w(0xc5),\
|
|
||||||
w(0x30), w(0x01), w(0x67), w(0x2b), w(0xfe), w(0xd7), w(0xab), w(0x76),\
|
|
||||||
w(0xca), w(0x82), w(0xc9), w(0x7d), w(0xfa), w(0x59), w(0x47), w(0xf0),\
|
|
||||||
w(0xad), w(0xd4), w(0xa2), w(0xaf), w(0x9c), w(0xa4), w(0x72), w(0xc0),\
|
|
||||||
w(0xb7), w(0xfd), w(0x93), w(0x26), w(0x36), w(0x3f), w(0xf7), w(0xcc),\
|
|
||||||
w(0x34), w(0xa5), w(0xe5), w(0xf1), w(0x71), w(0xd8), w(0x31), w(0x15),\
|
|
||||||
w(0x04), w(0xc7), w(0x23), w(0xc3), w(0x18), w(0x96), w(0x05), w(0x9a),\
|
|
||||||
w(0x07), w(0x12), w(0x80), w(0xe2), w(0xeb), w(0x27), w(0xb2), w(0x75),\
|
|
||||||
w(0x09), w(0x83), w(0x2c), w(0x1a), w(0x1b), w(0x6e), w(0x5a), w(0xa0),\
|
|
||||||
w(0x52), w(0x3b), w(0xd6), w(0xb3), w(0x29), w(0xe3), w(0x2f), w(0x84),\
|
|
||||||
w(0x53), w(0xd1), w(0x00), w(0xed), w(0x20), w(0xfc), w(0xb1), w(0x5b),\
|
|
||||||
w(0x6a), w(0xcb), w(0xbe), w(0x39), w(0x4a), w(0x4c), w(0x58), w(0xcf),\
|
|
||||||
w(0xd0), w(0xef), w(0xaa), w(0xfb), w(0x43), w(0x4d), w(0x33), w(0x85),\
|
|
||||||
w(0x45), w(0xf9), w(0x02), w(0x7f), w(0x50), w(0x3c), w(0x9f), w(0xa8),\
|
|
||||||
w(0x51), w(0xa3), w(0x40), w(0x8f), w(0x92), w(0x9d), w(0x38), w(0xf5),\
|
|
||||||
w(0xbc), w(0xb6), w(0xda), w(0x21), w(0x10), w(0xff), w(0xf3), w(0xd2),\
|
|
||||||
w(0xcd), w(0x0c), w(0x13), w(0xec), w(0x5f), w(0x97), w(0x44), w(0x17),\
|
|
||||||
w(0xc4), w(0xa7), w(0x7e), w(0x3d), w(0x64), w(0x5d), w(0x19), w(0x73),\
|
|
||||||
w(0x60), w(0x81), w(0x4f), w(0xdc), w(0x22), w(0x2a), w(0x90), w(0x88),\
|
|
||||||
w(0x46), w(0xee), w(0xb8), w(0x14), w(0xde), w(0x5e), w(0x0b), w(0xdb),\
|
|
||||||
w(0xe0), w(0x32), w(0x3a), w(0x0a), w(0x49), w(0x06), w(0x24), w(0x5c),\
|
|
||||||
w(0xc2), w(0xd3), w(0xac), w(0x62), w(0x91), w(0x95), w(0xe4), w(0x79),\
|
|
||||||
w(0xe7), w(0xc8), w(0x37), w(0x6d), w(0x8d), w(0xd5), w(0x4e), w(0xa9),\
|
|
||||||
w(0x6c), w(0x56), w(0xf4), w(0xea), w(0x65), w(0x7a), w(0xae), w(0x08),\
|
|
||||||
w(0xba), w(0x78), w(0x25), w(0x2e), w(0x1c), w(0xa6), w(0xb4), w(0xc6),\
|
|
||||||
w(0xe8), w(0xdd), w(0x74), w(0x1f), w(0x4b), w(0xbd), w(0x8b), w(0x8a),\
|
|
||||||
w(0x70), w(0x3e), w(0xb5), w(0x66), w(0x48), w(0x03), w(0xf6), w(0x0e),\
|
|
||||||
w(0x61), w(0x35), w(0x57), w(0xb9), w(0x86), w(0xc1), w(0x1d), w(0x9e),\
|
|
||||||
w(0xe1), w(0xf8), w(0x98), w(0x11), w(0x69), w(0xd9), w(0x8e), w(0x94),\
|
|
||||||
w(0x9b), w(0x1e), w(0x87), w(0xe9), w(0xce), w(0x55), w(0x28), w(0xdf),\
|
|
||||||
w(0x8c), w(0xa1), w(0x89), w(0x0d), w(0xbf), w(0xe6), w(0x42), w(0x68),\
|
|
||||||
w(0x41), w(0x99), w(0x2d), w(0x0f), w(0xb0), w(0x54), w(0xbb), w(0x16) }
|
|
||||||
|
|
||||||
#define rc_data(w) {\
|
|
||||||
w(0x01), w(0x02), w(0x04), w(0x08), w(0x10),w(0x20), w(0x40), w(0x80),\
|
|
||||||
w(0x1b), w(0x36) }
|
|
||||||
|
|
||||||
#define bytes2word(b0, b1, b2, b3) (((uint32_t)(b3) << 24) | \
|
|
||||||
((uint32_t)(b2) << 16) | ((uint32_t)(b1) << 8) | (b0))
|
|
||||||
|
|
||||||
#define h0(x) (x)
|
|
||||||
#define w0(p) bytes2word(p, 0, 0, 0)
|
|
||||||
#define w1(p) bytes2word(0, p, 0, 0)
|
|
||||||
#define w2(p) bytes2word(0, 0, p, 0)
|
|
||||||
#define w3(p) bytes2word(0, 0, 0, p)
|
|
||||||
|
|
||||||
#define u0(p) bytes2word(f2(p), p, p, f3(p))
|
|
||||||
#define u1(p) bytes2word(f3(p), f2(p), p, p)
|
|
||||||
#define u2(p) bytes2word(p, f3(p), f2(p), p)
|
|
||||||
#define u3(p) bytes2word(p, p, f3(p), f2(p))
|
|
||||||
|
|
||||||
#define v0(p) bytes2word(fe(p), f9(p), fd(p), fb(p))
|
|
||||||
#define v1(p) bytes2word(fb(p), fe(p), f9(p), fd(p))
|
|
||||||
#define v2(p) bytes2word(fd(p), fb(p), fe(p), f9(p))
|
|
||||||
#define v3(p) bytes2word(f9(p), fd(p), fb(p), fe(p))
|
|
||||||
|
|
||||||
#define f2(x) ((x<<1) ^ (((x>>7) & 1) * WPOLY))
|
|
||||||
#define f4(x) ((x<<2) ^ (((x>>6) & 1) * WPOLY) ^ (((x>>6) & 2) * WPOLY))
|
|
||||||
#define f8(x) ((x<<3) ^ (((x>>5) & 1) * WPOLY) ^ (((x>>5) & 2) * WPOLY) ^ (((x>>5) & 4) * WPOLY))
|
|
||||||
#define f3(x) (f2(x) ^ x)
|
|
||||||
#define f9(x) (f8(x) ^ x)
|
|
||||||
#define fb(x) (f8(x) ^ f2(x) ^ x)
|
|
||||||
#define fd(x) (f8(x) ^ f4(x) ^ x)
|
|
||||||
#define fe(x) (f8(x) ^ f4(x) ^ f2(x))
|
|
||||||
|
|
||||||
#define t_dec(m,n) t_##m##n
|
|
||||||
#define t_set(m,n) t_##m##n
|
|
||||||
#define t_use(m,n) t_##m##n
|
|
||||||
|
|
||||||
#define d_4(t,n,b,e,f,g,h) ALIGN const t n[4][256] = { b(e), b(f), b(g), b(h) }
|
|
||||||
|
|
||||||
#define four_tables(x,tab,vf,rf,c) \
|
|
||||||
(tab[0][bval(vf(x,0,c),rf(0,c))] \
|
|
||||||
^ tab[1][bval(vf(x,1,c),rf(1,c))] \
|
|
||||||
^ tab[2][bval(vf(x,2,c),rf(2,c))] \
|
|
||||||
^ tab[3][bval(vf(x,3,c),rf(3,c))])
|
|
||||||
|
|
||||||
d_4(uint32_t, t_dec(f,n), sb_data, u0, u1, u2, u3);
|
|
||||||
|
|
||||||
__m128i soft_aesenc(__m128i in, __m128i key)
|
|
||||||
{
|
|
||||||
uint32_t x0, x1, x2, x3;
|
|
||||||
x0 = _mm_cvtsi128_si32(in);
|
|
||||||
x1 = _mm_cvtsi128_si32(_mm_shuffle_epi32(in, 0x55));
|
|
||||||
x2 = _mm_cvtsi128_si32(_mm_shuffle_epi32(in, 0xAA));
|
|
||||||
x3 = _mm_cvtsi128_si32(_mm_shuffle_epi32(in, 0xFF));
|
|
||||||
|
|
||||||
__m128i out = _mm_set_epi32(
|
|
||||||
(t_fn[0][x3 & 0xff] ^ t_fn[1][(x0 >> 8) & 0xff] ^ t_fn[2][(x1 >> 16) & 0xff] ^ t_fn[3][x2 >> 24]),
|
|
||||||
(t_fn[0][x2 & 0xff] ^ t_fn[1][(x3 >> 8) & 0xff] ^ t_fn[2][(x0 >> 16) & 0xff] ^ t_fn[3][x1 >> 24]),
|
|
||||||
(t_fn[0][x1 & 0xff] ^ t_fn[1][(x2 >> 8) & 0xff] ^ t_fn[2][(x3 >> 16) & 0xff] ^ t_fn[3][x0 >> 24]),
|
|
||||||
(t_fn[0][x0 & 0xff] ^ t_fn[1][(x1 >> 8) & 0xff] ^ t_fn[2][(x2 >> 16) & 0xff] ^ t_fn[3][x3 >> 24]));
|
|
||||||
|
|
||||||
return _mm_xor_si128(out, key);
|
|
||||||
}
|
|
||||||
|
|
||||||
uint8_t Sbox[256] = { // forward s-box
|
|
||||||
0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
|
|
||||||
0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
|
|
||||||
0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
|
|
||||||
0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
|
|
||||||
0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
|
|
||||||
0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
|
|
||||||
0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
|
|
||||||
0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
|
|
||||||
0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
|
|
||||||
0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
|
|
||||||
0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
|
|
||||||
0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
|
|
||||||
0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
|
|
||||||
0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
|
|
||||||
0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
|
|
||||||
0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16};
|
|
||||||
|
|
||||||
static inline void sub_word(uint8_t* key)
|
|
||||||
{
|
|
||||||
key[0] = Sbox[key[0]];
|
|
||||||
key[1] = Sbox[key[1]];
|
|
||||||
key[2] = Sbox[key[2]];
|
|
||||||
key[3] = Sbox[key[3]];
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
 * Fallback 32-bit rotate-right for clang builds.
 *
 * Skipped when the toolchain headers already provide _rotr as a macro:
 * a function definition with that name would be macro-expanded and fail
 * to compile.
 */
#if defined(__clang__) && !defined(_rotr)
uint32_t _rotr(uint32_t value, uint32_t amount)
{
    /* Reduce the count modulo 32 so neither shift reaches the type width
     * (shifting a 32-bit value by 32 or more is undefined behaviour). */
    amount &= 31;
    return (value >> amount) | (value << ((32 - amount) & 31));
}
#endif
|
|
||||||
|
|
||||||
/*
 * Software key-generation helper (by its name, a stand-in for
 * AESKEYGENASSIST).
 *
 * Takes 32-bit lanes 1 and 3 of `key`, runs each through sub_word(), and
 * returns, from the lowest lane up:
 *   sub(lane1), rotr(sub(lane1), 8) ^ rcon, sub(lane3), rotr(sub(lane3), 8) ^ rcon
 */
__m128i soft_aeskeygenassist(__m128i key, uint8_t rcon)
{
    uint32_t lane1 = _mm_cvtsi128_si32(_mm_shuffle_epi32(key, 0x55));
    uint32_t lane3 = _mm_cvtsi128_si32(_mm_shuffle_epi32(key, 0xFF));

    /* Byte-wise S-box substitution, performed in place on each word. */
    sub_word((uint8_t*)&lane1);
    sub_word((uint8_t*)&lane3);

    const uint32_t rot1 = _rotr(lane1, 8) ^ rcon;
    const uint32_t rot3 = _rotr(lane3, 8) ^ rcon;

    /* _mm_set_epi32 takes its arguments from the highest lane down. */
    return _mm_set_epi32(rot3, lane3, rot1, lane1);
}
|
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user