From ef2e8bed6e4d0c83b2ede77e8b1db1a6f7932316 Mon Sep 17 00:00:00 2001 From: XMRig Date: Mon, 4 Mar 2019 13:31:25 +0700 Subject: [PATCH] Use new style method to call ASM functions for cn/2 & added bulldozer ASM code. --- algo/cryptonight/cryptonight.c | 60 +++--- algo/cryptonight/cryptonight_av1.c | 14 ++ algo/cryptonight/cryptonight_r_av1.c | 49 ----- cmake/asm.cmake | 24 +-- cpu.c | 23 +-- cpu.h | 5 +- .../cnv2_double_main_loop_sandybridge.inc | 2 +- crypto/asm/cn2/cnv2_main_loop_bulldozer.inc | 180 ++++++++++++++++++ .../{ => cn2}/cnv2_main_loop_ivybridge.inc | 2 +- crypto/asm/{ => cn2}/cnv2_main_loop_ryzen.inc | 2 +- .../asm/{cnv2_main_loop.S => cn_main_loop.S} | 31 ++- .../{cnv2_main_loop.asm => cn_main_loop.asm} | 25 ++- crypto/asm/win64/cn_main_loop.S | 31 +++ crypto/asm/win64/cnv2_main_loop.S | 21 -- options.c | 5 +- options.h | 1 + 16 files changed, 325 insertions(+), 150 deletions(-) rename crypto/asm/{ => cn2}/cnv2_double_main_loop_sandybridge.inc (99%) create mode 100644 crypto/asm/cn2/cnv2_main_loop_bulldozer.inc rename crypto/asm/{ => cn2}/cnv2_main_loop_ivybridge.inc (99%) rename crypto/asm/{ => cn2}/cnv2_main_loop_ryzen.inc (99%) rename crypto/asm/{cnv2_main_loop.S => cn_main_loop.S} (51%) rename crypto/asm/{cnv2_main_loop.asm => cn_main_loop.asm} (50%) create mode 100644 crypto/asm/win64/cn_main_loop.S delete mode 100644 crypto/asm/win64/cnv2_main_loop.S diff --git a/algo/cryptonight/cryptonight.c b/algo/cryptonight/cryptonight.c index 1b67f7dd..420e9153 100644 --- a/algo/cryptonight/cryptonight.c +++ b/algo/cryptonight/cryptonight.c @@ -44,6 +44,9 @@ #include "options.h" +static cn_hash_fun asm_func_map[AV_MAX][VARIANT_MAX][ASM_MAX] = {}; + + void cryptonight_av1_v0(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx); void cryptonight_av1_v1(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx); void cryptonight_av1_v2(const uint8_t *input, size_t size, uint8_t *output, struct 
cryptonight_ctx **ctx); @@ -78,6 +81,7 @@ void cryptonight_lite_av4_v1(const uint8_t *input, size_t size, uint8_t *output, #ifndef XMRIG_NO_ASM void cryptonight_single_hash_asm_intel(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx); void cryptonight_single_hash_asm_ryzen(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx); +void cryptonight_single_hash_asm_bulldozer(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx); void cryptonight_double_hash_asm(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx); #endif @@ -171,34 +175,20 @@ static bool self_test() { } -size_t fn_index(enum Algo algorithm, enum AlgoVariant av, enum Variant variant, enum Assembly assembly) +#ifndef XMRIG_NO_ASM +cn_hash_fun cryptonight_hash_asm_fn(enum AlgoVariant av, enum Variant variant, enum Assembly assembly) { - const size_t index = VARIANT_MAX * 4 * algorithm + 4 * variant + av - 1; - -# ifndef XMRIG_NO_ASM if (assembly == ASM_AUTO) { - assembly = cpu_info.assembly; + assembly = (enum Assembly) cpu_info.assembly; } if (assembly == ASM_NONE) { - return index; + return NULL; } - const size_t offset = VARIANT_MAX * 4 * 2; - - if (algorithm == ALGO_CRYPTONIGHT && variant == VARIANT_2) { - if (av == AV_SINGLE) { - return offset + assembly - 2; - } - - if (av == AV_DOUBLE) { - return offset + 2; - } - } -# endif - - return index; + return asm_func_map[av][variant][assembly]; } +#endif cn_hash_fun cryptonight_hash_fn(enum Algo algorithm, enum AlgoVariant av, enum Variant variant) @@ -207,10 +197,15 @@ cn_hash_fun cryptonight_hash_fn(enum Algo algorithm, enum AlgoVariant av, enum V assert(variant > VARIANT_AUTO && variant < VARIANT_MAX); # ifndef XMRIG_NO_ASM - static const cn_hash_fun func_table[VARIANT_MAX * 4 * 2 + 3] = { -# else - static const cn_hash_fun func_table[VARIANT_MAX * 4 * 2] = { + if (algorithm == ALGO_CRYPTONIGHT) { + cn_hash_fun fun = 
cryptonight_hash_asm_fn(av, variant, opt_assembly); + if (fun) { + return fun; + } + } # endif + + static const cn_hash_fun func_table[VARIANT_MAX * 4 * 2] = { cryptonight_av1_v0, cryptonight_av2_v0, cryptonight_av3_v0, @@ -263,16 +258,11 @@ cn_hash_fun cryptonight_hash_fn(enum Algo algorithm, enum AlgoVariant av, enum V NULL, NULL, NULL, -# endif -# ifndef XMRIG_NO_ASM - cryptonight_single_hash_asm_intel, - cryptonight_single_hash_asm_ryzen, - cryptonight_double_hash_asm # endif }; # ifndef NDEBUG - const size_t index = fn_index(algorithm, av, variant, opt_assembly); + const size_t index = VARIANT_MAX * 4 * algorithm + 4 * variant + av - 1; cn_hash_fun func = func_table[index]; @@ -281,7 +271,7 @@ cn_hash_fun cryptonight_hash_fn(enum Algo algorithm, enum AlgoVariant av, enum V return func; # else - return func_table[fn_index(algorithm, av, variant, opt_assembly)]; + return func_table[VARIANT_MAX * 4 * algorithm + 4 * variant + av - 1]; # endif } @@ -290,6 +280,16 @@ bool cryptonight_init(int av) { opt_double_hash = av == AV_DOUBLE || av == AV_DOUBLE_SOFT; +# ifndef XMRIG_NO_ASM + asm_func_map[AV_SINGLE][VARIANT_2][ASM_INTEL] = cryptonight_single_hash_asm_intel; + asm_func_map[AV_SINGLE][VARIANT_2][ASM_RYZEN] = cryptonight_single_hash_asm_ryzen; + asm_func_map[AV_SINGLE][VARIANT_2][ASM_BULLDOZER] = cryptonight_single_hash_asm_bulldozer; + + asm_func_map[AV_DOUBLE][VARIANT_2][ASM_INTEL] = cryptonight_double_hash_asm; + asm_func_map[AV_DOUBLE][VARIANT_2][ASM_RYZEN] = cryptonight_double_hash_asm; + asm_func_map[AV_DOUBLE][VARIANT_2][ASM_BULLDOZER] = cryptonight_double_hash_asm; +# endif + return self_test(); } diff --git a/algo/cryptonight/cryptonight_av1.c b/algo/cryptonight/cryptonight_av1.c index c71635ea..e4a0662d 100644 --- a/algo/cryptonight/cryptonight_av1.c +++ b/algo/cryptonight/cryptonight_av1.c @@ -196,6 +196,7 @@ void cryptonight_av1_v2(const uint8_t *restrict input, size_t size, uint8_t *res #ifndef XMRIG_NO_ASM extern void 
cnv2_mainloop_ivybridge_asm(struct cryptonight_ctx *ctx); extern void cnv2_mainloop_ryzen_asm(struct cryptonight_ctx *ctx); +extern void cnv2_mainloop_bulldozer_asm(struct cryptonight_ctx *ctx); extern void cnv2_double_mainloop_sandybridge_asm(struct cryptonight_ctx* ctx0, struct cryptonight_ctx* ctx1); @@ -225,6 +226,19 @@ void cryptonight_single_hash_asm_ryzen(const uint8_t *restrict input, size_t siz } +void cryptonight_single_hash_asm_bulldozer(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx) +{ + keccak(input, size, ctx[0]->state, 200); + cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory); + + cnv2_mainloop_bulldozer_asm(ctx[0]); + + cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state); + keccakf((uint64_t*) ctx[0]->state, 24); + extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output); +} + + void cryptonight_double_hash_asm(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx) { keccak(input, size, ctx[0]->state, 200); diff --git a/algo/cryptonight/cryptonight_r_av1.c b/algo/cryptonight/cryptonight_r_av1.c index af9b7f2e..09e858d1 100644 --- a/algo/cryptonight/cryptonight_r_av1.c +++ b/algo/cryptonight/cryptonight_r_av1.c @@ -97,54 +97,5 @@ void cryptonight_r_av1(const uint8_t *restrict input, size_t size, uint8_t *rest #ifndef XMRIG_NO_ASM -extern void cnv2_mainloop_ivybridge_asm(struct cryptonight_ctx *ctx); -extern void cnv2_mainloop_ryzen_asm(struct cryptonight_ctx *ctx); -extern void cnv2_double_mainloop_sandybridge_asm(struct cryptonight_ctx* ctx0, struct cryptonight_ctx* ctx1); - -void cryptonight_single_hash_asm_intel(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx) -{ - keccak(input, size, ctx[0]->state, 200); - cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory); - - cnv2_mainloop_ivybridge_asm(ctx[0]); 
- - cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state); - keccakf((uint64_t*) ctx[0]->state, 24); - extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output); -} - - -void cryptonight_single_hash_asm_ryzen(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx) -{ - keccak(input, size, ctx[0]->state, 200); - cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory); - - cnv2_mainloop_ryzen_asm(ctx[0]); - - cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state); - keccakf((uint64_t*) ctx[0]->state, 24); - extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output); -} - - -void cryptonight_double_hash_asm(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx) -{ - keccak(input, size, ctx[0]->state, 200); - keccak(input + size, size, ctx[1]->state, 200); - - cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory); - cn_explode_scratchpad((__m128i*) ctx[1]->state, (__m128i*) ctx[1]->memory); - - cnv2_double_mainloop_sandybridge_asm(ctx[0], ctx[1]); - - cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state); - cn_implode_scratchpad((__m128i*) ctx[1]->memory, (__m128i*) ctx[1]->state); - - keccakf((uint64_t*) ctx[0]->state, 24); - keccakf((uint64_t*) ctx[1]->state, 24); - - extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output); - extra_hashes[ctx[1]->state[0] & 3](ctx[1]->state, 200, output + 32); -} #endif diff --git a/cmake/asm.cmake b/cmake/asm.cmake index 4420342c..2fa14b39 100644 --- a/cmake/asm.cmake +++ b/cmake/asm.cmake @@ -1,28 +1,16 @@ if (WITH_ASM AND NOT XMRIG_ARM AND CMAKE_SIZEOF_VOID_P EQUAL 8) set(XMRIG_ASM_LIBRARY "xmrig-asm") - if (CMAKE_C_COMPILER_ID MATCHES MSVC) - enable_language(ASM_MASM) + enable_language(ASM) - if (MSVC_TOOLSET_VERSION GREATER_EQUAL 141) - set(XMRIG_ASM_FILE "crypto/asm/cnv2_main_loop.asm") - else() - 
set(XMRIG_ASM_FILE "crypto/asm/win64/cnv2_main_loop.asm") - endif() - - set_property(SOURCE ${XMRIG_ASM_FILE} PROPERTY ASM_MASM) + if (WIN32 AND CMAKE_C_COMPILER_ID MATCHES GNU) + set(XMRIG_ASM_FILE "crypto/asm/win64/cn_main_loop.S") else() - enable_language(ASM) - - if (WIN32 AND CMAKE_C_COMPILER_ID MATCHES GNU) - set(XMRIG_ASM_FILE "crypto/asm/win64/cnv2_main_loop.S") - else() - set(XMRIG_ASM_FILE "crypto/asm/cnv2_main_loop.S") - endif() - - set_property(SOURCE ${XMRIG_ASM_FILE} PROPERTY C) + set(XMRIG_ASM_FILE "crypto/asm/cn_main_loop.S") endif() + set_property(SOURCE ${XMRIG_ASM_FILE} PROPERTY C) + add_library(${XMRIG_ASM_LIBRARY} STATIC ${XMRIG_ASM_FILE}) set(XMRIG_ASM_SOURCES "") set_property(TARGET ${XMRIG_ASM_LIBRARY} PROPERTY LINKER_LANGUAGE C) diff --git a/cpu.c b/cpu.c index 0d28559a..2509a27e 100644 --- a/cpu.c +++ b/cpu.c @@ -4,8 +4,9 @@ * Copyright 2014 Lucas Jones * Copyright 2014-2016 Wolf9466 * Copyright 2016 Jay D Dee - * Copyright 2016-2017 XMRig - * + * Copyright 2017-2018 XMR-Stak , + * Copyright 2018-2019 SChernykh + * Copyright 2016-2019 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -64,20 +65,20 @@ void cpu_init_common() { if (data.flags[CPU_FEATURE_AES]) { cpu_info.flags |= CPU_FLAG_AES; - -# ifndef XMRIG_NO_ASM - if (data.vendor == VENDOR_AMD) { - cpu_info.assembly = ASM_RYZEN; - } - else if (data.vendor == VENDOR_INTEL) { - cpu_info.assembly = ASM_INTEL; - } -# endif } if (data.flags[CPU_FEATURE_BMI2]) { cpu_info.flags |= CPU_FLAG_BMI2; } + +# ifndef XMRIG_NO_ASM + if (data.vendor == VENDOR_AMD) { + cpu_info.assembly = (data.ext_family >= 23) ? 
ASM_RYZEN : ASM_BULLDOZER; + } + else if (data.vendor == VENDOR_INTEL) { + cpu_info.assembly = ASM_INTEL; + } +# endif } #endif diff --git a/cpu.h b/cpu.h index e9314bbe..d979c86f 100644 --- a/cpu.h +++ b/cpu.h @@ -4,8 +4,9 @@ * Copyright 2014 Lucas Jones * Copyright 2014-2016 Wolf9466 * Copyright 2016 Jay D Dee - * Copyright 2016-2017 XMRig - * + * Copyright 2017-2018 XMR-Stak , + * Copyright 2018-2019 SChernykh + * Copyright 2016-2019 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff --git a/crypto/asm/cnv2_double_main_loop_sandybridge.inc b/crypto/asm/cn2/cnv2_double_main_loop_sandybridge.inc similarity index 99% rename from crypto/asm/cnv2_double_main_loop_sandybridge.inc rename to crypto/asm/cn2/cnv2_double_main_loop_sandybridge.inc index e8251bc7..aa5101a8 100644 --- a/crypto/asm/cnv2_double_main_loop_sandybridge.inc +++ b/crypto/asm/cn2/cnv2_double_main_loop_sandybridge.inc @@ -94,7 +94,7 @@ lea r9, QWORD PTR [rdx+r13] movdqu xmm15, XMMWORD PTR [r9] - ALIGN 16 + ALIGN(64) main_loop_double_sandybridge: movdqu xmm9, xmm15 mov eax, edx diff --git a/crypto/asm/cn2/cnv2_main_loop_bulldozer.inc b/crypto/asm/cn2/cnv2_main_loop_bulldozer.inc new file mode 100644 index 00000000..c764501d --- /dev/null +++ b/crypto/asm/cn2/cnv2_main_loop_bulldozer.inc @@ -0,0 +1,180 @@ + mov QWORD PTR [rsp+16], rbx + mov QWORD PTR [rsp+24], rbp + mov QWORD PTR [rsp+32], rsi + push rdi + push r12 + push r13 + push r14 + push r15 + sub rsp, 64 + + stmxcsr DWORD PTR [rsp] + mov DWORD PTR [rsp+4], 24448 + ldmxcsr DWORD PTR [rsp+4] + + mov rax, QWORD PTR [rcx+48] + mov r9, rcx + xor rax, QWORD PTR [rcx+16] + mov ebp, 524288 + mov r8, QWORD PTR [rcx+32] + xor r8, QWORD PTR [rcx] + mov r11, QWORD PTR [rcx+40] + mov r10, r8 + mov rdx, QWORD PTR [rcx+56] + movq xmm3, rax + xor rdx, QWORD PTR [rcx+24] + xor r11, QWORD PTR [rcx+8] + mov rbx, QWORD PTR [rcx+224] + mov rax, QWORD PTR [r9+80] 
+ xor rax, QWORD PTR [r9+64] + movq xmm0, rdx + mov rcx, QWORD PTR [rcx+88] + xor rcx, QWORD PTR [r9+72] + mov rdi, QWORD PTR [r9+104] + and r10d, 2097136 + movaps XMMWORD PTR [rsp+48], xmm6 + movq xmm4, rax + movaps XMMWORD PTR [rsp+32], xmm7 + movaps XMMWORD PTR [rsp+16], xmm8 + xorps xmm8, xmm8 + mov ax, 1023 + shl rax, 52 + movq xmm7, rax + mov r15, QWORD PTR [r9+96] + punpcklqdq xmm3, xmm0 + movq xmm0, rcx + punpcklqdq xmm4, xmm0 + + ALIGN(64) +cnv2_main_loop_bulldozer: + movdqa xmm5, XMMWORD PTR [r10+rbx] + movq xmm6, r8 + pinsrq xmm6, r11, 1 + lea rdx, QWORD PTR [r10+rbx] + lea r9, QWORD PTR [rdi+rdi] + shl rdi, 32 + + mov ecx, r10d + mov eax, r10d + xor ecx, 16 + xor eax, 32 + xor r10d, 48 + aesenc xmm5, xmm6 + movdqa xmm2, XMMWORD PTR [rcx+rbx] + movdqa xmm1, XMMWORD PTR [rax+rbx] + movdqa xmm0, XMMWORD PTR [r10+rbx] + paddq xmm2, xmm3 + paddq xmm1, xmm6 + paddq xmm0, xmm4 + movdqa XMMWORD PTR [rcx+rbx], xmm0 + movdqa XMMWORD PTR [rax+rbx], xmm2 + movdqa XMMWORD PTR [r10+rbx], xmm1 + + movaps xmm1, xmm8 + mov rsi, r15 + xor rsi, rdi + + mov edi, 1023 + shl rdi, 52 + + movq r14, xmm5 + pextrq rax, xmm5, 1 + + movdqa xmm0, xmm5 + pxor xmm0, xmm3 + mov r10, r14 + and r10d, 2097136 + movdqa XMMWORD PTR [rdx], xmm0 + xor rsi, QWORD PTR [r10+rbx] + lea r12, QWORD PTR [r10+rbx] + mov r13, QWORD PTR [r10+rbx+8] + + add r9d, r14d + or r9d, -2147483647 + xor edx, edx + div r9 + mov eax, eax + shl rdx, 32 + lea r15, [rax+rdx] + lea rax, [r14+r15] + shr rax, 12 + add rax, rdi + movq xmm0, rax + sqrtsd xmm1, xmm0 + movq rdi, xmm1 + test rdi, 524287 + je sqrt_fixup_bulldozer + shr rdi, 19 + +sqrt_fixup_bulldozer_ret: + mov rax, rsi + mul r14 + movq xmm1, rax + movq xmm0, rdx + punpcklqdq xmm0, xmm1 + + mov r9d, r10d + mov ecx, r10d + xor r9d, 16 + xor ecx, 32 + xor r10d, 48 + movdqa xmm1, XMMWORD PTR [rcx+rbx] + xor rdx, [rcx+rbx] + xor rax, [rcx+rbx+8] + movdqa xmm2, XMMWORD PTR [r9+rbx] + pxor xmm2, xmm0 + paddq xmm4, XMMWORD PTR [r10+rbx] + paddq xmm2, xmm3 + paddq 
xmm1, xmm6 + movdqa XMMWORD PTR [r9+rbx], xmm4 + movdqa XMMWORD PTR [rcx+rbx], xmm2 + movdqa XMMWORD PTR [r10+rbx], xmm1 + + movdqa xmm4, xmm3 + add r8, rdx + add r11, rax + mov QWORD PTR [r12], r8 + xor r8, rsi + mov QWORD PTR [r12+8], r11 + mov r10, r8 + xor r11, r13 + and r10d, 2097136 + movdqa xmm3, xmm5 + dec ebp + jne cnv2_main_loop_bulldozer + + ldmxcsr DWORD PTR [rsp] + movaps xmm6, XMMWORD PTR [rsp+48] + lea r11, QWORD PTR [rsp+64] + mov rbx, QWORD PTR [r11+56] + mov rbp, QWORD PTR [r11+64] + mov rsi, QWORD PTR [r11+72] + movaps xmm8, XMMWORD PTR [r11-48] + movaps xmm7, XMMWORD PTR [rsp+32] + mov rsp, r11 + pop r15 + pop r14 + pop r13 + pop r12 + pop rdi + jmp cnv2_main_loop_bulldozer_endp + +sqrt_fixup_bulldozer: + movq r9, xmm5 + add r9, r15 + dec rdi + mov edx, -1022 + shl rdx, 32 + mov rax, rdi + shr rdi, 19 + shr rax, 20 + mov rcx, rdi + sub rcx, rax + lea rcx, [rcx+rdx+1] + add rax, rdx + imul rcx, rax + sub rcx, r9 + adc rdi, 0 + jmp sqrt_fixup_bulldozer_ret + +cnv2_main_loop_bulldozer_endp: diff --git a/crypto/asm/cnv2_main_loop_ivybridge.inc b/crypto/asm/cn2/cnv2_main_loop_ivybridge.inc similarity index 99% rename from crypto/asm/cnv2_main_loop_ivybridge.inc rename to crypto/asm/cn2/cnv2_main_loop_ivybridge.inc index 8c2c2d3b..06f1d28b 100644 --- a/crypto/asm/cnv2_main_loop_ivybridge.inc +++ b/crypto/asm/cn2/cnv2_main_loop_ivybridge.inc @@ -50,7 +50,7 @@ punpcklqdq xmm5, xmm0 movdqu xmm6, XMMWORD PTR [r10+rbx] - ALIGN 16 + ALIGN(64) main_loop_ivybridge: lea rdx, QWORD PTR [r10+rbx] mov ecx, r10d diff --git a/crypto/asm/cnv2_main_loop_ryzen.inc b/crypto/asm/cn2/cnv2_main_loop_ryzen.inc similarity index 99% rename from crypto/asm/cnv2_main_loop_ryzen.inc rename to crypto/asm/cn2/cnv2_main_loop_ryzen.inc index d386aa2d..5dbf5917 100644 --- a/crypto/asm/cnv2_main_loop_ryzen.inc +++ b/crypto/asm/cn2/cnv2_main_loop_ryzen.inc @@ -45,7 +45,7 @@ movq xmm0, rcx punpcklqdq xmm4, xmm0 - ALIGN 16 + ALIGN(64) main_loop_ryzen: movdqa xmm5, XMMWORD PTR [r10+rbx] 
movq xmm0, r11 diff --git a/crypto/asm/cnv2_main_loop.S b/crypto/asm/cn_main_loop.S similarity index 51% rename from crypto/asm/cnv2_main_loop.S rename to crypto/asm/cn_main_loop.S index 4dbcbbda..a792337f 100644 --- a/crypto/asm/cnv2_main_loop.S +++ b/crypto/asm/cn_main_loop.S @@ -1,4 +1,8 @@ -#define ALIGN .align +#ifdef __APPLE__ +# define ALIGN(x) .align 6 +#else +# define ALIGN(x) .align 64 +#endif .intel_syntax noprefix #ifdef __APPLE__ # define FN_PREFIX(fn) _ ## fn @@ -9,29 +13,42 @@ #endif .global FN_PREFIX(cnv2_mainloop_ivybridge_asm) .global FN_PREFIX(cnv2_mainloop_ryzen_asm) +.global FN_PREFIX(cnv2_mainloop_bulldozer_asm) .global FN_PREFIX(cnv2_double_mainloop_sandybridge_asm) -ALIGN 16 +ALIGN(64) FN_PREFIX(cnv2_mainloop_ivybridge_asm): sub rsp, 48 mov rcx, rdi - #include "cnv2_main_loop_ivybridge.inc" + #include "cn2/cnv2_main_loop_ivybridge.inc" add rsp, 48 ret 0 + mov eax, 3735929054 -ALIGN 16 +ALIGN(64) FN_PREFIX(cnv2_mainloop_ryzen_asm): sub rsp, 48 mov rcx, rdi - #include "cnv2_main_loop_ryzen.inc" + #include "cn2/cnv2_main_loop_ryzen.inc" add rsp, 48 ret 0 + mov eax, 3735929054 -ALIGN 16 +ALIGN(64) +FN_PREFIX(cnv2_mainloop_bulldozer_asm): + sub rsp, 48 + mov rcx, rdi + #include "cn2/cnv2_main_loop_bulldozer.inc" + add rsp, 48 + ret 0 + mov eax, 3735929054 + +ALIGN(64) FN_PREFIX(cnv2_double_mainloop_sandybridge_asm): sub rsp, 48 mov rcx, rdi mov rdx, rsi - #include "cnv2_double_main_loop_sandybridge.inc" + #include "cn2/cnv2_double_main_loop_sandybridge.inc" add rsp, 48 ret 0 + mov eax, 3735929054 diff --git a/crypto/asm/cnv2_main_loop.asm b/crypto/asm/cn_main_loop.asm similarity index 50% rename from crypto/asm/cnv2_main_loop.asm rename to crypto/asm/cn_main_loop.asm index d9522267..f1384be8 100644 --- a/crypto/asm/cnv2_main_loop.asm +++ b/crypto/asm/cn_main_loop.asm @@ -1,24 +1,35 @@ _TEXT_CNV2_MAINLOOP SEGMENT PAGE READ EXECUTE PUBLIC cnv2_mainloop_ivybridge_asm PUBLIC cnv2_mainloop_ryzen_asm +PUBLIC cnv2_mainloop_bulldozer_asm PUBLIC 
cnv2_double_mainloop_sandybridge_asm -ALIGN 64 +ALIGN(64) cnv2_mainloop_ivybridge_asm PROC - INCLUDE cnv2_main_loop_ivybridge.inc + INCLUDE cn2/cnv2_main_loop_ivybridge.inc ret 0 + mov eax, 3735929054 cnv2_mainloop_ivybridge_asm ENDP -ALIGN 64 +ALIGN(64) cnv2_mainloop_ryzen_asm PROC - INCLUDE cnv2_main_loop_ryzen.inc + INCLUDE cn2/cnv2_main_loop_ryzen.inc ret 0 + mov eax, 3735929054 cnv2_mainloop_ryzen_asm ENDP -ALIGN 64 -cnv2_double_mainloop_sandybridge_asm PROC - INCLUDE cnv2_double_main_loop_sandybridge.inc +ALIGN(64) +cnv2_mainloop_bulldozer_asm PROC + INCLUDE cn2/cnv2_main_loop_bulldozer.inc ret 0 + mov eax, 3735929054 +cnv2_mainloop_bulldozer_asm ENDP + +ALIGN(64) +cnv2_double_mainloop_sandybridge_asm PROC + INCLUDE cn2/cnv2_double_main_loop_sandybridge.inc + ret 0 + mov eax, 3735929054 cnv2_double_mainloop_sandybridge_asm ENDP _TEXT_CNV2_MAINLOOP ENDS diff --git a/crypto/asm/win64/cn_main_loop.S b/crypto/asm/win64/cn_main_loop.S new file mode 100644 index 00000000..1200c4df --- /dev/null +++ b/crypto/asm/win64/cn_main_loop.S @@ -0,0 +1,31 @@ +#define ALIGN(x) .align 64 +.intel_syntax noprefix +.section .text +.global cnv2_mainloop_ivybridge_asm +.global cnv2_mainloop_ryzen_asm +.global cnv2_mainloop_bulldozer_asm +.global cnv2_double_mainloop_sandybridge_asm + +ALIGN(64) +cnv2_mainloop_ivybridge_asm: + #include "../cn2/cnv2_main_loop_ivybridge.inc" + ret 0 + mov eax, 3735929054 + +ALIGN(64) +cnv2_mainloop_ryzen_asm: + #include "../cn2/cnv2_main_loop_ryzen.inc" + ret 0 + mov eax, 3735929054 + +ALIGN(64) +cnv2_mainloop_bulldozer_asm: + #include "../cn2/cnv2_main_loop_bulldozer.inc" + ret 0 + mov eax, 3735929054 + +ALIGN(64) +cnv2_double_mainloop_sandybridge_asm: + #include "../cn2/cnv2_double_main_loop_sandybridge.inc" + ret 0 + mov eax, 3735929054 diff --git a/crypto/asm/win64/cnv2_main_loop.S b/crypto/asm/win64/cnv2_main_loop.S deleted file mode 100644 index 78eb1185..00000000 --- a/crypto/asm/win64/cnv2_main_loop.S +++ /dev/null @@ -1,21 +0,0 @@ -#define 
ALIGN .align -.intel_syntax noprefix -.section .text -.global cnv2_mainloop_ivybridge_asm -.global cnv2_mainloop_ryzen_asm -.global cnv2_double_mainloop_sandybridge_asm - -ALIGN 16 -cnv2_mainloop_ivybridge_asm: - #include "../cnv2_main_loop_ivybridge.inc" - ret 0 - -ALIGN 16 -cnv2_mainloop_ryzen_asm: - #include "../cnv2_main_loop_ryzen.inc" - ret 0 - -ALIGN 16 -cnv2_double_mainloop_sandybridge_asm: - #include "../cnv2_double_main_loop_sandybridge.inc" - ret 0 diff --git a/options.c b/options.c index eaa4355a..cfe1bfe0 100644 --- a/options.c +++ b/options.c @@ -127,7 +127,7 @@ static struct option const options[] = { { "cpu-affinity", 1, NULL, 1020 }, { "donate-level", 1, NULL, 1003 }, { "help", 0, NULL, 'h' }, - { "keepalive", 0, NULL ,'k' }, + { "keepalive", 0, NULL, 'k' }, { "max-cpu-usage", 1, NULL, 1004 }, { "nicehash", 0, NULL, 1006 }, { "no-color", 0, NULL, 1002 }, @@ -167,7 +167,8 @@ static const char *asm_names[] = { "none", "auto", "intel", - "ryzen" + "ryzen", + "bulldozer" }; diff --git a/options.h b/options.h index 19e37883..f29a7fc8 100644 --- a/options.h +++ b/options.h @@ -65,6 +65,7 @@ enum Assembly { ASM_AUTO, ASM_INTEL, ASM_RYZEN, + ASM_BULLDOZER, ASM_MAX };