diff --git a/CMakeLists.txt b/CMakeLists.txt
index e7417c76..51b8b167 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -3,6 +3,7 @@ project(xmrig C)
option(WITH_LIBCPUID "Use Libcpuid" ON)
option(WITH_AEON "CryptoNight-Lite support" ON)
+option(WITH_ASM "Enable ASM PoW implementations" ON)
set(HEADERS
algo/cryptonight/cryptonight.h
@@ -125,6 +126,8 @@ else()
set(SOURCES_CPUID cpu_stub.c)
endif()
+include(cmake/asm.cmake)
+
if (WITH_AEON)
set(SOURCES_AEON
algo/cryptonight-lite/cryptonight_lite_av1.c
@@ -139,10 +142,10 @@ else()
endif()
if (CMAKE_SIZEOF_VOID_P EQUAL 8)
- add_executable(xmrig ${HEADERS} ${HEADERS_CRYPTO} ${SOURCES} ${SOURCES_CRYPTO} ${HEADERS_UTILS} ${SOURCES_UTILS} ${HEADERS_COMPAT} ${SOURCES_COMPAT} ${SOURCES_OS} ${SOURCES_CPUID} ${SOURCES_AEON})
- target_link_libraries(xmrig jansson ${CURL_LIBRARY} ${CPUID_LIB} ${EXTRA_LIBS})
+ add_executable(xmrig ${HEADERS} ${HEADERS_CRYPTO} ${SOURCES} ${SOURCES_CRYPTO} ${HEADERS_UTILS} ${SOURCES_UTILS} ${HEADERS_COMPAT} ${SOURCES_COMPAT} ${SOURCES_OS} ${SOURCES_CPUID} ${SOURCES_AEON} ${XMRIG_ASM_SOURCES})
+ target_link_libraries(xmrig ${XMRIG_ASM_LIBRARY} jansson ${CURL_LIBRARY} ${CPUID_LIB} ${EXTRA_LIBS})
else()
- add_executable(xmrig32 ${HEADERS} ${HEADERS_CRYPTO} ${SOURCES} ${SOURCES_CRYPTO} ${HEADERS_UTILS} ${SOURCES_UTILS} ${HEADERS_COMPAT} ${SOURCES_COMPAT} ${SOURCES_OS} ${SOURCES_CPUID} ${SOURCES_AEON})
- target_link_libraries(xmrig32 jansson ${CURL_LIBRARY} ${CPUID_LIB} ${EXTRA_LIBS})
+ add_executable(xmrig32 ${HEADERS} ${HEADERS_CRYPTO} ${SOURCES} ${SOURCES_CRYPTO} ${HEADERS_UTILS} ${SOURCES_UTILS} ${HEADERS_COMPAT} ${SOURCES_COMPAT} ${SOURCES_OS} ${SOURCES_CPUID} ${SOURCES_AEON} ${XMRIG_ASM_SOURCES})
+ target_link_libraries(xmrig32 ${XMRIG_ASM_LIBRARY} jansson ${CURL_LIBRARY} ${CPUID_LIB} ${EXTRA_LIBS})
endif()
diff --git a/algo/cryptonight/cryptonight.c b/algo/cryptonight/cryptonight.c
index 728e5822..62dbdc50 100644
--- a/algo/cryptonight/cryptonight.c
+++ b/algo/cryptonight/cryptonight.c
@@ -33,6 +33,7 @@
# include "xmrig.h"
#endif
+#include "cpu.h"
#include "crypto/c_blake256.h"
#include "crypto/c_groestl.h"
#include "crypto/c_jh.h"
@@ -68,6 +69,13 @@ void cryptonight_lite_av4_v1(const uint8_t *input, size_t size, uint8_t *output,
#endif
+#ifndef XMRIG_NO_ASM
+void cryptonight_single_hash_asm_intel(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
+void cryptonight_single_hash_asm_ryzen(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
+void cryptonight_double_hash_asm(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
+#endif
+
+
static inline bool verify(enum Variant variant, uint8_t *output, struct cryptonight_ctx **ctx, const uint8_t *referenceValue)
{
cn_hash_fun func = cryptonight_hash_fn(opt_algo, opt_av, variant);
@@ -116,12 +124,46 @@ static bool self_test() {
}
+/*
+ * Index into cryptonight_hash_fn()'s func_table. Portable slots are laid out as
+ * [algorithm][variant][av - 1]; the 3 ASM slots (intel, ryzen, double) follow.
+ */
+size_t fn_index(enum Algo algorithm, enum AlgoVariant av, enum Variant variant, enum Assembly assembly)
+{
+    const size_t index = VARIANT_MAX * 4 * algorithm + 4 * variant + av - 1;
+
+#   ifndef XMRIG_NO_ASM
+    if (assembly == ASM_AUTO) {
+        assembly = cpu_info.assembly; /* filled in by CPU detection; 0 (ASM_NONE) if never set */
+    }
+
+    /* Only ASM_INTEL/ASM_RYZEN own a slot; ASM_NONE, an unresolved ASM_AUTO or
+     * an out-of-range value must stay on the portable path. */
+    if ((assembly == ASM_INTEL || assembly == ASM_RYZEN)
+        && algorithm == ALGO_CRYPTONIGHT && variant == VARIANT_2) {
+        const size_t offset = VARIANT_MAX * 4 * 2; /* first slot after the portable entries */
+        if (av == AV_SINGLE) {
+            return offset + (assembly - ASM_INTEL); /* +0 intel, +1 ryzen */
+        }
+        if (av == AV_DOUBLE) {
+            return offset + 2; /* one double-hash ASM routine for both vendors */
+        }
+    }
+#   endif
+    return index;
+}
+
+
cn_hash_fun cryptonight_hash_fn(enum Algo algorithm, enum AlgoVariant av, enum Variant variant)
{
assert(av > AV_AUTO && av < AV_MAX);
assert(variant > VARIANT_AUTO && variant < VARIANT_MAX);
+# ifndef XMRIG_NO_ASM
+ static const cn_hash_fun func_table[VARIANT_MAX * 4 * 2 + 3] = {
+# else
static const cn_hash_fun func_table[VARIANT_MAX * 4 * 2] = {
+# endif
cryptonight_av1_v0,
cryptonight_av2_v0,
cryptonight_av3_v0,
@@ -147,13 +189,31 @@ cn_hash_fun cryptonight_hash_fn(enum Algo algorithm, enum AlgoVariant av, enum V
NULL,
NULL,
NULL,
- NULL
+ NULL,
+# else
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+# endif
+# ifndef XMRIG_NO_ASM
+ cryptonight_single_hash_asm_intel,
+ cryptonight_single_hash_asm_ryzen,
+ cryptonight_double_hash_asm
# endif
};
- const size_t index = VARIANT_MAX * 4 * algorithm + 4 * variant + av - 1;
-
# ifndef NDEBUG
+ const size_t index = fn_index(algorithm, av, variant, opt_assembly);
+
cn_hash_fun func = func_table[index];
assert(index < sizeof(func_table) / sizeof(func_table[0]));
@@ -161,7 +221,7 @@ cn_hash_fun cryptonight_hash_fn(enum Algo algorithm, enum AlgoVariant av, enum V
return func;
# else
- return func_table[index];
+ return func_table[fn_index(algorithm, av, variant, opt_assembly)];
# endif
}
diff --git a/algo/cryptonight/cryptonight_av1.c b/algo/cryptonight/cryptonight_av1.c
index 9ef83b07..c71635ea 100644
--- a/algo/cryptonight/cryptonight_av1.c
+++ b/algo/cryptonight/cryptonight_av1.c
@@ -191,3 +191,75 @@ void cryptonight_av1_v2(const uint8_t *restrict input, size_t size, uint8_t *res
keccakf(h0, 24);
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
}
+
+
+#ifndef XMRIG_NO_ASM
+/* Main loops implemented in assembly (crypto/asm/); each takes the hashing context(s). */
+extern void cnv2_mainloop_ivybridge_asm(struct cryptonight_ctx *ctx);
+extern void cnv2_mainloop_ryzen_asm(struct cryptonight_ctx *ctx);
+extern void cnv2_double_mainloop_sandybridge_asm(struct cryptonight_ctx* ctx0, struct cryptonight_ctx* ctx1);
+
+
+/*
+ * Single hash built around cnv2_mainloop_ivybridge_asm(): keccak() of the input
+ * into ctx[0]->state, scratchpad explode, ASM main loop, scratchpad implode,
+ * keccakf(), then the final hash picked by the low two bits of state[0].
+ */
+void cryptonight_single_hash_asm_intel(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
+{
+    struct cryptonight_ctx *const c = ctx[0];
+
+    keccak(input, size, c->state, 200);
+    cn_explode_scratchpad((__m128i*) c->state, (__m128i*) c->memory);
+
+    cnv2_mainloop_ivybridge_asm(c);
+
+    cn_implode_scratchpad((__m128i*) c->memory, (__m128i*) c->state);
+    keccakf((uint64_t*) c->state, 24);
+    extra_hashes[c->state[0] & 3](c->state, 200, output);
+}
+
+
+/* Same sequence as cryptonight_single_hash_asm_intel(), with cnv2_mainloop_ryzen_asm() as the main loop. */
+void cryptonight_single_hash_asm_ryzen(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
+{
+    struct cryptonight_ctx *const c = ctx[0];
+
+    keccak(input, size, c->state, 200);
+    cn_explode_scratchpad((__m128i*) c->state, (__m128i*) c->memory);
+
+    cnv2_mainloop_ryzen_asm(c);
+
+    cn_implode_scratchpad((__m128i*) c->memory, (__m128i*) c->state);
+    keccakf((uint64_t*) c->state, 24);
+    extra_hashes[c->state[0] & 3](c->state, 200, output);
+}
+
+
+/*
+ * Double hash: processes two inputs of `size` bytes each, the second one
+ * starting at input + size, using ctx[0] and ctx[1]. The first result is
+ * written at output, the second at output + 32.
+ */
+void cryptonight_double_hash_asm(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
+{
+    struct cryptonight_ctx *const first  = ctx[0];
+    struct cryptonight_ctx *const second = ctx[1];
+
+    keccak(input, size, first->state, 200);
+    keccak(input + size, size, second->state, 200);
+
+    cn_explode_scratchpad((__m128i*) first->state, (__m128i*) first->memory);
+    cn_explode_scratchpad((__m128i*) second->state, (__m128i*) second->memory);
+
+    cnv2_double_mainloop_sandybridge_asm(first, second);
+
+    cn_implode_scratchpad((__m128i*) first->memory, (__m128i*) first->state);
+    cn_implode_scratchpad((__m128i*) second->memory, (__m128i*) second->state);
+
+    keccakf((uint64_t*) first->state, 24);
+    keccakf((uint64_t*) second->state, 24);
+
+    extra_hashes[first->state[0] & 3](first->state, 200, output);
+    extra_hashes[second->state[0] & 3](second->state, 200, output + 32);
+}
+#endif
diff --git a/cmake/asm.cmake b/cmake/asm.cmake
new file mode 100644
index 00000000..4420342c
--- /dev/null
+++ b/cmake/asm.cmake
@@ -0,0 +1,47 @@
+# Optional hand-written x86-64 assembly for the CryptoNight variant 2 main loop.
+#
+# Inputs:
+#   WITH_ASM  - user option; ON requests the ASM implementations.
+#   XMRIG_ARM - when true the ASM library is not built.
+#
+# Outputs:
+#   XMRIG_ASM_LIBRARY - static library target to link, or "" when ASM is disabled.
+#   XMRIG_ASM_SOURCES - always ""; lets add_executable() reference it unconditionally.
+#   XMRIG_NO_ASM      - compile definition added when ASM is disabled.
+if (WITH_ASM AND NOT XMRIG_ARM AND CMAKE_SIZEOF_VOID_P EQUAL 8)
+    set(XMRIG_ASM_LIBRARY "xmrig-asm")
+
+    if (CMAKE_C_COMPILER_ID MATCHES MSVC)
+        enable_language(ASM_MASM)
+
+        if (MSVC_TOOLSET_VERSION GREATER_EQUAL 141)
+            set(XMRIG_ASM_FILE "crypto/asm/cnv2_main_loop.asm")
+        else()
+            set(XMRIG_ASM_FILE "crypto/asm/win64/cnv2_main_loop.asm")
+        endif()
+
+        # Name the property explicitly: without LANGUAGE the call set nothing useful.
+        set_property(SOURCE ${XMRIG_ASM_FILE} PROPERTY LANGUAGE ASM_MASM)
+    else()
+        enable_language(ASM)
+
+        if (WIN32 AND CMAKE_C_COMPILER_ID MATCHES GNU)
+            set(XMRIG_ASM_FILE "crypto/asm/win64/cnv2_main_loop.S")
+        else()
+            set(XMRIG_ASM_FILE "crypto/asm/cnv2_main_loop.S")
+        endif()
+
+        # Build the .S file with the ASM language enabled above (the same language
+        # CMake infers for .S); the previous call omitted the property name.
+        set_property(SOURCE ${XMRIG_ASM_FILE} PROPERTY LANGUAGE ASM)
+    endif()
+
+    add_library(${XMRIG_ASM_LIBRARY} STATIC ${XMRIG_ASM_FILE})
+    set(XMRIG_ASM_SOURCES "")
+    set_property(TARGET ${XMRIG_ASM_LIBRARY} PROPERTY LINKER_LANGUAGE C)
+else()
+    set(XMRIG_ASM_SOURCES "")
+    set(XMRIG_ASM_LIBRARY "")
+    # add_definitions() accepts the /D spelling as well as -D.
+    add_definitions(/DXMRIG_NO_ASM)
+endif()
diff --git a/cpu.c b/cpu.c
index 2f6ef8b6..0d28559a 100644
--- a/cpu.c
+++ b/cpu.c
@@ -31,6 +31,7 @@
#endif
#include "cpu.h"
+#include "options.h"
#ifndef BUILD_TEST
@@ -63,6 +64,15 @@ void cpu_init_common() {
if (data.flags[CPU_FEATURE_AES]) {
cpu_info.flags |= CPU_FLAG_AES;
+
+# ifndef XMRIG_NO_ASM
+ if (data.vendor == VENDOR_AMD) {
+ cpu_info.assembly = ASM_RYZEN;
+ }
+ else if (data.vendor == VENDOR_INTEL) {
+ cpu_info.assembly = ASM_INTEL;
+ }
+# endif
}
if (data.flags[CPU_FEATURE_BMI2]) {
diff --git a/cpu.h b/cpu.h
index 419192bf..e9314bbe 100644
--- a/cpu.h
+++ b/cpu.h
@@ -21,8 +21,8 @@
* along with this program. If not, see .
*/
-#ifndef __CPU_H__
-#define __CPU_H__
+#ifndef XMRIG_CPU_H
+#define XMRIG_CPU_H
#include
@@ -34,6 +34,7 @@ struct cpu_info {
int l2_cache;
int l3_cache;
char brand[64];
+ int assembly;
};
extern struct cpu_info cpu_info;
@@ -50,4 +51,4 @@ void cpu_init();
int get_optimal_threads_count(int algo, bool double_hash, int max_cpu_usage);
int affine_to_cpu_mask(int id, unsigned long mask);
-#endif /* __CPU_H__ */
+#endif /* XMRIG_CPU_H */
diff --git a/crypto/asm/cnv2_double_main_loop_sandybridge.inc b/crypto/asm/cnv2_double_main_loop_sandybridge.inc
new file mode 100644
index 00000000..e8251bc7
--- /dev/null
+++ b/crypto/asm/cnv2_double_main_loop_sandybridge.inc
@@ -0,0 +1,410 @@
+ mov rax, rsp
+ push rbx
+ push rbp
+ push rsi
+ push rdi
+ push r12
+ push r13
+ push r14
+ push r15
+ sub rsp, 184
+
+ stmxcsr DWORD PTR [rsp+272]
+ mov DWORD PTR [rsp+276], 24448
+ ldmxcsr DWORD PTR [rsp+276]
+
+ mov r13, QWORD PTR [rcx+224]
+ mov r9, rdx
+ mov r10, QWORD PTR [rcx+32]
+ mov r8, rcx
+ xor r10, QWORD PTR [rcx]
+ mov r14d, 524288
+ mov r11, QWORD PTR [rcx+40]
+ xor r11, QWORD PTR [rcx+8]
+ mov rsi, QWORD PTR [rdx+224]
+ mov rdx, QWORD PTR [rcx+56]
+ xor rdx, QWORD PTR [rcx+24]
+ mov rdi, QWORD PTR [r9+32]
+ xor rdi, QWORD PTR [r9]
+ mov rbp, QWORD PTR [r9+40]
+ xor rbp, QWORD PTR [r9+8]
+ movq xmm0, rdx
+ movaps XMMWORD PTR [rax-88], xmm6
+ movaps XMMWORD PTR [rax-104], xmm7
+ movaps XMMWORD PTR [rax-120], xmm8
+ movaps XMMWORD PTR [rsp+112], xmm9
+ movaps XMMWORD PTR [rsp+96], xmm10
+ movaps XMMWORD PTR [rsp+80], xmm11
+ movaps XMMWORD PTR [rsp+64], xmm12
+ movaps XMMWORD PTR [rsp+48], xmm13
+ movaps XMMWORD PTR [rsp+32], xmm14
+ movaps XMMWORD PTR [rsp+16], xmm15
+ mov rdx, r10
+ movq xmm4, QWORD PTR [r8+96]
+ and edx, 2097136
+ mov rax, QWORD PTR [rcx+48]
+ xorps xmm13, xmm13
+ xor rax, QWORD PTR [rcx+16]
+ mov rcx, QWORD PTR [rcx+88]
+ xor rcx, QWORD PTR [r8+72]
+ movq xmm5, QWORD PTR [r8+104]
+ movq xmm7, rax
+
+ mov eax, 1
+ shl rax, 52
+ movq xmm14, rax
+ punpcklqdq xmm14, xmm14
+
+ mov eax, 1023
+ shl rax, 52
+ movq xmm12, rax
+ punpcklqdq xmm12, xmm12
+
+ mov rax, QWORD PTR [r8+80]
+ xor rax, QWORD PTR [r8+64]
+ punpcklqdq xmm7, xmm0
+ movq xmm0, rcx
+ mov rcx, QWORD PTR [r9+56]
+ xor rcx, QWORD PTR [r9+24]
+ movq xmm3, rax
+ mov rax, QWORD PTR [r9+48]
+ xor rax, QWORD PTR [r9+16]
+ punpcklqdq xmm3, xmm0
+ movq xmm0, rcx
+ mov QWORD PTR [rsp], r13
+ mov rcx, QWORD PTR [r9+88]
+ xor rcx, QWORD PTR [r9+72]
+ movq xmm6, rax
+ mov rax, QWORD PTR [r9+80]
+ xor rax, QWORD PTR [r9+64]
+ punpcklqdq xmm6, xmm0
+ movq xmm0, rcx
+ mov QWORD PTR [rsp+256], r10
+ mov rcx, rdi
+ mov QWORD PTR [rsp+264], r11
+ movq xmm8, rax
+ and ecx, 2097136
+ punpcklqdq xmm8, xmm0
+ movq xmm0, QWORD PTR [r9+96]
+ punpcklqdq xmm4, xmm0
+ movq xmm0, QWORD PTR [r9+104]
+ lea r8, QWORD PTR [rcx+rsi]
+ movdqu xmm11, XMMWORD PTR [r8]
+ punpcklqdq xmm5, xmm0
+ lea r9, QWORD PTR [rdx+r13]
+ movdqu xmm15, XMMWORD PTR [r9]
+
+ ALIGN 16
+main_loop_double_sandybridge:
+ movdqu xmm9, xmm15
+ mov eax, edx
+ mov ebx, edx
+ xor eax, 16
+ xor ebx, 32
+ xor edx, 48
+
+ movq xmm0, r11
+ movq xmm2, r10
+ punpcklqdq xmm2, xmm0
+ aesenc xmm9, xmm2
+
+ movdqu xmm0, XMMWORD PTR [rax+r13]
+ movdqu xmm1, XMMWORD PTR [rbx+r13]
+ paddq xmm0, xmm7
+ paddq xmm1, xmm2
+ movdqu XMMWORD PTR [rbx+r13], xmm0
+ movdqu xmm0, XMMWORD PTR [rdx+r13]
+ movdqu XMMWORD PTR [rdx+r13], xmm1
+ paddq xmm0, xmm3
+ movdqu XMMWORD PTR [rax+r13], xmm0
+
+ movq r11, xmm9
+ mov edx, r11d
+ and edx, 2097136
+ movdqa xmm0, xmm9
+ pxor xmm0, xmm7
+ movdqu XMMWORD PTR [r9], xmm0
+
+ lea rbx, QWORD PTR [rdx+r13]
+ mov r10, QWORD PTR [rdx+r13]
+
+ movdqu xmm10, xmm11
+ movq xmm0, rbp
+ movq xmm11, rdi
+ punpcklqdq xmm11, xmm0
+ aesenc xmm10, xmm11
+
+ mov eax, ecx
+ mov r12d, ecx
+ xor eax, 16
+ xor r12d, 32
+ xor ecx, 48
+
+ movdqu xmm0, XMMWORD PTR [rax+rsi]
+ paddq xmm0, xmm6
+ movdqu xmm1, XMMWORD PTR [r12+rsi]
+ movdqu XMMWORD PTR [r12+rsi], xmm0
+ paddq xmm1, xmm11
+ movdqu xmm0, XMMWORD PTR [rcx+rsi]
+ movdqu XMMWORD PTR [rcx+rsi], xmm1
+ paddq xmm0, xmm8
+ movdqu XMMWORD PTR [rax+rsi], xmm0
+
+ movq rcx, xmm10
+ and ecx, 2097136
+
+ movdqa xmm0, xmm10
+ pxor xmm0, xmm6
+ movdqu XMMWORD PTR [r8], xmm0
+ mov r12, QWORD PTR [rcx+rsi]
+
+ mov r9, QWORD PTR [rbx+8]
+
+ xor edx, 16
+ mov r8d, edx
+ mov r15d, edx
+
+ movq rdx, xmm5
+ shl rdx, 32
+ movq rax, xmm4
+ xor rdx, rax
+ xor r10, rdx
+ mov rax, r10
+ mul r11
+ mov r11d, r8d
+ xor r11d, 48
+ movq xmm0, rdx
+ xor rdx, [r11+r13]
+ movq xmm1, rax
+ xor rax, [r11+r13+8]
+ punpcklqdq xmm0, xmm1
+
+ pxor xmm0, XMMWORD PTR [r8+r13]
+ xor r8d, 32
+ movdqu xmm1, XMMWORD PTR [r11+r13]
+ paddq xmm0, xmm7
+ paddq xmm1, xmm2
+ movdqu XMMWORD PTR [r11+r13], xmm0
+ movdqu xmm0, XMMWORD PTR [r8+r13]
+ movdqu XMMWORD PTR [r8+r13], xmm1
+ paddq xmm0, xmm3
+ movdqu XMMWORD PTR [r15+r13], xmm0
+
+ mov r11, QWORD PTR [rsp+256]
+ add r11, rdx
+ mov rdx, QWORD PTR [rsp+264]
+ add rdx, rax
+ mov QWORD PTR [rbx], r11
+ xor r11, r10
+ mov QWORD PTR [rbx+8], rdx
+ xor rdx, r9
+ mov QWORD PTR [rsp+256], r11
+ and r11d, 2097136
+ mov QWORD PTR [rsp+264], rdx
+ mov QWORD PTR [rsp+8], r11
+ lea r15, QWORD PTR [r11+r13]
+ movdqu xmm15, XMMWORD PTR [r11+r13]
+ lea r13, QWORD PTR [rsi+rcx]
+ movdqa xmm0, xmm5
+ psrldq xmm0, 8
+ movaps xmm2, xmm13
+ movq r10, xmm0
+ psllq xmm5, 1
+ shl r10, 32
+ movdqa xmm0, xmm9
+ psrldq xmm0, 8
+ movdqa xmm1, xmm10
+ movq r11, xmm0
+ psrldq xmm1, 8
+ movq r8, xmm1
+ psrldq xmm4, 8
+ movaps xmm0, xmm13
+ movq rax, xmm4
+ xor r10, rax
+ movaps xmm1, xmm13
+ xor r10, r12
+ lea rax, QWORD PTR [r11+1]
+ shr rax, 1
+ movdqa xmm3, xmm9
+ punpcklqdq xmm3, xmm10
+ paddq xmm5, xmm3
+ movq rdx, xmm5
+ psrldq xmm5, 8
+ cvtsi2sd xmm2, rax
+ or edx, -2147483647
+ lea rax, QWORD PTR [r8+1]
+ shr rax, 1
+ movq r9, xmm5
+ cvtsi2sd xmm0, rax
+ or r9d, -2147483647
+ cvtsi2sd xmm1, rdx
+ unpcklpd xmm2, xmm0
+ movaps xmm0, xmm13
+ cvtsi2sd xmm0, r9
+ unpcklpd xmm1, xmm0
+ divpd xmm2, xmm1
+ paddq xmm2, xmm14
+ cvttsd2si rax, xmm2
+ psrldq xmm2, 8
+ mov rbx, rax
+ imul rax, rdx
+ sub r11, rax
+ js div_fix_1_sandybridge
+div_fix_1_ret_sandybridge:
+
+ cvttsd2si rdx, xmm2
+ mov rax, rdx
+ imul rax, r9
+ movd xmm2, r11d
+ movd xmm4, ebx
+ sub r8, rax
+ js div_fix_2_sandybridge
+div_fix_2_ret_sandybridge:
+
+ movd xmm1, r8d
+ movd xmm0, edx
+ punpckldq xmm2, xmm1
+ punpckldq xmm4, xmm0
+ punpckldq xmm4, xmm2
+ paddq xmm3, xmm4
+ movdqa xmm0, xmm3
+ psrlq xmm0, 12
+ paddq xmm0, xmm12
+ sqrtpd xmm1, xmm0
+ movq r9, xmm1
+ movdqa xmm5, xmm1
+ psrlq xmm5, 19
+ test r9, 524287
+ je sqrt_fix_1_sandybridge
+sqrt_fix_1_ret_sandybridge:
+
+ movq r9, xmm10
+ psrldq xmm1, 8
+ movq r8, xmm1
+ test r8, 524287
+ je sqrt_fix_2_sandybridge
+sqrt_fix_2_ret_sandybridge:
+
+ mov r12d, ecx
+ mov r8d, ecx
+ xor r12d, 16
+ xor r8d, 32
+ xor ecx, 48
+ mov rax, r10
+ mul r9
+ movq xmm0, rax
+ movq xmm3, rdx
+ punpcklqdq xmm3, xmm0
+
+ movdqu xmm0, XMMWORD PTR [r12+rsi]
+ pxor xmm0, xmm3
+ movdqu xmm1, XMMWORD PTR [r8+rsi]
+ xor rdx, [r8+rsi]
+ xor rax, [r8+rsi+8]
+ movdqu xmm3, XMMWORD PTR [rcx+rsi]
+ paddq xmm0, xmm6
+ paddq xmm1, xmm11
+ paddq xmm3, xmm8
+ movdqu XMMWORD PTR [r8+rsi], xmm0
+ movdqu XMMWORD PTR [rcx+rsi], xmm1
+ movdqu XMMWORD PTR [r12+rsi], xmm3
+
+ add rdi, rdx
+ mov QWORD PTR [r13], rdi
+ xor rdi, r10
+ mov ecx, edi
+ and ecx, 2097136
+ lea r8, QWORD PTR [rcx+rsi]
+
+ mov rdx, QWORD PTR [r13+8]
+ add rbp, rax
+ mov QWORD PTR [r13+8], rbp
+ movdqu xmm11, XMMWORD PTR [rcx+rsi]
+ xor rbp, rdx
+ mov r13, QWORD PTR [rsp]
+ movdqa xmm3, xmm7
+ mov rdx, QWORD PTR [rsp+8]
+ movdqa xmm8, xmm6
+ mov r10, QWORD PTR [rsp+256]
+ movdqa xmm7, xmm9
+ mov r11, QWORD PTR [rsp+264]
+ movdqa xmm6, xmm10
+ mov r9, r15
+ dec r14d
+ jne main_loop_double_sandybridge
+
+ ldmxcsr DWORD PTR [rsp+272]
+ movaps xmm13, XMMWORD PTR [rsp+48]
+ lea r11, QWORD PTR [rsp+184]
+ movaps xmm6, XMMWORD PTR [r11-24]
+ movaps xmm7, XMMWORD PTR [r11-40]
+ movaps xmm8, XMMWORD PTR [r11-56]
+ movaps xmm9, XMMWORD PTR [r11-72]
+ movaps xmm10, XMMWORD PTR [r11-88]
+ movaps xmm11, XMMWORD PTR [r11-104]
+ movaps xmm12, XMMWORD PTR [r11-120]
+ movaps xmm14, XMMWORD PTR [rsp+32]
+ movaps xmm15, XMMWORD PTR [rsp+16]
+ mov rsp, r11
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ pop rdi
+ pop rsi
+ pop rbp
+ pop rbx
+ jmp cnv2_double_mainloop_asm_sandybridge_endp
+
+div_fix_1_sandybridge:
+ dec rbx
+ add r11, rdx
+ jmp div_fix_1_ret_sandybridge
+
+div_fix_2_sandybridge:
+ dec rdx
+ add r8, r9
+ jmp div_fix_2_ret_sandybridge
+
+sqrt_fix_1_sandybridge:
+ movq r8, xmm3
+ movdqa xmm0, xmm5
+ psrldq xmm0, 8
+ dec r9
+ mov r11d, -1022
+ shl r11, 32
+ mov rax, r9
+ shr r9, 19
+ shr rax, 20
+ mov rdx, r9
+ sub rdx, rax
+ lea rdx, [rdx+r11+1]
+ add rax, r11
+ imul rdx, rax
+ sub rdx, r8
+ adc r9, 0
+ movq xmm5, r9
+ punpcklqdq xmm5, xmm0
+ jmp sqrt_fix_1_ret_sandybridge
+
+sqrt_fix_2_sandybridge:
+ psrldq xmm3, 8
+ movq r11, xmm3
+ dec r8
+ mov ebx, -1022
+ shl rbx, 32
+ mov rax, r8
+ shr r8, 19
+ shr rax, 20
+ mov rdx, r8
+ sub rdx, rax
+ lea rdx, [rdx+rbx+1]
+ add rax, rbx
+ imul rdx, rax
+ sub rdx, r11
+ adc r8, 0
+ movq xmm0, r8
+ punpcklqdq xmm5, xmm0
+ jmp sqrt_fix_2_ret_sandybridge
+
+cnv2_double_mainloop_asm_sandybridge_endp:
diff --git a/crypto/asm/cnv2_main_loop.S b/crypto/asm/cnv2_main_loop.S
new file mode 100644
index 00000000..4dbcbbda
--- /dev/null
+++ b/crypto/asm/cnv2_main_loop.S
@@ -0,0 +1,54 @@
+/*
+ * CryptoNight v2 main-loop entry points for GNU-style assemblers.
+ *
+ * The shared .inc bodies read their arguments from rcx/rdx and write to
+ * positive offsets from rsp on entry. Each wrapper therefore reserves 48
+ * bytes of stack and copies rdi (and rsi) into rcx (and rdx) before the body.
+ */
+
+/*
+ * The .inc files say "ALIGN 16". .balign always counts bytes; plain .align is
+ * read as a power-of-two exponent by some assemblers (Mach-O among them).
+ */
+#define ALIGN .balign
+.intel_syntax noprefix
+#ifdef __APPLE__
+#   define FN_PREFIX(fn) _ ## fn
+.text
+#else
+#   define FN_PREFIX(fn) fn
+.section .text
+#endif
+.global FN_PREFIX(cnv2_mainloop_ivybridge_asm)
+.global FN_PREFIX(cnv2_mainloop_ryzen_asm)
+.global FN_PREFIX(cnv2_double_mainloop_sandybridge_asm)
+
+ALIGN 16
+FN_PREFIX(cnv2_mainloop_ivybridge_asm):
+    sub rsp, 48
+    mov rcx, rdi
+    #include "cnv2_main_loop_ivybridge.inc"
+    add rsp, 48
+    ret 0
+
+ALIGN 16
+FN_PREFIX(cnv2_mainloop_ryzen_asm):
+    sub rsp, 48
+    mov rcx, rdi
+    #include "cnv2_main_loop_ryzen.inc"
+    add rsp, 48
+    ret 0
+
+ALIGN 16
+FN_PREFIX(cnv2_double_mainloop_sandybridge_asm):
+    sub rsp, 48
+    mov rcx, rdi
+    mov rdx, rsi
+    #include "cnv2_double_main_loop_sandybridge.inc"
+    add rsp, 48
+    ret 0
+
+/* Linux/ELF: declare that this object does not need an executable stack. */
+#if defined(__linux__) && defined(__ELF__)
+.section .note.GNU-stack,"",%progbits
+#endif
diff --git a/crypto/asm/cnv2_main_loop.asm b/crypto/asm/cnv2_main_loop.asm
new file mode 100644
index 00000000..d9522267
--- /dev/null
+++ b/crypto/asm/cnv2_main_loop.asm
@@ -0,0 +1,25 @@
+_TEXT_CNV2_MAINLOOP SEGMENT PAGE READ EXECUTE ; page-aligned, readable and executable segment holding the three loops
+PUBLIC cnv2_mainloop_ivybridge_asm
+PUBLIC cnv2_mainloop_ryzen_asm
+PUBLIC cnv2_double_mainloop_sandybridge_asm
+; Each PROC below inlines a shared .inc body; the body finishes on its own *_endp label, so execution continues into "ret 0".
+ALIGN 64
+cnv2_mainloop_ivybridge_asm PROC ; single hash; the body reads its context pointer from rcx
+ INCLUDE cnv2_main_loop_ivybridge.inc
+ ret 0
+cnv2_mainloop_ivybridge_asm ENDP
+
+ALIGN 64
+cnv2_mainloop_ryzen_asm PROC ; single hash; the body reads its context pointer from rcx
+ INCLUDE cnv2_main_loop_ryzen.inc
+ ret 0
+cnv2_mainloop_ryzen_asm ENDP
+
+ALIGN 64
+cnv2_double_mainloop_sandybridge_asm PROC ; double hash; the body reads its two context pointers from rcx and rdx
+ INCLUDE cnv2_double_main_loop_sandybridge.inc
+ ret 0
+cnv2_double_mainloop_sandybridge_asm ENDP
+
+_TEXT_CNV2_MAINLOOP ENDS
+END
diff --git a/crypto/asm/cnv2_main_loop_ivybridge.inc b/crypto/asm/cnv2_main_loop_ivybridge.inc
new file mode 100644
index 00000000..8c2c2d3b
--- /dev/null
+++ b/crypto/asm/cnv2_main_loop_ivybridge.inc
@@ -0,0 +1,186 @@
+ mov QWORD PTR [rsp+24], rbx
+ push rbp
+ push rsi
+ push rdi
+ push r12
+ push r13
+ push r14
+ push r15
+ sub rsp, 80
+
+ stmxcsr DWORD PTR [rsp]
+ mov DWORD PTR [rsp+4], 24448
+ ldmxcsr DWORD PTR [rsp+4]
+
+ mov rax, QWORD PTR [rcx+48]
+ mov r9, rcx
+ xor rax, QWORD PTR [rcx+16]
+ mov esi, 524288
+ mov r8, QWORD PTR [rcx+32]
+ mov r13d, -2147483647
+ xor r8, QWORD PTR [rcx]
+ mov r11, QWORD PTR [rcx+40]
+ mov r10, r8
+ mov rdx, QWORD PTR [rcx+56]
+ movq xmm4, rax
+ xor rdx, QWORD PTR [rcx+24]
+ xor r11, QWORD PTR [rcx+8]
+ mov rbx, QWORD PTR [rcx+224]
+ mov rax, QWORD PTR [r9+80]
+ xor rax, QWORD PTR [r9+64]
+ movq xmm0, rdx
+ mov rcx, QWORD PTR [rcx+88]
+ xor rcx, QWORD PTR [r9+72]
+ movq xmm3, QWORD PTR [r9+104]
+ movaps XMMWORD PTR [rsp+64], xmm6
+ movaps XMMWORD PTR [rsp+48], xmm7
+ movaps XMMWORD PTR [rsp+32], xmm8
+ and r10d, 2097136
+ movq xmm5, rax
+
+ xor eax, eax
+ mov QWORD PTR [rsp+16], rax
+
+ mov ax, 1023
+ shl rax, 52
+ movq xmm8, rax
+ mov r15, QWORD PTR [r9+96]
+ punpcklqdq xmm4, xmm0
+ movq xmm0, rcx
+ punpcklqdq xmm5, xmm0
+ movdqu xmm6, XMMWORD PTR [r10+rbx]
+
+ ALIGN 16
+main_loop_ivybridge:
+ lea rdx, QWORD PTR [r10+rbx]
+ mov ecx, r10d
+ mov eax, r10d
+ mov rdi, r15
+ xor ecx, 16
+ xor eax, 32
+ xor r10d, 48
+ movq xmm0, r11
+ movq xmm7, r8
+ punpcklqdq xmm7, xmm0
+ aesenc xmm6, xmm7
+ movq rbp, xmm6
+ mov r9, rbp
+ and r9d, 2097136
+ movdqu xmm2, XMMWORD PTR [rcx+rbx]
+ movdqu xmm1, XMMWORD PTR [rax+rbx]
+ movdqu xmm0, XMMWORD PTR [r10+rbx]
+ paddq xmm1, xmm7
+ paddq xmm0, xmm5
+ paddq xmm2, xmm4
+ movdqu XMMWORD PTR [rcx+rbx], xmm0
+ movdqu XMMWORD PTR [rax+rbx], xmm2
+ movdqu XMMWORD PTR [r10+rbx], xmm1
+ mov r10, r9
+ xor r10d, 32
+ movq rcx, xmm3
+ mov rax, rcx
+ shl rax, 32
+ xor rdi, rax
+ movdqa xmm0, xmm6
+ pxor xmm0, xmm4
+ movdqu XMMWORD PTR [rdx], xmm0
+ xor rdi, QWORD PTR [r9+rbx]
+ lea r14, QWORD PTR [r9+rbx]
+ mov r12, QWORD PTR [r14+8]
+ xor edx, edx
+ lea r9d, DWORD PTR [ecx+ecx]
+ add r9d, ebp
+ movdqa xmm0, xmm6
+ psrldq xmm0, 8
+ or r9d, r13d
+ movq rax, xmm0
+ div r9
+ xorps xmm3, xmm3
+ mov eax, eax
+ shl rdx, 32
+ add rdx, rax
+ lea r9, QWORD PTR [rdx+rbp]
+ mov r15, rdx
+ mov rax, r9
+ shr rax, 12
+ movq xmm0, rax
+ paddq xmm0, xmm8
+ sqrtsd xmm3, xmm0
+ psubq xmm3, XMMWORD PTR [rsp+16]
+ movq rdx, xmm3
+ test edx, 524287
+ je sqrt_fixup_ivybridge
+ psrlq xmm3, 19
+sqrt_fixup_ivybridge_ret:
+
+ mov ecx, r10d
+ mov rax, rdi
+ mul rbp
+ movq xmm2, rdx
+ xor rdx, [rcx+rbx]
+ add r8, rdx
+ mov QWORD PTR [r14], r8
+ xor r8, rdi
+ mov edi, r8d
+ and edi, 2097136
+ movq xmm0, rax
+ xor rax, [rcx+rbx+8]
+ add r11, rax
+ mov QWORD PTR [r14+8], r11
+ punpcklqdq xmm2, xmm0
+
+ mov r9d, r10d
+ xor r9d, 48
+ xor r10d, 16
+ pxor xmm2, XMMWORD PTR [r9+rbx]
+ movdqu xmm0, XMMWORD PTR [r10+rbx]
+ paddq xmm0, xmm5
+ movdqu xmm1, XMMWORD PTR [rcx+rbx]
+ paddq xmm2, xmm4
+ paddq xmm1, xmm7
+ movdqa xmm5, xmm4
+ movdqu XMMWORD PTR [r9+rbx], xmm0
+ movdqa xmm4, xmm6
+ movdqu XMMWORD PTR [rcx+rbx], xmm2
+ movdqu XMMWORD PTR [r10+rbx], xmm1
+ movdqu xmm6, [rdi+rbx]
+ mov r10d, edi
+ xor r11, r12
+ dec rsi
+ jne main_loop_ivybridge
+
+ ldmxcsr DWORD PTR [rsp]
+ mov rbx, QWORD PTR [rsp+160]
+ movaps xmm6, XMMWORD PTR [rsp+64]
+ movaps xmm7, XMMWORD PTR [rsp+48]
+ movaps xmm8, XMMWORD PTR [rsp+32]
+ add rsp, 80
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ pop rdi
+ pop rsi
+ pop rbp
+ jmp cnv2_main_loop_ivybridge_endp
+
+sqrt_fixup_ivybridge:
+ dec rdx
+ mov r13d, -1022
+ shl r13, 32
+ mov rax, rdx
+ shr rdx, 19
+ shr rax, 20
+ mov rcx, rdx
+ sub rcx, rax
+ add rax, r13
+ not r13
+ sub rcx, r13
+ mov r13d, -2147483647
+ imul rcx, rax
+ sub rcx, r9
+ adc rdx, 0
+ movq xmm3, rdx
+ jmp sqrt_fixup_ivybridge_ret
+
+cnv2_main_loop_ivybridge_endp:
diff --git a/crypto/asm/cnv2_main_loop_ryzen.inc b/crypto/asm/cnv2_main_loop_ryzen.inc
new file mode 100644
index 00000000..d386aa2d
--- /dev/null
+++ b/crypto/asm/cnv2_main_loop_ryzen.inc
@@ -0,0 +1,179 @@
+ mov QWORD PTR [rsp+16], rbx
+ mov QWORD PTR [rsp+24], rbp
+ mov QWORD PTR [rsp+32], rsi
+ push rdi
+ push r12
+ push r13
+ push r14
+ push r15
+ sub rsp, 64
+
+ stmxcsr DWORD PTR [rsp]
+ mov DWORD PTR [rsp+4], 24448
+ ldmxcsr DWORD PTR [rsp+4]
+
+ mov rax, QWORD PTR [rcx+48]
+ mov r9, rcx
+ xor rax, QWORD PTR [rcx+16]
+ mov ebp, 524288
+ mov r8, QWORD PTR [rcx+32]
+ xor r8, QWORD PTR [rcx]
+ mov r11, QWORD PTR [rcx+40]
+ mov r10, r8
+ mov rdx, QWORD PTR [rcx+56]
+ movq xmm3, rax
+ xor rdx, QWORD PTR [rcx+24]
+ xor r11, QWORD PTR [rcx+8]
+ mov rbx, QWORD PTR [rcx+224]
+ mov rax, QWORD PTR [r9+80]
+ xor rax, QWORD PTR [r9+64]
+ movq xmm0, rdx
+ mov rcx, QWORD PTR [rcx+88]
+ xor rcx, QWORD PTR [r9+72]
+ mov rdi, QWORD PTR [r9+104]
+ and r10d, 2097136
+ movaps XMMWORD PTR [rsp+48], xmm6
+ movq xmm4, rax
+ movaps XMMWORD PTR [rsp+32], xmm7
+ movaps XMMWORD PTR [rsp+16], xmm8
+ xorps xmm8, xmm8
+ mov ax, 1023
+ shl rax, 52
+ movq xmm7, rax
+ mov r15, QWORD PTR [r9+96]
+ punpcklqdq xmm3, xmm0
+ movq xmm0, rcx
+ punpcklqdq xmm4, xmm0
+
+ ALIGN 16
+main_loop_ryzen:
+ movdqa xmm5, XMMWORD PTR [r10+rbx]
+ movq xmm0, r11
+ movq xmm6, r8
+ punpcklqdq xmm6, xmm0
+ lea rdx, QWORD PTR [r10+rbx]
+ lea r9, QWORD PTR [rdi+rdi]
+ shl rdi, 32
+
+ mov ecx, r10d
+ mov eax, r10d
+ xor ecx, 16
+ xor eax, 32
+ xor r10d, 48
+ aesenc xmm5, xmm6
+ movdqa xmm2, XMMWORD PTR [rcx+rbx]
+ movdqa xmm1, XMMWORD PTR [rax+rbx]
+ movdqa xmm0, XMMWORD PTR [r10+rbx]
+ paddq xmm2, xmm3
+ paddq xmm1, xmm6
+ paddq xmm0, xmm4
+ movdqa XMMWORD PTR [rcx+rbx], xmm0
+ movdqa XMMWORD PTR [rax+rbx], xmm2
+ movdqa XMMWORD PTR [r10+rbx], xmm1
+
+ movaps xmm1, xmm8
+ mov rsi, r15
+ xor rsi, rdi
+ movq r14, xmm5
+ movdqa xmm0, xmm5
+ pxor xmm0, xmm3
+ mov r10, r14
+ and r10d, 2097136
+ movdqa XMMWORD PTR [rdx], xmm0
+ xor rsi, QWORD PTR [r10+rbx]
+ lea r12, QWORD PTR [r10+rbx]
+ mov r13, QWORD PTR [r10+rbx+8]
+
+ add r9d, r14d
+ or r9d, -2147483647
+ xor edx, edx
+ movdqa xmm0, xmm5
+ psrldq xmm0, 8
+ movq rax, xmm0
+
+ div r9
+ movq xmm0, rax
+ movq xmm1, rdx
+ punpckldq xmm0, xmm1
+ movq r15, xmm0
+ paddq xmm0, xmm5
+ movdqa xmm2, xmm0
+ psrlq xmm0, 12
+ paddq xmm0, xmm7
+ sqrtsd xmm1, xmm0
+ movq rdi, xmm1
+ test rdi, 524287
+ je sqrt_fixup_ryzen
+ shr rdi, 19
+
+sqrt_fixup_ryzen_ret:
+ mov rax, rsi
+ mul r14
+ movq xmm1, rax
+ movq xmm0, rdx
+ punpcklqdq xmm0, xmm1
+
+ mov r9d, r10d
+ mov ecx, r10d
+ xor r9d, 16
+ xor ecx, 32
+ xor r10d, 48
+ movdqa xmm1, XMMWORD PTR [rcx+rbx]
+ xor rdx, [rcx+rbx]
+ xor rax, [rcx+rbx+8]
+ movdqa xmm2, XMMWORD PTR [r9+rbx]
+ pxor xmm2, xmm0
+ paddq xmm4, XMMWORD PTR [r10+rbx]
+ paddq xmm2, xmm3
+ paddq xmm1, xmm6
+ movdqa XMMWORD PTR [r9+rbx], xmm4
+ movdqa XMMWORD PTR [rcx+rbx], xmm2
+ movdqa XMMWORD PTR [r10+rbx], xmm1
+
+ movdqa xmm4, xmm3
+ add r8, rdx
+ add r11, rax
+ mov QWORD PTR [r12], r8
+ xor r8, rsi
+ mov QWORD PTR [r12+8], r11
+ mov r10, r8
+ xor r11, r13
+ and r10d, 2097136
+ movdqa xmm3, xmm5
+ dec ebp
+ jne main_loop_ryzen
+
+ ldmxcsr DWORD PTR [rsp]
+ movaps xmm6, XMMWORD PTR [rsp+48]
+ lea r11, QWORD PTR [rsp+64]
+ mov rbx, QWORD PTR [r11+56]
+ mov rbp, QWORD PTR [r11+64]
+ mov rsi, QWORD PTR [r11+72]
+ movaps xmm8, XMMWORD PTR [r11-48]
+ movaps xmm7, XMMWORD PTR [rsp+32]
+ mov rsp, r11
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ pop rdi
+ jmp cnv2_main_loop_ryzen_endp
+
+sqrt_fixup_ryzen:
+ movq r9, xmm2
+ dec rdi
+ mov edx, -1022
+ shl rdx, 32
+ mov rax, rdi
+ shr rdi, 19
+ shr rax, 20
+ mov rcx, rdi
+ sub rcx, rax
+ lea rcx, [rcx+rdx+1]
+ add rax, rdx
+ imul rcx, rax
+ sub rcx, r9
+ adc rdi, 0
+ jmp sqrt_fixup_ryzen_ret
+
+cnv2_main_loop_ryzen_endp:
diff --git a/crypto/asm/win64/cnv2_main_loop.S b/crypto/asm/win64/cnv2_main_loop.S
new file mode 100644
index 00000000..78eb1185
--- /dev/null
+++ b/crypto/asm/win64/cnv2_main_loop.S
@@ -0,0 +1,21 @@
+#define ALIGN .align /* the shared .inc files contain MASM-style "ALIGN 16" lines */
+.intel_syntax noprefix
+.section .text
+.global cnv2_mainloop_ivybridge_asm
+.global cnv2_mainloop_ryzen_asm
+.global cnv2_double_mainloop_sandybridge_asm
+/* Unlike ../cnv2_main_loop.S, no stack reservation or register copies precede the bodies here. */
+ALIGN 16
+cnv2_mainloop_ivybridge_asm: /* the body reads its context pointer from rcx */
+ #include "../cnv2_main_loop_ivybridge.inc"
+ ret 0 /* reached through the body's trailing *_endp label */
+
+ALIGN 16
+cnv2_mainloop_ryzen_asm: /* the body reads its context pointer from rcx */
+ #include "../cnv2_main_loop_ryzen.inc"
+ ret 0
+
+ALIGN 16
+cnv2_double_mainloop_sandybridge_asm: /* the body reads its two context pointers from rcx and rdx */
+ #include "../cnv2_double_main_loop_sandybridge.inc"
+ ret 0
diff --git a/options.c b/options.c
index 41921784..f276b3f1 100644
--- a/options.c
+++ b/options.c
@@ -54,9 +54,10 @@ char *opt_userpass = NULL;
char *opt_user = NULL;
char *opt_pass = NULL;
-enum Algo opt_algo = ALGO_CRYPTONIGHT;
-enum Variant opt_variant = VARIANT_AUTO;
-enum AlgoVariant opt_av = AV_AUTO;
+enum Algo opt_algo = ALGO_CRYPTONIGHT;
+enum Variant opt_variant = VARIANT_AUTO;
+enum AlgoVariant opt_av = AV_AUTO;
+enum Assembly opt_assembly = ASM_AUTO;
struct AlgoData
@@ -137,6 +138,7 @@ static struct option const options[] = {
{ "userpass", 1, NULL, 'O' },
{ "version", 0, NULL, 'V' },
{ "variant", 1, NULL, 1021 },
+ { "asm", 1, NULL, 1022 },
{ NULL, 0, NULL, 0 }
};
@@ -157,13 +159,21 @@ static const char *variant_names[] = {
};
+static const char *asm_names[] = { /* accepted --asm values; position i is stored as enum Assembly value i */
+ "none", /* ASM_NONE */
+ "auto", /* ASM_AUTO */
+ "intel", /* ASM_INTEL */
+ "ryzen" /* ASM_RYZEN */
+};
+
+
#ifndef XMRIG_NO_AEON
static int get_cryptonight_lite_variant(int variant) {
- if (variant <= AEON_AV0_AUTO || variant >= AEON_AV_MAX) {
- return (cpu_info.flags & CPU_FLAG_AES) ? AEON_AV2_AESNI_DOUBLE : AEON_AV4_SOFT_AES_DOUBLE;
+ if (variant <= AV_AUTO || variant >= AV_MAX) {
+ return (cpu_info.flags & CPU_FLAG_AES) ? AV_DOUBLE : AV_DOUBLE_SOFT;
}
- if (opt_safe && !(cpu_info.flags & CPU_FLAG_AES) && variant <= AEON_AV2_AESNI_DOUBLE) {
+ if (opt_safe && !(cpu_info.flags & CPU_FLAG_AES) && variant <= AV_DOUBLE) {
return variant + 2;
}
@@ -212,6 +222,14 @@ static void parse_arg(int key, char *arg) {
}
break;
+ case 1022: /* --asm */
+ for (size_t i = 0; i < ARRAY_SIZE(asm_names); i++) {
+ if (strcasecmp(arg, asm_names[i]) == 0) {
+ opt_assembly = i;
+ }
+ }
+ break;
+
case 'O': /* --userpass */
p = strchr(arg, ':');
if (!p) {
diff --git a/options.h b/options.h
index 4f543275..7117130b 100644
--- a/options.h
+++ b/options.h
@@ -27,6 +27,7 @@
#include
#include
+
#ifndef ARRAY_SIZE
# define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
#endif
@@ -57,16 +58,13 @@ enum AlgoVariant {
};
-#ifndef XMRIG_NO_AEON
-enum aeon_algo_variant {
- AEON_AV0_AUTO,
- AEON_AV1_AESNI,
- AEON_AV2_AESNI_DOUBLE,
- AEON_AV3_SOFT_AES,
- AEON_AV4_SOFT_AES_DOUBLE,
- AEON_AV_MAX
+enum Assembly {
+ ASM_NONE,
+ ASM_AUTO,
+ ASM_INTEL,
+ ASM_RYZEN,
+ ASM_MAX
};
-#endif
extern bool opt_colors;
@@ -90,6 +88,7 @@ extern int64_t opt_affinity;
extern enum Algo opt_algo;
extern enum Variant opt_variant;
extern enum AlgoVariant opt_av;
+extern enum Assembly opt_assembly;
void parse_cmdline(int argc, char *argv[]);
void show_usage_and_exit(int status);