Optimized CFROUND
This commit is contained in:
@ -1,5 +1,5 @@
|
||||
;# save VM register values
|
||||
add rsp, 24
|
||||
add rsp, 40
|
||||
pop rcx
|
||||
mov qword ptr [rcx+0], r8
|
||||
mov qword ptr [rcx+8], r9
|
||||
|
@ -1,5 +1,5 @@
|
||||
lea rcx, [rsi+rax]
|
||||
mov [rsp+8], rcx
|
||||
mov [rsp+16], rcx
|
||||
xor r8, qword ptr [rcx+0]
|
||||
xor r9, qword ptr [rcx+8]
|
||||
xor r10, qword ptr [rcx+16]
|
||||
@ -9,7 +9,7 @@
|
||||
xor r14, qword ptr [rcx+48]
|
||||
xor r15, qword ptr [rcx+56]
|
||||
lea rcx, [rsi+rdx]
|
||||
mov [rsp+16], rcx
|
||||
mov [rsp+24], rcx
|
||||
cvtdq2pd xmm0, qword ptr [rcx+0]
|
||||
cvtdq2pd xmm1, qword ptr [rcx+8]
|
||||
cvtdq2pd xmm2, qword ptr [rcx+16]
|
||||
|
@ -1,4 +1,4 @@
|
||||
mov rcx, [rsp+16]
|
||||
mov rcx, [rsp+24]
|
||||
mov qword ptr [rcx+0], r8
|
||||
mov qword ptr [rcx+8], r9
|
||||
mov qword ptr [rcx+16], r10
|
||||
@ -7,7 +7,7 @@
|
||||
mov qword ptr [rcx+40], r13
|
||||
mov qword ptr [rcx+48], r14
|
||||
mov qword ptr [rcx+56], r15
|
||||
mov rcx, [rsp+8]
|
||||
mov rcx, [rsp+16]
|
||||
xorpd xmm0, xmm4
|
||||
xorpd xmm1, xmm5
|
||||
xorpd xmm2, xmm6
|
||||
|
@ -400,7 +400,7 @@ namespace randomx {
|
||||
*(uint32_t*)(code + codePos + 10) = RandomX_CurrentConfig.ScratchpadL3Mask64_Calculated;
|
||||
*(uint32_t*)(code + codePos + 20) = RandomX_CurrentConfig.ScratchpadL3Mask64_Calculated;
|
||||
if (hasAVX) {
|
||||
uint32_t* p = (uint32_t*)(code + codePos + 32);
|
||||
uint32_t* p = (uint32_t*)(code + codePos + 67);
|
||||
*p = (*p & 0xFF000000U) | 0x0077F8C5U;
|
||||
}
|
||||
|
||||
@ -1072,18 +1072,21 @@ namespace randomx {
|
||||
uint8_t* const p = code;
|
||||
int pos = codePos;
|
||||
|
||||
emit(REX_MOV_RR64, p, pos);
|
||||
emitByte(0xc0 + instr.src, p, pos);
|
||||
int rotate = (13 - (instr.getImm32() & 63)) & 63;
|
||||
if (rotate != 0) {
|
||||
emit(ROL_RAX, p, pos);
|
||||
emitByte(rotate, p, pos);
|
||||
}
|
||||
const uint32_t src = instr.src;
|
||||
|
||||
*(uint32_t*)(p + pos) = 0x00C08B49 + (src << 16);
|
||||
const int rotate = (static_cast<int>(instr.getImm32() & 63) - 2) & 63;
|
||||
*(uint32_t*)(p + pos + 3) = 0x00C8C148 + (rotate << 24);
|
||||
|
||||
if (vm_flags & RANDOMX_FLAG_AMD) {
|
||||
emit(AND_OR_MOV_LDMXCSR_RYZEN, p, pos);
|
||||
*(uint64_t*)(p + pos + 7) = 0x742024443B0CE083ULL;
|
||||
*(uint8_t*)(p + pos + 15) = 8;
|
||||
*(uint64_t*)(p + pos + 16) = 0x202444890414AE0FULL;
|
||||
pos += 24;
|
||||
}
|
||||
else {
|
||||
emit(AND_OR_MOV_LDMXCSR, p, pos);
|
||||
*(uint64_t*)(p + pos + 7) = 0x0414AE0F0CE083ULL;
|
||||
pos += 14;
|
||||
}
|
||||
|
||||
codePos = pos;
|
||||
|
@ -82,8 +82,12 @@ randomx_program_prologue_first_load PROC
|
||||
and eax, RANDOMX_SCRATCHPAD_MASK
|
||||
ror rdx, 32
|
||||
and edx, RANDOMX_SCRATCHPAD_MASK
|
||||
sub rsp, 24
|
||||
stmxcsr dword ptr [rsp]
|
||||
sub rsp, 40
|
||||
mov dword ptr [rsp], 9FC0h
|
||||
mov dword ptr [rsp+4], 0BFC0h
|
||||
mov dword ptr [rsp+8], 0DFC0h
|
||||
mov dword ptr [rsp+12], 0FFC0h
|
||||
mov dword ptr [rsp+32], -1
|
||||
nop
|
||||
nop
|
||||
nop
|
||||
|
Reference in New Issue
Block a user