From 5efd00abec1e9a59eed273321a0eabf856569ac9 Mon Sep 17 00:00:00 2001 From: SChernykh Date: Sat, 19 Dec 2020 19:46:31 +0100 Subject: [PATCH] Another dataset AVX2 init speedup (+3.8% faster on Zen3) --- src/crypto/randomx/asm/program_sshash_avx2_epilogue.inc | 2 +- src/crypto/randomx/asm/program_sshash_avx2_loop_end.inc | 2 +- src/crypto/randomx/jit_compiler_x86_static.S | 7 +++---- src/crypto/randomx/jit_compiler_x86_static.asm | 7 +++---- 4 files changed, 8 insertions(+), 10 deletions(-) diff --git a/src/crypto/randomx/asm/program_sshash_avx2_epilogue.inc b/src/crypto/randomx/asm/program_sshash_avx2_epilogue.inc index 88204d99..7ebd871f 100644 --- a/src/crypto/randomx/asm/program_sshash_avx2_epilogue.inc +++ b/src/crypto/randomx/asm/program_sshash_avx2_epilogue.inc @@ -1,4 +1,4 @@ - add rsp, 32 + add rsp, 40 pop r9 movdqu xmm0, xmmword ptr [rsp] diff --git a/src/crypto/randomx/asm/program_sshash_avx2_loop_end.inc b/src/crypto/randomx/asm/program_sshash_avx2_loop_end.inc index 46dd469d..09c2deeb 100644 --- a/src/crypto/randomx/asm/program_sshash_avx2_loop_end.inc +++ b/src/crypto/randomx/asm/program_sshash_avx2_loop_end.inc @@ -34,5 +34,5 @@ add rbp, 5 add rsi, 320 - cmp rbp, qword ptr [rsp+32] + cmp rbp, qword ptr [rsp+40] db 15, 130, 0, 0, 0, 0 ;# jb rel32 diff --git a/src/crypto/randomx/jit_compiler_x86_static.S b/src/crypto/randomx/jit_compiler_x86_static.S index da5ee98e..e2177147 100644 --- a/src/crypto/randomx/jit_compiler_x86_static.S +++ b/src/crypto/randomx/jit_compiler_x86_static.S @@ -212,7 +212,7 @@ DECL(randomx_dataset_init_avx2_prologue): mov rbp, rdx ;# block index push rcx ;# max. block index #endif - sub rsp, 32 + sub rsp, 40 jmp randomx_dataset_init_avx2_prologue_loop_begin #include "asm/program_sshash_avx2_constants.inc" @@ -240,9 +240,8 @@ randomx_dataset_init_avx2_prologue_loop_begin: xor r15, r8 ;# init AVX registers (lanes 1-4) - vpxor ymm0, ymm0, ymm0 - movq xmm0, rbp - vpbroadcastq ymm0, xmm0 + mov qword ptr [rsp+32], rbp + vbroadcastsd ymm0, qword ptr [rsp+32] vpaddq ymm0, ymm0, ymmword ptr [r0_avx2_increments+rip] ;# ymm0 *= r0_avx2_mul diff --git a/src/crypto/randomx/jit_compiler_x86_static.asm b/src/crypto/randomx/jit_compiler_x86_static.asm index f8a2d527..6e90cbf3 100644 --- a/src/crypto/randomx/jit_compiler_x86_static.asm +++ b/src/crypto/randomx/jit_compiler_x86_static.asm @@ -195,7 +195,7 @@ randomx_dataset_init_avx2_prologue PROC mov rsi, rdx ;# dataset mov rbp, r8 ;# block index push r9 ;# max. block index - sub rsp, 32 + sub rsp, 40 jmp loop_begin include asm/program_sshash_avx2_constants.inc @@ -223,9 +223,8 @@ loop_begin: xor r15, r8 ;# init AVX registers (lanes 1-4) - vpxor ymm0, ymm0, ymm0 - movq xmm0, rbp - vpbroadcastq ymm0, xmm0 + mov qword ptr [rsp+32], rbp + vbroadcastsd ymm0, qword ptr [rsp+32] vpaddq ymm0, ymm0, ymmword ptr [r0_avx2_increments] ;# ymm0 *= r0_avx2_mul