Merge pull request #1987 from SChernykh/dev
Another dataset AVX2 init speedup (+3.8% faster on Zen3)
This commit is contained in:
commit
aa53ba073d
5 changed files with 10 additions and 10 deletions
|
@ -1,4 +1,4 @@
|
|||
add rsp, 32
|
||||
add rsp, 40
|
||||
pop r9
|
||||
|
||||
movdqu xmm0, xmmword ptr [rsp]
|
||||
|
|
|
@ -34,5 +34,5 @@
|
|||
|
||||
add rbp, 5
|
||||
add rsi, 320
|
||||
cmp rbp, qword ptr [rsp+32]
|
||||
cmp rbp, qword ptr [rsp+40]
|
||||
db 15, 130, 0, 0, 0, 0 ;# jb rel32
|
||||
|
|
|
@ -244,7 +244,9 @@ namespace randomx {
|
|||
switch (arch) {
|
||||
case xmrig::ICpuInfo::ARCH_ZEN:
|
||||
case xmrig::ICpuInfo::ARCH_ZEN_PLUS:
|
||||
default:
|
||||
// AVX2 init is slower on Zen/Zen+
|
||||
// Also disable it for other unknown architectures
|
||||
initDatasetAVX2 = false;
|
||||
break;
|
||||
case xmrig::ICpuInfo::ARCH_ZEN2:
|
||||
|
|
|
@ -212,7 +212,7 @@ DECL(randomx_dataset_init_avx2_prologue):
|
|||
mov rbp, rdx ;# block index
|
||||
push rcx ;# max. block index
|
||||
#endif
|
||||
sub rsp, 32
|
||||
sub rsp, 40
|
||||
|
||||
jmp randomx_dataset_init_avx2_prologue_loop_begin
|
||||
#include "asm/program_sshash_avx2_constants.inc"
|
||||
|
@ -240,9 +240,8 @@ randomx_dataset_init_avx2_prologue_loop_begin:
|
|||
xor r15, r8
|
||||
|
||||
;# init AVX registers (lanes 1-4)
|
||||
vpxor ymm0, ymm0, ymm0
|
||||
movq xmm0, rbp
|
||||
vpbroadcastq ymm0, xmm0
|
||||
mov qword ptr [rsp+32], rbp
|
||||
vbroadcastsd ymm0, qword ptr [rsp+32]
|
||||
vpaddq ymm0, ymm0, ymmword ptr [r0_avx2_increments+rip]
|
||||
|
||||
;# ymm0 *= r0_avx2_mul
|
||||
|
|
|
@ -195,7 +195,7 @@ randomx_dataset_init_avx2_prologue PROC
|
|||
mov rsi, rdx ;# dataset
|
||||
mov rbp, r8 ;# block index
|
||||
push r9 ;# max. block index
|
||||
sub rsp, 32
|
||||
sub rsp, 40
|
||||
|
||||
jmp loop_begin
|
||||
include asm/program_sshash_avx2_constants.inc
|
||||
|
@ -223,9 +223,8 @@ loop_begin:
|
|||
xor r15, r8
|
||||
|
||||
;# init AVX registers (lanes 1-4)
|
||||
vpxor ymm0, ymm0, ymm0
|
||||
movq xmm0, rbp
|
||||
vpbroadcastq ymm0, xmm0
|
||||
mov qword ptr [rsp+32], rbp
|
||||
vbroadcastsd ymm0, qword ptr [rsp+32]
|
||||
vpaddq ymm0, ymm0, ymmword ptr [r0_avx2_increments]
|
||||
|
||||
;# ymm0 *= r0_avx2_mul
|
||||
|
|
Loading…
Reference in a new issue