From 23c12fc35169c667051f30946ead9353da2e464d Mon Sep 17 00:00:00 2001 From: SChernykh Date: Thu, 24 Mar 2022 13:10:03 +0100 Subject: [PATCH] Optimized keccak Big astrobwt/v2 speedup on non-AVX2 CPUs: **Core i7-2600 +64% (17 -> 28 kh/s)** --- src/base/crypto/keccak.cpp | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/src/base/crypto/keccak.cpp b/src/base/crypto/keccak.cpp index 781ef64a..77bd445b 100644 --- a/src/base/crypto/keccak.cpp +++ b/src/base/crypto/keccak.cpp @@ -53,10 +53,8 @@ const uint64_t keccakf_rndc[24] = void xmrig::keccakf(uint64_t st[25], int rounds) { - int i, j, round; - uint64_t t, bc[5]; - - for (round = 0; round < rounds; ++round) { + for (int round = 0; round < rounds; ++round) { + uint64_t bc[5]; // Theta bc[0] = st[0] ^ st[5] ^ st[10] ^ st[15] ^ st[20]; @@ -65,17 +63,21 @@ void xmrig::keccakf(uint64_t st[25], int rounds) bc[3] = st[3] ^ st[8] ^ st[13] ^ st[18] ^ st[23]; bc[4] = st[4] ^ st[9] ^ st[14] ^ st[19] ^ st[24]; - for (i = 0; i < 5; ++i) { - t = bc[(i + 4) % 5] ^ ROTL64(bc[(i + 1) % 5], 1); - st[i ] ^= t; - st[i + 5] ^= t; - st[i + 10] ^= t; - st[i + 15] ^= t; - st[i + 20] ^= t; +#define X(i) { \ + const uint64_t t = bc[(i + 4) % 5] ^ ROTL64(bc[(i + 1) % 5], 1); \ + st[i ] ^= t; \ + st[i + 5] ^= t; \ + st[i + 10] ^= t; \ + st[i + 15] ^= t; \ + st[i + 20] ^= t; \ } + X(0); X(1); X(2); X(3); X(4); + +#undef X + // Rho Pi - t = st[1]; + const uint64_t t = st[1]; st[ 1] = ROTL64(st[ 6], 44); st[ 6] = ROTL64(st[ 9], 20); st[ 9] = ROTL64(st[22], 61); @@ -103,7 +105,7 @@ void xmrig::keccakf(uint64_t st[25], int rounds) // Chi // unrolled loop, where only last iteration is different - j = 0; + int j = 0; bc[0] = st[j ]; bc[1] = st[j + 1];