From f29d05bdde386ad34c6d5cb8a074cc70aa15a4ed Mon Sep 17 00:00:00 2001 From: XMRig Date: Fri, 21 Apr 2017 11:14:27 +0300 Subject: [PATCH] Simplify cryptonight_ctx. --- algo/cryptonight/cryptonight.h | 28 +--------- algo/cryptonight/cryptonight_av1_aesni.c | 12 ++-- algo/cryptonight/cryptonight_av2_aesni_stak.c | 12 ++-- algo/cryptonight/cryptonight_av3_aesni_bmi2.c | 12 ++-- algo/cryptonight/cryptonight_av4_softaes.c | 56 ++++++++++--------- .../cryptonight_av5_aesni_experimental.c | 8 +-- xmrig.c | 6 +- 7 files changed, 58 insertions(+), 76 deletions(-) diff --git a/algo/cryptonight/cryptonight.h b/algo/cryptonight/cryptonight.h index 1bb33f1e..0d0c2c75 100644 --- a/algo/cryptonight/cryptonight.h +++ b/algo/cryptonight/cryptonight.h @@ -27,34 +27,12 @@ #include #include -#define MEMORY 2097152 /* 2 MiB */ -#define ITER (1 << 20) -#define AES_BLOCK_SIZE 16 -#define AES_KEY_SIZE 32 /*16*/ -#define INIT_SIZE_BLK 8 -#define INIT_SIZE_BYTE (INIT_SIZE_BLK * AES_BLOCK_SIZE) // 128 - - -union hash_state { - uint8_t b[200]; - uint64_t w[25]; -}; - -union cn_slow_hash_state { - union hash_state hs; - struct { - uint8_t k[64]; - uint8_t init[INIT_SIZE_BYTE]; - }; -}; +#define MEMORY 2097152 /* 2 MiB */ struct cryptonight_ctx { - union cn_slow_hash_state state; - uint8_t text[INIT_SIZE_BYTE] __attribute((aligned(16))); - uint64_t a[2] __attribute__((aligned(16))); - uint64_t b[2] __attribute__((aligned(16))); - uint64_t c[2] __attribute__((aligned(16))); + uint8_t state[200] __attribute__((aligned(16))); + uint8_t* memory __attribute__((aligned(16))); }; diff --git a/algo/cryptonight/cryptonight_av1_aesni.c b/algo/cryptonight/cryptonight_av1_aesni.c index fb7d45fd..bbc33fbb 100644 --- a/algo/cryptonight/cryptonight_av1_aesni.c +++ b/algo/cryptonight/cryptonight_av1_aesni.c @@ -224,12 +224,12 @@ static inline void cn_implode_scratchpad(const __m128i* input, __m128i* output) void cryptonight_av1_aesni(void *restrict output, const void *restrict input, char *restrict memory, struct cryptonight_ctx *restrict ctx) { - keccak((const uint8_t *) input, 76, (uint8_t *) &ctx->state.hs, 200); + keccak((const uint8_t *) input, 76, ctx->state, 200); - cn_explode_scratchpad((__m128i*) &ctx->state.hs, (__m128i*) memory); + cn_explode_scratchpad((__m128i*) ctx->state, (__m128i*) memory); const uint8_t* l0 = memory; - uint64_t* h0 = (uint64_t*) &ctx->state.hs; + uint64_t* h0 = (uint64_t*) ctx->state; uint64_t al0 = h0[0] ^ h0[4]; uint64_t ah0 = h0[1] ^ h0[5]; @@ -262,8 +262,8 @@ void cryptonight_av1_aesni(void *restrict output, const void *restrict input, ch idx0 = al0; } - cn_implode_scratchpad((__m128i*) memory, (__m128i*) &ctx->state.hs); + cn_implode_scratchpad((__m128i*) memory, (__m128i*) ctx->state); - keccakf((uint64_t*) &ctx->state.hs, 24); - extra_hashes[ctx->state.hs.b[0] & 3](&ctx->state, 200, output); + keccakf(h0, 24); + extra_hashes[ctx->state[0] & 3](ctx->state, 200, output); } diff --git a/algo/cryptonight/cryptonight_av2_aesni_stak.c b/algo/cryptonight/cryptonight_av2_aesni_stak.c index 67298a5b..a3a94b86 100644 --- a/algo/cryptonight/cryptonight_av2_aesni_stak.c +++ b/algo/cryptonight/cryptonight_av2_aesni_stak.c @@ -224,12 +224,12 @@ static inline void cn_implode_scratchpad(const __m128i* input, __m128i* output) void cryptonight_av2_aesni_stak(void *restrict output, const void *restrict input, char *restrict memory, struct cryptonight_ctx *restrict ctx) { - keccak((const uint8_t *) input, 76, (uint8_t *) &ctx->state.hs, 200); + keccak((const uint8_t *) input, 76, ctx->state, 200); - cn_explode_scratchpad((__m128i*) &ctx->state.hs, (__m128i*) memory); + cn_explode_scratchpad((__m128i*) ctx->state, (__m128i*) memory); const uint8_t* l0 = memory; - uint64_t* h0 = (uint64_t*) &ctx->state.hs; + uint64_t* h0 = (uint64_t*) ctx->state; uint64_t al0 = h0[0] ^ h0[4]; uint64_t ah0 = h0[1] ^ h0[5]; @@ -266,8 +266,8 @@ void cryptonight_av2_aesni_stak(void *restrict output, const void *restrict inpu _mm_prefetch((const char*)&l0[idx0 & 0x1FFFF0], _MM_HINT_T0); } - cn_implode_scratchpad((__m128i*) memory, (__m128i*) &ctx->state.hs); + cn_implode_scratchpad((__m128i*) memory, (__m128i*) ctx->state); - keccakf((uint64_t*) &ctx->state.hs, 24); - extra_hashes[ctx->state.hs.b[0] & 3](&ctx->state, 200, output); + keccakf(h0, 24); + extra_hashes[ctx->state[0] & 3](ctx->state, 200, output); } diff --git a/algo/cryptonight/cryptonight_av3_aesni_bmi2.c b/algo/cryptonight/cryptonight_av3_aesni_bmi2.c index a2a04bf0..90be5e8a 100644 --- a/algo/cryptonight/cryptonight_av3_aesni_bmi2.c +++ b/algo/cryptonight/cryptonight_av3_aesni_bmi2.c @@ -224,12 +224,12 @@ static inline void cn_implode_scratchpad(const __m128i* input, __m128i* output) void cryptonight_av3_aesni_bmi2(void *restrict output, const void *restrict input, char *restrict memory, struct cryptonight_ctx *restrict ctx) { - keccak((const uint8_t *) input, 76, (uint8_t *) &ctx->state.hs, 200); + keccak((const uint8_t *) input, 76, ctx->state, 200); - cn_explode_scratchpad((__m128i*) &ctx->state, (__m128i*) memory); + cn_explode_scratchpad((__m128i*) ctx->state, (__m128i*) memory); const uint8_t* l0 = memory; - uint64_t* h0 = (uint64_t*) &ctx->state; + uint64_t* h0 = (uint64_t*) ctx->state; uint64_t al0 = h0[0] ^ h0[4]; uint64_t ah0 = h0[1] ^ h0[5]; @@ -262,8 +262,8 @@ void cryptonight_av3_aesni_bmi2(void *restrict output, const void *restrict inpu idx0 = al0; } - cn_implode_scratchpad((__m128i*) memory, (__m128i*) &ctx->state.hs); + cn_implode_scratchpad((__m128i*) memory, (__m128i*) ctx->state); - keccakf((uint64_t*) &ctx->state.hs, 24); - extra_hashes[ctx->state.hs.b[0] & 3](&ctx->state, 200, output); + keccakf(h0, 24); + extra_hashes[ctx->state[0] & 3](ctx->state, 200, output); } diff --git a/algo/cryptonight/cryptonight_av4_softaes.c b/algo/cryptonight/cryptonight_av4_softaes.c index 9136293d..2e2c8f08 100644 --- a/algo/cryptonight/cryptonight_av4_softaes.c +++ b/algo/cryptonight/cryptonight_av4_softaes.c @@ -205,44 +205,46 @@ static inline void cn_implode_scratchpad(const __m128i* input, __m128i* output) void cryptonight_av4_softaes(void *restrict output, const void *restrict input, char *restrict memory, struct cryptonight_ctx *restrict ctx) { - uint64_t* state = ctx->state.hs.w; + keccak((const uint8_t *) input, 76, ctx->state, 200); - keccak((const uint8_t *) input, 76, (uint8_t *) state, 200); - cn_explode_scratchpad((__m128i*) state, (__m128i*) memory); + cn_explode_scratchpad((__m128i*) ctx->state, (__m128i*) memory); - uint64_t a[2] __attribute((aligned(16))) = { state[0] ^ state[4], state[1] ^ state[5] }; - uint64_t c __attribute((aligned(16))); - uint64_t d[2] __attribute((aligned(16))); + const uint8_t* l0 = memory; + uint64_t* h0 = (uint64_t*) ctx->state; - __m128i a_x = _mm_load_si128((__m128i *) &memory[a[0] & 0x1FFFF0]); - __m128i b_x = _mm_set_epi64x(state[3] ^ state[7], state[2] ^ state[6]); + uint64_t al0 = h0[0] ^ h0[4]; + uint64_t ah0 = h0[1] ^ h0[5]; + __m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]); + + uint64_t idx0 = h0[0] ^ h0[4]; for (size_t i = 0; __builtin_expect(i < 0x80000, 1); i++) { - __m128i c_x = soft_aesenc(a_x, _mm_load_si128((__m128i *) a)); - c = _mm_cvtsi128_si64(c_x); + __m128i cx; + cx = _mm_load_si128((__m128i *)&l0[idx0 & 0x1FFFF0]); + cx = soft_aesenc(cx, _mm_set_epi64x(ah0, al0)); - uint64_t *restrict d_ptr = (uint64_t *) &memory[c & 0x1FFFF0]; - _mm_store_si128((__m128i *) &memory[a[0] & 0x1FFFF0], _mm_xor_si128(b_x, c_x)); - b_x = c_x; + _mm_store_si128((__m128i *)&l0[idx0 & 0x1FFFF0], _mm_xor_si128(bx0, cx)); + idx0 = _mm_cvtsi128_si64(cx); + bx0 = cx; - d[0] = d_ptr[0]; - d[1] = d_ptr[1]; + uint64_t hi, lo, cl, ch; + cl = ((uint64_t*)&l0[idx0 & 0x1FFFF0])[0]; + ch = ((uint64_t*)&l0[idx0 & 0x1FFFF0])[1]; + lo = _umul128(idx0, cl, &hi); - { - unsigned __int128 res = (unsigned __int128) c * d[0]; + al0 += hi; + ah0 += lo; - d_ptr[0] = a[0] += res >> 64; - d_ptr[1] = a[1] += (uint64_t) res; - } + ((uint64_t*)&l0[idx0 & 0x1FFFF0])[0] = al0; + ((uint64_t*)&l0[idx0 & 0x1FFFF0])[1] = ah0; - a[0] ^= d[0]; - a[1] ^= d[1]; - - a_x = _mm_load_si128((__m128i *) &memory[a[0] & 0x1FFFF0]); + ah0 ^= ch; + al0 ^= cl; + idx0 = al0; } - cn_implode_scratchpad((__m128i*) memory, (__m128i*) state); + cn_implode_scratchpad((__m128i*) memory, (__m128i*) ctx->state); - keccakf(state, 24); - extra_hashes[ctx->state.hs.b[0] & 3](&ctx->state, 200, output); + keccakf(h0, 24); + extra_hashes[ctx->state[0] & 3](ctx->state, 200, output); } diff --git a/algo/cryptonight/cryptonight_av5_aesni_experimental.c b/algo/cryptonight/cryptonight_av5_aesni_experimental.c index e474a8a3..4409b1d8 100644 --- a/algo/cryptonight/cryptonight_av5_aesni_experimental.c +++ b/algo/cryptonight/cryptonight_av5_aesni_experimental.c @@ -215,10 +215,10 @@ static inline void cn_implode_scratchpad(const __m128i* input, __m128i* output) void cryptonight_av5_aesni_experimental(void *restrict output, const void *restrict input, char *restrict memory, struct cryptonight_ctx *restrict ctx) { - uint64_t* state = ctx->state.hs.w; + keccak((const uint8_t *) input, 76, ctx->state, 200); + cn_explode_scratchpad((__m128i*) ctx->state, (__m128i*) memory); - keccak((const uint8_t *) input, 76, (uint8_t *) state, 200); - cn_explode_scratchpad((__m128i*) state, (__m128i*) memory); + uint64_t* state = (uint64_t*) ctx->state; uint64_t a[2] __attribute((aligned(16))) = { state[0] ^ state[4], state[1] ^ state[5] }; uint64_t c __attribute((aligned(16))); @@ -254,5 +254,5 @@ void cryptonight_av5_aesni_experimental(void *restrict output, const void *restr cn_implode_scratchpad((__m128i*) memory, (__m128i*) state); keccakf(state, 24); - extra_hashes[ctx->state.hs.b[0] & 3](&ctx->state, 200, output); + extra_hashes[ctx->state[0] & 3](ctx->state, 200, output); } diff --git a/xmrig.c b/xmrig.c index ea2b6d6b..1179d618 100644 --- a/xmrig.c +++ b/xmrig.c @@ -282,7 +282,9 @@ static void *miner_thread(void *userdata) { uint32_t max_nonce; uint32_t end_nonce = 0xffffffffU / opt_n_threads * (thr_id + 1) - 0x20; - struct cryptonight_ctx *persistentctx = (struct cryptonight_ctx *) &persistent_memory[TWO_MB_PAGE - sizeof(struct cryptonight_ctx) * (thr_id + 1)]; + applog(LOG_BLUE, "%d", sizeof(struct cryptonight_ctx)); + + struct cryptonight_ctx *persistentctx = (struct cryptonight_ctx *) &persistent_memory[MEMORY - sizeof(struct cryptonight_ctx) * (thr_id + 1)]; if (cpu_info.count > 1 && opt_n_threads > 1 && opt_affinity != -1L) { affine_to_cpu_mask(thr_id, (unsigned long) opt_affinity); @@ -335,7 +337,7 @@ static void *miner_thread(void *userdata) { gettimeofday(&tv_start, NULL ); /* scan nonces for a proof-of-work hash */ - rc = scanhash_cryptonight(thr_id, hash, work.data, work.target, max_nonce, &hashes_done, &persistent_memory[TWO_MB_PAGE * (thr_id + 1)], persistentctx); + rc = scanhash_cryptonight(thr_id, hash, work.data, work.target, max_nonce, &hashes_done, &persistent_memory[MEMORY * (thr_id + 1)], persistentctx); stats_add_hashes(thr_id, &tv_start, hashes_done); memcpy(work.hash, hash, 32);