Initial ASM wrapper.
This commit is contained in:
parent
f163aad38c
commit
ba65a34a01
7 changed files with 58 additions and 29 deletions
|
@ -256,4 +256,4 @@ if (WITH_DEBUG_LOG)
|
|||
endif()
|
||||
|
||||
add_executable(${PROJECT_NAME} ${HEADERS} ${SOURCES} ${SOURCES_OS} ${SOURCES_CPUID} ${HEADERS_CRYPTO} ${SOURCES_CRYPTO} ${SOURCES_SYSLOG} ${HTTPD_SOURCES} ${TLS_SOURCES})
|
||||
target_link_libraries(${PROJECT_NAME} ${${XMRIG_ASM_LIBRARY}} ${OPENSSL_LIBRARIES} ${UV_LIBRARIES} ${MHD_LIBRARY} ${EXTRA_LIBS} ${CPUID_LIB})
|
||||
target_link_libraries(${PROJECT_NAME} ${XMRIG_ASM_LIBRARY} ${OPENSSL_LIBRARIES} ${UV_LIBRARIES} ${MHD_LIBRARY} ${EXTRA_LIBS} ${CPUID_LIB})
|
||||
|
|
|
@ -17,7 +17,6 @@ if (WITH_ASM AND NOT XMRIG_ARM)
|
|||
|
||||
set_property(TARGET ${XMRIG_ASM_LIBRARY} PROPERTY LINKER_LANGUAGE C)
|
||||
else()
|
||||
# set(XMRIG_ASM_SOURCES "")
|
||||
set(XMRIG_ASM_LIBRARY "")
|
||||
add_definitions(/DXMRIG_NO_ASM)
|
||||
endif()
|
||||
|
|
|
@ -22,8 +22,8 @@
|
|||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef __CRYPTONIGHT_H__
|
||||
#define __CRYPTONIGHT_H__
|
||||
#ifndef XMRIG_CRYPTONIGHT_H
|
||||
#define XMRIG_CRYPTONIGHT_H
|
||||
|
||||
|
||||
#include <stddef.h>
|
||||
|
@ -31,9 +31,9 @@
|
|||
|
||||
|
||||
struct cryptonight_ctx {
|
||||
alignas(16) uint8_t state[200];
|
||||
alignas(16) uint8_t* memory;
|
||||
alignas(16) uint8_t state[224];
|
||||
alignas(16) uint8_t *memory;
|
||||
};
|
||||
|
||||
|
||||
#endif /* __CRYPTONIGHT_H__ */
|
||||
#endif /* XMRIG_CRYPTONIGHT_H */
|
||||
|
|
|
@ -561,6 +561,31 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
|
|||
}
|
||||
|
||||
|
||||
extern "C" void cnv2_mainloop_ivybridge_asm(cryptonight_ctx *ctx);
|
||||
extern "C" void cnv2_mainloop_ryzen_asm(cryptonight_ctx *ctx);
|
||||
|
||||
|
||||
template<xmrig::Algo ALGO, xmrig::Variant VARIANT, xmrig::Assembly ASM>
|
||||
inline void cryptonight_single_hash_asm(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx)
|
||||
{
|
||||
constexpr size_t MEM = xmrig::cn_select_memory<ALGO>();
|
||||
|
||||
xmrig::keccak(input, size, ctx[0]->state);
|
||||
cn_explode_scratchpad<ALGO, MEM, false>((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
|
||||
|
||||
if (ASM == xmrig::ASM_INTEL) {
|
||||
cnv2_mainloop_ivybridge_asm(ctx[0]);
|
||||
}
|
||||
else {
|
||||
cnv2_mainloop_ryzen_asm(ctx[0]);
|
||||
}
|
||||
|
||||
cn_implode_scratchpad<ALGO, MEM, false>((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
|
||||
xmrig::keccakf(reinterpret_cast<uint64_t*>(ctx[0]->state), 24);
|
||||
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
|
||||
}
|
||||
|
||||
|
||||
template<xmrig::Algo ALGO, bool SOFT_AES, xmrig::Variant VARIANT>
|
||||
inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx)
|
||||
{
|
||||
|
|
|
@ -48,10 +48,10 @@
|
|||
punpcklqdq xmm4, xmm0
|
||||
movq xmm0, rcx
|
||||
punpcklqdq xmm5, xmm0
|
||||
movdqu xmm6, XMMWORD PTR [r10+rbx]
|
||||
|
||||
ALIGN 16
|
||||
main_loop_ivybridge:
|
||||
movdqu xmm6, XMMWORD PTR [r10+rbx]
|
||||
lea rdx, QWORD PTR [r10+rbx]
|
||||
mov ecx, r10d
|
||||
mov eax, r10d
|
||||
|
@ -63,28 +63,29 @@ main_loop_ivybridge:
|
|||
movq xmm7, r8
|
||||
punpcklqdq xmm7, xmm0
|
||||
aesenc xmm6, xmm7
|
||||
movq rbp, xmm6
|
||||
mov r9, rbp
|
||||
and r9d, 2097136
|
||||
movdqu xmm2, XMMWORD PTR [rcx+rbx]
|
||||
movdqu xmm1, XMMWORD PTR [rax+rbx]
|
||||
movdqu xmm0, XMMWORD PTR [r10+rbx]
|
||||
paddq xmm1, xmm7
|
||||
movdqu xmm2, XMMWORD PTR [rcx+rbx]
|
||||
paddq xmm0, xmm5
|
||||
paddq xmm2, xmm4
|
||||
movdqu XMMWORD PTR [rcx+rbx], xmm0
|
||||
movq rcx, xmm3
|
||||
movdqu XMMWORD PTR [rax+rbx], xmm2
|
||||
mov rax, rcx
|
||||
movdqu XMMWORD PTR [r10+rbx], xmm1
|
||||
mov r10, r9
|
||||
xor r10d, 32
|
||||
movq rcx, xmm3
|
||||
mov rax, rcx
|
||||
shl rax, 32
|
||||
xor rdi, rax
|
||||
movq rbp, xmm6
|
||||
movdqa xmm0, xmm6
|
||||
pxor xmm0, xmm4
|
||||
mov r10, rbp
|
||||
and r10d, 2097136
|
||||
movdqu XMMWORD PTR [rdx], xmm0
|
||||
xor rdi, QWORD PTR [r10+rbx]
|
||||
lea r14, QWORD PTR [r10+rbx]
|
||||
xor r10d, 32
|
||||
xor rdi, QWORD PTR [r9+rbx]
|
||||
lea r14, QWORD PTR [r9+rbx]
|
||||
mov r12, QWORD PTR [r14+8]
|
||||
xor edx, edx
|
||||
lea r9d, DWORD PTR [ecx+ecx]
|
||||
|
@ -117,8 +118,15 @@ sqrt_fixup_ivybridge_ret:
|
|||
mul rbp
|
||||
movq xmm2, rdx
|
||||
xor rdx, [rcx+rbx]
|
||||
add r8, rdx
|
||||
mov QWORD PTR [r14], r8
|
||||
xor r8, rdi
|
||||
mov edi, r8d
|
||||
and edi, 2097136
|
||||
movq xmm0, rax
|
||||
xor rax, [rcx+rbx+8]
|
||||
add r11, rax
|
||||
mov QWORD PTR [r14+8], r11
|
||||
punpcklqdq xmm2, xmm0
|
||||
|
||||
mov r9d, r10d
|
||||
|
@ -135,13 +143,8 @@ sqrt_fixup_ivybridge_ret:
|
|||
movdqa xmm4, xmm6
|
||||
movdqu XMMWORD PTR [rcx+rbx], xmm2
|
||||
movdqu XMMWORD PTR [r10+rbx], xmm1
|
||||
add r8, rdx
|
||||
mov QWORD PTR [r14], r8
|
||||
xor r8, rdi
|
||||
mov r10, r8
|
||||
add r11, rax
|
||||
mov QWORD PTR [r14+8], r11
|
||||
and r10d, 2097136
|
||||
movdqu xmm6, [rdi+rbx]
|
||||
mov r10d, edi
|
||||
xor r11, r12
|
||||
dec rsi
|
||||
jne main_loop_ivybridge
|
||||
|
|
|
@ -65,7 +65,8 @@ xmrig::CpuThread::cn_hash_fun xmrig::CpuThread::fn(Algo algorithm, AlgoVariant a
|
|||
{
|
||||
assert(variant >= VARIANT_0 && variant < VARIANT_MAX);
|
||||
|
||||
static const cn_hash_fun func_table[VARIANT_MAX * 10 * 3] = {
|
||||
constexpr const size_t count = VARIANT_MAX * 10 * 3;
|
||||
static const cn_hash_fun func_table[count + 2] = {
|
||||
cryptonight_single_hash<CRYPTONIGHT, false, VARIANT_0>,
|
||||
cryptonight_double_hash<CRYPTONIGHT, false, VARIANT_0>,
|
||||
cryptonight_single_hash<CRYPTONIGHT, true, VARIANT_0>,
|
||||
|
@ -242,6 +243,8 @@ xmrig::CpuThread::cn_hash_fun xmrig::CpuThread::fn(Algo algorithm, AlgoVariant a
|
|||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
|
||||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
|
||||
# endif
|
||||
cryptonight_single_hash_asm<CRYPTONIGHT, VARIANT_2, ASM_INTEL>,
|
||||
cryptonight_single_hash_asm<CRYPTONIGHT, VARIANT_2, ASM_RYZEN>
|
||||
};
|
||||
|
||||
const size_t index = VARIANT_MAX * 10 * algorithm + 10 * variant + av - 1;
|
||||
|
|
|
@ -21,8 +21,8 @@
|
|||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef __WORKER_H__
|
||||
#define __WORKER_H__
|
||||
#ifndef XMRIG_WORKER_H
|
||||
#define XMRIG_WORKER_H
|
||||
|
||||
|
||||
#include <atomic>
|
||||
|
@ -33,7 +33,6 @@
|
|||
#include "Mem.h"
|
||||
|
||||
|
||||
struct cryptonight_ctx;
|
||||
class Handle;
|
||||
|
||||
|
||||
|
@ -67,4 +66,4 @@ protected:
|
|||
};
|
||||
|
||||
|
||||
#endif /* __WORKER_H__ */
|
||||
#endif /* XMRIG_WORKER_H */
|
||||
|
|
Loading…
Reference in a new issue