Merge branch 'xmrig:master' into master

Commit fe8008ee19: 140 changed files with 7179 additions and 5223 deletions.
.github/ISSUE_TEMPLATE/bug_report.md (3 changes, vendored)

@@ -17,6 +17,9 @@ Steps to reproduce the behavior.
A clear and concise description of what you expected to happen.

**Required data**
- XMRig version
- Either the exact link to a release you downloaded from https://github.com/xmrig/xmrig/releases
- Or the exact command lines that you used to build XMRig
- Miner log as text or screenshot
- Config file or command line (without wallets)
- OS: [e.g. Windows]
CHANGELOG.md (53 changes)

@@ -1,3 +1,56 @@
# v6.21.3
- [#3462](https://github.com/xmrig/xmrig/pull/3462) RandomX: correct memcpy size for JIT initialization.

# v6.21.2
- The dependencies of all prebuilt releases have been updated. Support for old Ubuntu releases has been dropped.
- [#2800](https://github.com/xmrig/xmrig/issues/2800) Fixed donation with GhostRider algorithm for builds without KawPow algorithm.
- [#3436](https://github.com/xmrig/xmrig/pull/3436) Fixed, the file log writer was not thread-safe.
- [#3450](https://github.com/xmrig/xmrig/pull/3450) Fixed RandomX crash when compiled with fortify_source.

# v6.21.1
- [#3391](https://github.com/xmrig/xmrig/pull/3391) Added support for townforge (monero fork using randomx).
- [#3399](https://github.com/xmrig/xmrig/pull/3399) Fixed Zephyr mining (OpenCL).
- [#3420](https://github.com/xmrig/xmrig/pull/3420) Fixed segfault in HTTP API rebind.

# v6.21.0
- [#3302](https://github.com/xmrig/xmrig/pull/3302) [#3312](https://github.com/xmrig/xmrig/pull/3312) Enabled keepalive for Windows (>= Vista).
- [#3320](https://github.com/xmrig/xmrig/pull/3320) Added "built for OS/architecture/bits" to "ABOUT".
- [#3339](https://github.com/xmrig/xmrig/pull/3339) Added SNI option for TLS connections.
- [#3342](https://github.com/xmrig/xmrig/pull/3342) Update `cn_main_loop.asm`.
- [#3346](https://github.com/xmrig/xmrig/pull/3346) ARM64 JIT: don't use `x18` register.
- [#3348](https://github.com/xmrig/xmrig/pull/3348) Update to latest `sse2neon.h`.
- [#3356](https://github.com/xmrig/xmrig/pull/3356) Updated pricing record size for **Zephyr** solo mining.
- [#3358](https://github.com/xmrig/xmrig/pull/3358) **Zephyr** solo mining: handle multiple outputs.

# v6.20.0
- Added new ARM CPU names.
- [#2394](https://github.com/xmrig/xmrig/pull/2394) Added new CMake options `ARM_V8` and `ARM_V7`.
- [#2830](https://github.com/xmrig/xmrig/pull/2830) Added API rebind polling.
- [#2927](https://github.com/xmrig/xmrig/pull/2927) Fixed compatibility with hwloc 1.11.x.
- [#3060](https://github.com/xmrig/xmrig/pull/3060) Added x86 to `README.md`.
- [#3236](https://github.com/xmrig/xmrig/pull/3236) Fixed: receive CUDA loader error on Linux too.
- [#3290](https://github.com/xmrig/xmrig/pull/3290) Added [Zephyr](https://www.zephyrprotocol.com/) coin support for solo mining.

# v6.19.3
- [#3245](https://github.com/xmrig/xmrig/issues/3245) Improved algorithm negotiation for donation rounds by sending extra information about current mining job.
- [#3254](https://github.com/xmrig/xmrig/pull/3254) Tweaked auto-tuning for Intel CPUs.
- [#3271](https://github.com/xmrig/xmrig/pull/3271) RandomX: optimized program generation.
- [#3273](https://github.com/xmrig/xmrig/pull/3273) RandomX: fixed undefined behavior.
- [#3275](https://github.com/xmrig/xmrig/pull/3275) RandomX: fixed `jccErratum` list.
- [#3280](https://github.com/xmrig/xmrig/pull/3280) Updated example scripts.

# v6.19.2
- [#3230](https://github.com/xmrig/xmrig/pull/3230) Fixed parsing of `TX_EXTRA_MERGE_MINING_TAG`.
- [#3232](https://github.com/xmrig/xmrig/pull/3232) Added new `X-Hash-Difficulty` HTTP header.
- [#3240](https://github.com/xmrig/xmrig/pull/3240) Improved .cmd files when run by shortcuts on another drive.
- [#3241](https://github.com/xmrig/xmrig/pull/3241) Added view tag calculation (fixes Wownero solo mining issue).

# v6.19.1
- Resolved deprecated methods warnings with OpenSSL 3.0.
- [#3213](https://github.com/xmrig/xmrig/pull/3213) Fixed build with 32-bit clang 15.
- [#3218](https://github.com/xmrig/xmrig/pull/3218) Fixed: `--randomx-wrmsr=-1` worked only on Intel.
- [#3228](https://github.com/xmrig/xmrig/pull/3228) Fixed build with gcc 13.

# v6.19.0
- [#3144](https://github.com/xmrig/xmrig/pull/3144) Update to latest `sse2neon.h`.
- [#3161](https://github.com/xmrig/xmrig/pull/3161) MSVC build: enabled parallel compilation.
@@ -1,4 +1,4 @@
cmake_minimum_required(VERSION 3.1)
cmake_minimum_required(VERSION 3.5)
project(xmrig)

option(WITH_HWLOC "Enable hwloc support" ON)

@@ -34,7 +34,8 @@ option(WITH_SECURE_JIT "Enable secure access to JIT memory" OFF)
option(WITH_DMI "Enable DMI/SMBIOS reader" ON)

option(BUILD_STATIC "Build static binary" OFF)
option(ARM_TARGET "Force use specific ARM target 8 or 7" 0)
option(ARM_V8 "Force ARMv8 (64 bit) architecture, use with caution if automatic detection fails, but you sure it may work" OFF)
option(ARM_V7 "Force ARMv7 (32 bit) architecture, use with caution if automatic detection fails, but you sure it may work" OFF)
option(HWLOC_DEBUG "Enable hwloc debug helpers and log" OFF)

@@ -161,7 +162,7 @@ if (XMRIG_OS_WIN)
src/crypto/common/VirtualMemory_win.cpp
)

set(EXTRA_LIBS ws2_32 psapi iphlpapi userenv)
set(EXTRA_LIBS ws2_32 psapi iphlpapi userenv dbghelp)
elseif (XMRIG_OS_APPLE)
list(APPEND SOURCES_OS
src/App_unix.cpp
@@ -10,7 +10,7 @@
XMRig is a high performance, open source, cross platform RandomX, KawPow, CryptoNight and [GhostRider](https://github.com/xmrig/xmrig/tree/master/src/crypto/ghostrider#readme) unified CPU/GPU miner and [RandomX benchmark](https://xmrig.com/benchmark). Official binaries are available for Windows, Linux, macOS and FreeBSD.

## Mining backends
- **CPU** (x64/ARMv7/ARMv8)
- **CPU** (x86/x64/ARMv7/ARMv8)
- **OpenCL** for AMD GPUs.
- **CUDA** for NVIDIA GPUs via external [CUDA plugin](https://github.com/xmrig/xmrig-cuda).
@@ -29,6 +29,12 @@ else()
set(WITH_VAES OFF)
endif()

if (ARM_V8)
set(ARM_TARGET 8)
elseif (ARM_V7)
set(ARM_TARGET 7)
endif()

if (NOT ARM_TARGET)
if (CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm64|armv8-a)$")
set(ARM_TARGET 8)
@@ -10,7 +10,7 @@ if ("${CMAKE_BUILD_TYPE}" STREQUAL "")
endif()

if (CMAKE_BUILD_TYPE STREQUAL "Release")
add_definitions(/DNDEBUG)
add_definitions(-DNDEBUG)
endif()

include(CheckSymbolExists)

@@ -32,7 +32,7 @@ if (CMAKE_CXX_COMPILER_ID MATCHES GNU)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -maes")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -maes")

add_definitions(/DHAVE_ROTR)
add_definitions(-DHAVE_ROTR)
endif()

if (WIN32)

@@ -49,16 +49,7 @@ if (CMAKE_CXX_COMPILER_ID MATCHES GNU)
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static")
endif()

add_definitions(/D_GNU_SOURCE)

if (${CMAKE_VERSION} VERSION_LESS "3.1.0")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=c99")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
endif()

#set(CMAKE_C_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -gdwarf-2")

add_definitions(/DHAVE_BUILTIN_CLEAR_CACHE)
add_definitions(-D_GNU_SOURCE -DHAVE_BUILTIN_CLEAR_CACHE)

elseif (CMAKE_CXX_COMPILER_ID MATCHES MSVC)
set(CMAKE_C_FLAGS_RELEASE "/MP /MT /O2 /Oi /DNDEBUG /GL")

@@ -67,10 +58,7 @@ elseif (CMAKE_CXX_COMPILER_ID MATCHES MSVC)
set(CMAKE_C_FLAGS_RELWITHDEBINFO "/MP /Ob1 /Zi /DRELWITHDEBINFO")
set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "/MP /Ob1 /Zi /DRELWITHDEBINFO")

add_definitions(/D_CRT_SECURE_NO_WARNINGS)
add_definitions(/D_CRT_NONSTDC_NO_WARNINGS)
add_definitions(/DNOMINMAX)
add_definitions(/DHAVE_ROTR)
add_definitions(-D_CRT_SECURE_NO_WARNINGS -D_CRT_NONSTDC_NO_WARNINGS -DNOMINMAX -DHAVE_ROTR)

elseif (CMAKE_CXX_COMPILER_ID MATCHES Clang)

@@ -92,7 +80,7 @@ elseif (CMAKE_CXX_COMPILER_ID MATCHES Clang)

check_symbol_exists("_rotr" "x86intrin.h" HAVE_ROTR)
if (HAVE_ROTR)
add_definitions(/DHAVE_ROTR)
add_definitions(-DHAVE_ROTR)
endif()
endif()

@@ -105,6 +93,6 @@ endif()

if (NOT WIN32)
check_symbol_exists("__builtin___clear_cache" "stdlib.h" HAVE_BUILTIN_CLEAR_CACHE)
if (HAVE_BUILTIN_CLEAR_CACHE)
add_definitions(/DHAVE_BUILTIN_CLEAR_CACHE)
add_definitions(-DHAVE_BUILTIN_CLEAR_CACHE)
endif()
endif()
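For context on the hunks above: a feature macro like HAVE_ROTR produced by check_symbol_exists() is typically consumed in C roughly as in the sketch below. This is illustrative only and not taken from the xmrig sources; the fallback rotation is an assumption about how such a macro is usually used.

/* Hedged sketch: consuming a HAVE_ROTR macro detected at configure time. */
#include <stdint.h>
#ifdef HAVE_ROTR
#   include <x86intrin.h>   /* provides the _rotr() intrinsic on x86 compilers */
#endif

static inline uint32_t rotr32(uint32_t value, unsigned int count)
{
#ifdef HAVE_ROTR
    return _rotr(value, (int)count);              /* compiler intrinsic */
#else
    count &= 31U;                                 /* portable fallback */
    return (value >> count) | (value << ((32U - count) & 31U));
#endif
}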
@@ -256,7 +256,7 @@

# v2.8.0
- **[#753](https://github.com/xmrig/xmrig/issues/753) Added new algorithm [CryptoNight variant 2](https://github.com/xmrig/xmrig/issues/753) for Monero fork, thanks [@SChernykh](https://github.com/SChernykh).**
- Added global and per thread option `"asm"` and and command line equivalent.
- Added global and per thread option `"asm"` and command line equivalent.
- **[#758](https://github.com/xmrig/xmrig/issues/758) Added SSL/TLS support for secure connections to pools.**
- Added per pool options `"tls"` and `"tls-fingerprint"` and command line equivalents.
- [#767](https://github.com/xmrig/xmrig/issues/767) Added config autosave feature, same with GPU miners.
@@ -1,4 +1,4 @@
@echo off
cd %~dp0
cd /d "%~dp0"
xmrig.exe --bench=10M --submit
pause
@@ -1,4 +1,4 @@
@echo off
cd %~dp0
cd /d "%~dp0"
xmrig.exe --bench=1M --submit
pause
@@ -1,7 +1,7 @@
#!/bin/bash -e
#!/bin/sh -e

HWLOC_VERSION_MAJOR="2"
HWLOC_VERSION_MINOR="9"
HWLOC_VERSION_MINOR="10"
HWLOC_VERSION_PATCH="0"

HWLOC_VERSION="${HWLOC_VERSION_MAJOR}.${HWLOC_VERSION_MINOR}.${HWLOC_VERSION_PATCH}"
@@ -1,4 +1,4 @@
#!/bin/bash -e
#!/bin/sh -e

HWLOC_VERSION="1.11.13"
@@ -1,4 +1,4 @@
#!/bin/bash -e
#!/bin/sh -e

LIBRESSL_VERSION="3.5.2"
@@ -1,4 +1,4 @@
#!/bin/bash -e
#!/bin/sh -e

OPENSSL_VERSION="1.1.1s"
@@ -1,6 +1,6 @@
#!/bin/bash -e
#!/bin/sh -e

OPENSSL_VERSION="3.0.7"
OPENSSL_VERSION="3.0.13"

mkdir -p deps
mkdir -p deps/include
@@ -1,6 +1,6 @@
#!/bin/bash -e
#!/bin/sh -e

UV_VERSION="1.44.2"
UV_VERSION="1.48.0"

mkdir -p deps
mkdir -p deps/include

@@ -8,10 +8,10 @@ mkdir -p deps/lib

mkdir -p build && cd build

wget https://github.com/libuv/libuv/archive/v${UV_VERSION}.tar.gz -O v${UV_VERSION}.tar.gz
wget https://dist.libuv.org/dist/v${UV_VERSION}/libuv-v${UV_VERSION}.tar.gz -O v${UV_VERSION}.tar.gz
tar -xzf v${UV_VERSION}.tar.gz

cd libuv-${UV_VERSION}
cd libuv-v${UV_VERSION}
sh autogen.sh
./configure --disable-shared
make -j$(nproc || sysctl -n hw.ncpu || sysctl -n hw.logicalcpu)
@@ -1,5 +1,5 @@
#!/bin/bash -e
#!/bin/sh -e

./build.uv.sh
./build.hwloc.sh
./build.openssl.sh
./build.openssl3.sh
@@ -1,4 +1,4 @@
#!/bin/bash -e
#!/bin/sh -e

# https://xmrig.com/docs/miner/hugepages#onegb-huge-pages
@@ -15,6 +15,6 @@
:: Choose pools outside of top 5 to help Monero network be more decentralized!
:: Smaller pools also often have smaller fees/payout limits.

cd %~dp0
xmrig.exe -o pool.hashvault.pro:3333 -u 48edfHu7V9Z84YzzMa6fUueoELZ9ZRXq9VetWzYGzKt52XU5xvqgzYnDK9URnRoJMk1j8nLwEVsaSWJ4fhdUyZijBGUicoD -p x
cd /d "%~dp0"
xmrig.exe -o xmrpool.eu:3333 -u 48edfHu7V9Z84YzzMa6fUueoELZ9ZRXq9VetWzYGzKt52XU5xvqgzYnDK9URnRoJMk1j8nLwEVsaSWJ4fhdUyZijBGUicoD -p x
pause
@@ -15,7 +15,7 @@
:: Choose pools outside of top 5 to help Raptoreum network be more decentralized!
:: Smaller pools also often have smaller fees/payout limits.

cd %~dp0
cd /d "%~dp0"
:: Use this command line to connect to non-SSL port
xmrig.exe -a gr -o raptoreumemporium.com:3008 -u WALLET_ADDRESS -p x
:: Or use this command line to connect to an SSL port
@@ -11,6 +11,6 @@
:: Mining solo is the best way to help Monero network be more decentralized!
:: But you will only get a payout when you find a block which can take more than a year for a single low-end PC.

cd %~dp0
xmrig.exe -o node.xmr.to:18081 -a rx/0 -u 48edfHu7V9Z84YzzMa6fUueoELZ9ZRXq9VetWzYGzKt52XU5xvqgzYnDK9URnRoJMk1j8nLwEVsaSWJ4fhdUyZijBGUicoD --daemon
cd /d "%~dp0"
xmrig.exe -o YOUR_NODE_IP:18081 -a rx/0 -u 48edfHu7V9Z84YzzMa6fUueoELZ9ZRXq9VetWzYGzKt52XU5xvqgzYnDK9URnRoJMk1j8nLwEVsaSWJ4fhdUyZijBGUicoD --daemon
pause
src/3rdparty/argon2/CMakeLists.txt (2 changes, vendored)

@@ -1,4 +1,4 @@
cmake_minimum_required(VERSION 3.1)
cmake_minimum_required(VERSION 3.5)

project(argon2 C)
set(CMAKE_C_STANDARD 99)
src/3rdparty/epee/README.md (2 changes, vendored)

@@ -1 +1 @@
epee - is a small library of helpers, wrappers, tools and and so on, used to make my life easier.
epee - is a small library of helpers, wrappers, tools and so on, used to make my life easier.
src/3rdparty/hwloc/CMakeLists.txt (2 changes, vendored)

@@ -1,4 +1,4 @@
cmake_minimum_required(VERSION 3.1)
cmake_minimum_required(VERSION 3.5)
project (hwloc C)

include_directories(include)
src/3rdparty/hwloc/NEWS (107 changes, vendored)

@@ -1,5 +1,5 @@
Copyright © 2009 CNRS
Copyright © 2009-2022 Inria. All rights reserved.
Copyright © 2009-2023 Inria. All rights reserved.
Copyright © 2009-2013 Université Bordeaux
Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved.
Copyright © 2020 Hewlett Packard Enterprise. All rights reserved.

@@ -17,6 +17,103 @@ bug fixes (and other actions) for each version of hwloc since version
0.9.


Version 2.10.0
--------------
* Heterogeneous Memory core improvements
  + Better heuristics to identify the subtype of memory such as HBM,
    DRAM, NVM, CXL-DRAM, etc.
  + Build memory tiers, i.e. sets of NUMA nodes with the same subtype
    and similar performance.
    - NUMA node tier ranks are exposed in the new MemoryTier info
      attribute (starts from 0 for highest bandwidth tier)..
  + See the new Heterogeneous Memory section in the documentation.
* API
  + Add hwloc_topology_free_group_object() to discard a Group created
    by hwloc_topology_alloc_group_object().
* Linux backend
  + Fix cpukinds on NVIDIA Grace to report identical cores even if they
    actually have very small frequency differences.
    Thanks to John C. Linford for the report.
  + Add CXLDevice attributes to CXL DAX objects and NUMA nodes to show
    which PCI device implements which window.
  + Ignore buggy memory-side caches and memory attributes when fake NUMA
    emulation is enabled on the Linux kernel command-line.
  + Add more info attributes in MemoryModule Misc objects,
    thanks to Zubiao Xiong for the patch.
  + Get CPUModel and CPUFamily info attributes on LoongArch platforms.
* x86 backend
  + Add support for new AMD CPUID leaf 0x80000026 for better detection
    of Core Complex and Die on Zen4 processors.
  + Improve Zhaoxin CPU topology detection.
* Tools
  + Input locations and many command-line options (e.g. hwloc-calc -I -N -H,
    lstopo --only) now accept filters such as "NUMA[HBM]" so that only
    objects are that type and subtype are considered.
    - NUMA[tier=1] is also accepted for selecting NUMA nodes depending
      on their MemoryTier info attribute.
  + Add --object-output to hwloc-calc to report the type as a prefix to
    object indexes, e.g. Core:2 instead of 2 in the output of -I.
  + hwloc-info --ancestor and --descendants now accepts kinds of objects
    instead of single types.
    - The new --first option only shows the first matching object.
  + Add --children-of-pid to hwloc-ps to show a hierarchy of processes.
    Thanks to Antoine Morvan for the suggestion.
  + Add --misc-from to lstopo to add Misc objects described in a file.
    - To be combined with the new hwloc-ps --lstopo-misc for a customizable
      lstopo --top replacement.
* Misc
  + lstopo may now configure the layout of memory object placed above,
    for instance with --children-order memory:above:vert.
  + Fix XML import from memory or stdin when using libxml2 2.12.
  + Fix installation failures when configuring with --target,
    thanks to Clement Foyer for the patch.
  + Fix support for 128bit pointer architectures.
  + Remove Netloc.


Version 2.9.3
-------------
* Handle Linux glibc allocation errors in binding routines (CVE-2022-47022).
* Fix hwloc-calc when searching objects on heterogeneous memory platforms,
  thanks to Antoine Morvan for the report.
* Fix hwloc_get_next_child() when there are some memory-side caches.
* Don't crash if the topology is empty because Linux cgroups are wrong.
* Improve some hwloc-bind warnings in case of command-line parsing errors.
* Many documentation improvements all over the place, including:
  + hwloc_topology_restrict() and hwloc_topology_insert_group() may reorder
    children, causing the logical indexes of objects to change.


Version 2.9.2
-------------
* Don't forget L3i when defining filters for multiple levels of caches
  with hwloc_topology_set_cache/icache_types_filter().
* Fix object total_memory after hwloc_topology_insert_group_object().
* Fix the (non-yet) exporting in synthetic description for complex memory
  hierarchies with memory-side caches, etc.
* Fix some default size attributes when building synthetic topologies.
* Fix size units in hwloc-annotate.
* Improve bitmap reallocation error management in many functions.
* Documentation improvements:
  + Better document return values of functions.
  + Add "Error reporting" section (in hwloc.h and in the doxygen doc).
  + Add FAQ entry "What may I disable to make hwloc faster?"
  + Improve FAQ entries "Why is lstopo slow?" and
    "I only need ..., why should I use hwloc?"
  + Clarify how to deal with cpukinds in hwloc-calc and hwloc-bind
    manpages.


Version 2.9.1
-------------
* Don't forget to apply object type filters to "perflevel" caches detected
  on recent Mac OS X releases, thanks to Michel Lesoinne for the report.
* Fix a failed assertion in hwloc_topology_restrict() when some NUMA nodes
  are removed because of HWLOC_RESTRICT_FLAG_REMOVE_CPULESS but no PUs are.
  Thanks to Mark Grondona for reporting the issue.
* Mark HPE Cray Slingshot NICs with subtype "Slingshot".


Version 2.9.0
-------------
* Backends

@@ -61,6 +158,14 @@ Version 2.8.0
  file from the documentation.


Version 2.7.2
-------------
* Fix a crash when LevelZero devices have multiple subdevices,
  e.g. on PonteVecchio GPUs, thanks to Jonathan Peyton.
* Fix a leak when importing cpukinds from XML,
  thanks to Hui Zhou.


Version 2.7.1
-------------
* Workaround crashes when virtual machines report incoherent x86 CPUID
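The hwloc_topology_free_group_object() entry in the 2.10.0 notes above could be used roughly as in the sketch below. This is a hedged illustration only: the signature is assumed to mirror hwloc_topology_alloc_group_object(), and the code is not part of this diff.

/* Hedged sketch: discard a Group object that was allocated but never inserted. */
#include <hwloc.h>

static void discard_unused_group(hwloc_topology_t topology)
{
    hwloc_obj_t group = hwloc_topology_alloc_group_object(topology);
    if (!group)
        return;

    /* ... decide the Group is not needed after all, so release it instead of
     * passing it to hwloc_topology_insert_group_object(). Assumed signature. */
    hwloc_topology_free_group_object(topology, group);
}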
src/3rdparty/hwloc/README (496 changes, vendored)

@@ -1,4 +1,8 @@
Introduction
This is a truncated and poorly-formatted version of the documentation main page.
See https://www.open-mpi.org/projects/hwloc/doc/ for more.


hwloc Overview

The Hardware Locality (hwloc) software project aims at easing the process of
discovering hardware resources in parallel architectures. It offers
@@ -8,66 +12,456 @@ high-performance computing (HPC) applications, but is also applicable to any
project seeking to exploit code and/or data locality on modern computing
platforms.

hwloc is actually made of two subprojects distributed together:
hwloc provides command line tools and a C API to obtain the hierarchical map of
key computing elements within a node, such as: NUMA memory nodes, shared
caches, processor packages, dies and cores, processing units (logical
processors or "threads") and even I/O devices. hwloc also gathers various
attributes such as cache and memory information, and is portable across a
variety of different operating systems and platforms.

* The original hwloc project for describing the internals of computing nodes.
  It is described in details starting at section Hardware Locality (hwloc)
  Introduction.
* The network-oriented companion called netloc (Network Locality), described
  in details starting with section Network Locality (netloc).
hwloc primarily aims at helping high-performance computing (HPC) applications,
but is also applicable to any project seeking to exploit code and/or data
locality on modern computing platforms.

See also the Related pages tab above for links to other sections.
hwloc supports the following operating systems:

Netloc may be disabled, but the original hwloc cannot. Both hwloc and netloc
APIs are documented after these sections.
* Linux (with knowledge of cgroups and cpusets, memory targets/initiators,
  etc.) on all supported hardware, including Intel Xeon Phi, ScaleMP vSMP,
  and NumaScale NumaConnect.
* Solaris (with support for processor sets and logical domains)
* AIX
* Darwin / OS X
* FreeBSD and its variants (such as kFreeBSD/GNU)
* NetBSD
* HP-UX
* Microsoft Windows
* IBM BlueGene/Q Compute Node Kernel (CNK)

Installation
Since it uses standard Operating System information, hwloc's support is mostly
independant from the processor type (x86, powerpc, ...) and just relies on the
Operating System support. The main exception is BSD operating systems (NetBSD,
FreeBSD, etc.) because they do not provide support topology information, hence
hwloc uses an x86-only CPUID-based backend (which can be used for other OSes
too, see the Components and plugins section).

hwloc (https://www.open-mpi.org/projects/hwloc/) is available under the BSD
license. It is hosted as a sub-project of the overall Open MPI project (https:/
/www.open-mpi.org/). Note that hwloc does not require any functionality from
Open MPI -- it is a wholly separate (and much smaller!) project and code base.
It just happens to be hosted as part of the overall Open MPI project.
To check whether hwloc works on a particular machine, just try to build it and
run lstopo or lstopo-no-graphics. If some things do not look right (e.g. bogus
or missing cache information), see Questions and Bugs.

Basic Installation
hwloc only reports the number of processors on unsupported operating systems;
no topology information is available.

Installation is the fairly common GNU-based process:
For development and debugging purposes, hwloc also offers the ability to work
on "fake" topologies:

shell$ ./configure --prefix=...
shell$ make
shell$ make install
* Symmetrical tree of resources generated from a list of level arities, see
  Synthetic topologies.
* Remote machine simulation through the gathering of topology as XML files,
  see Importing and exporting topologies from/to XML files.

hwloc- and netloc-specific configure options and requirements are documented in
sections hwloc Installation and Netloc Installation respectively.
hwloc can display the topology in a human-readable format, either in graphical
mode (X11), or by exporting in one of several different formats, including:
plain text, LaTeX tikzpicture, PDF, PNG, and FIG (see Command-line Examples
below). Note that some of the export formats require additional support
libraries.

Also note that if you install supplemental libraries in non-standard locations,
hwloc's configure script may not be able to find them without some help. You
may need to specify additional CPPFLAGS, LDFLAGS, or PKG_CONFIG_PATH values on
the configure command line.
hwloc offers a programming interface for manipulating topologies and objects.
It also brings a powerful CPU bitmap API that is used to describe topology
objects location on physical/logical processors. See the Programming Interface
below. It may also be used to binding applications onto certain cores or memory
nodes. Several utility programs are also provided to ease command-line
manipulation of topology objects, binding of processes, and so on.

For example, if libpciaccess was installed into /opt/pciaccess, hwloc's
configure script may not find it be default. Try adding PKG_CONFIG_PATH to the
./configure command line, like this:
Bindings for several other languages are available from the project website.

./configure PKG_CONFIG_PATH=/opt/pciaccess/lib/pkgconfig ...
Command-line Examples

Running the "lstopo" tool is a good way to check as a graphical output whether
hwloc properly detected the architecture of your node. Netloc command-line
tools can be used to display the network topology interconnecting your nodes.
On a 4-package 2-core machine with hyper-threading, the lstopo tool may show
the following graphical output:

Installing from a Git clone
[dudley]

Additionally, the code can be directly cloned from Git:
Here's the equivalent output in textual form:

shell$ git clone https://github.com/open-mpi/hwloc.git
shell$ cd hwloc
shell$ ./autogen.sh
Machine
  NUMANode L#0 (P#0)
    Package L#0 + L3 L#0 (4096KB)
      L2 L#0 (1024KB) + L1 L#0 (16KB) + Core L#0
        PU L#0 (P#0)
        PU L#1 (P#8)
      L2 L#1 (1024KB) + L1 L#1 (16KB) + Core L#1
        PU L#2 (P#4)
        PU L#3 (P#12)
    Package L#1 + L3 L#1 (4096KB)
      L2 L#2 (1024KB) + L1 L#2 (16KB) + Core L#2
        PU L#4 (P#1)
        PU L#5 (P#9)
      L2 L#3 (1024KB) + L1 L#3 (16KB) + Core L#3
        PU L#6 (P#5)
        PU L#7 (P#13)
    Package L#2 + L3 L#2 (4096KB)
      L2 L#4 (1024KB) + L1 L#4 (16KB) + Core L#4
        PU L#8 (P#2)
        PU L#9 (P#10)
      L2 L#5 (1024KB) + L1 L#5 (16KB) + Core L#5
        PU L#10 (P#6)
        PU L#11 (P#14)
    Package L#3 + L3 L#3 (4096KB)
      L2 L#6 (1024KB) + L1 L#6 (16KB) + Core L#6
        PU L#12 (P#3)
        PU L#13 (P#11)
      L2 L#7 (1024KB) + L1 L#7 (16KB) + Core L#7
        PU L#14 (P#7)
        PU L#15 (P#15)

Note that GNU Autoconf >=2.63, Automake >=1.11 and Libtool >=2.2.6 are required
when building from a Git clone.
Note that there is also an equivalent output in XML that is meant for exporting
/importing topologies but it is hardly readable to human-beings (see Importing
and exporting topologies from/to XML files for details).

Nightly development snapshots are available on the web site, they can be
configured and built without any need for Git or GNU Autotools.
On a 4-package 2-core Opteron NUMA machine (with two core cores disallowed by
the administrator), the lstopo tool may show the following graphical output
(with --disallowed for displaying disallowed objects):

[hagrid]

Here's the equivalent output in textual form:

Machine (32GB total)
  Package L#0
    NUMANode L#0 (P#0 8190MB)
    L2 L#0 (1024KB) + L1 L#0 (64KB) + Core L#0 + PU L#0 (P#0)
    L2 L#1 (1024KB) + L1 L#1 (64KB) + Core L#1 + PU L#1 (P#1)
  Package L#1
    NUMANode L#1 (P#1 8192MB)
    L2 L#2 (1024KB) + L1 L#2 (64KB) + Core L#2 + PU L#2 (P#2)
    L2 L#3 (1024KB) + L1 L#3 (64KB) + Core L#3 + PU L#3 (P#3)
  Package L#2
    NUMANode L#2 (P#2 8192MB)
    L2 L#4 (1024KB) + L1 L#4 (64KB) + Core L#4 + PU L#4 (P#4)
    L2 L#5 (1024KB) + L1 L#5 (64KB) + Core L#5 + PU L#5 (P#5)
  Package L#3
    NUMANode L#3 (P#3 8192MB)
    L2 L#6 (1024KB) + L1 L#6 (64KB) + Core L#6 + PU L#6 (P#6)
    L2 L#7 (1024KB) + L1 L#7 (64KB) + Core L#7 + PU L#7 (P#7)

On a 2-package quad-core Xeon (pre-Nehalem, with 2 dual-core dies into each
package):

[emmett]

Here's the same output in textual form:

Machine (total 16GB)
  NUMANode L#0 (P#0 16GB)
  Package L#0
    L2 L#0 (4096KB)
      L1 L#0 (32KB) + Core L#0 + PU L#0 (P#0)
      L1 L#1 (32KB) + Core L#1 + PU L#1 (P#4)
    L2 L#1 (4096KB)
      L1 L#2 (32KB) + Core L#2 + PU L#2 (P#2)
      L1 L#3 (32KB) + Core L#3 + PU L#3 (P#6)
  Package L#1
    L2 L#2 (4096KB)
      L1 L#4 (32KB) + Core L#4 + PU L#4 (P#1)
      L1 L#5 (32KB) + Core L#5 + PU L#5 (P#5)
    L2 L#3 (4096KB)
      L1 L#6 (32KB) + Core L#6 + PU L#6 (P#3)
      L1 L#7 (32KB) + Core L#7 + PU L#7 (P#7)
Programming Interface

The basic interface is available in hwloc.h. Some higher-level functions are
available in hwloc/helper.h to reduce the need to manually manipulate objects
and follow links between them. Documentation for all these is provided later in
this document. Developers may also want to look at hwloc/inlines.h which
contains the actual inline code of some hwloc.h routines, and at this document,
which provides good higher-level topology traversal examples.

To precisely define the vocabulary used by hwloc, a Terms and Definitions
section is available and should probably be read first.

Each hwloc object contains a cpuset describing the list of processing units
that it contains. These bitmaps may be used for CPU binding and Memory binding.
hwloc offers an extensive bitmap manipulation interface in hwloc/bitmap.h.

Moreover, hwloc also comes with additional helpers for interoperability with
several commonly used environments. See the Interoperability With Other
Software section for details.

The complete API documentation is available in a full set of HTML pages, man
pages, and self-contained PDF files (formatted for both both US letter and A4
formats) in the source tarball in doc/doxygen-doc/.

NOTE: If you are building the documentation from a Git clone, you will need to
have Doxygen and pdflatex installed -- the documentation will be built during
the normal "make" process. The documentation is installed during "make install"
to $prefix/share/doc/hwloc/ and your systems default man page tree (under
$prefix, of course).

Portability

Operating System have varying support for CPU and memory binding, e.g. while
some Operating Systems provide interfaces for all kinds of CPU and memory
bindings, some others provide only interfaces for a limited number of kinds of
CPU and memory binding, and some do not provide any binding interface at all.
Hwloc's binding functions would then simply return the ENOSYS error (Function
not implemented), meaning that the underlying Operating System does not provide
any interface for them. CPU binding and Memory binding provide more information
on which hwloc binding functions should be preferred because interfaces for
them are usually available on the supported Operating Systems.

Similarly, the ability of reporting topology information varies from one
platform to another. As shown in Command-line Examples, hwloc can obtain
information on a wide variety of hardware topologies. However, some platforms
and/or operating system versions will only report a subset of this information.
For example, on an PPC64-based system with 8 cores (each with 2 hardware
threads) running a default 2.6.18-based kernel from RHEL 5.4, hwloc is only
able to glean information about NUMA nodes and processor units (PUs). No
information about caches, packages, or cores is available.

Here's the graphical output from lstopo on this platform when Simultaneous
Multi-Threading (SMT) is enabled:

[ppc64-with]

And here's the graphical output from lstopo on this platform when SMT is
disabled:

[ppc64-with]

Notice that hwloc only sees half the PUs when SMT is disabled. PU L#6, for
example, seems to change location from NUMA node #0 to #1. In reality, no PUs
"moved" -- they were simply re-numbered when hwloc only saw half as many (see
also Logical index in Indexes and Sets). Hence, PU L#6 in the SMT-disabled
picture probably corresponds to PU L#12 in the SMT-enabled picture.

This same "PUs have disappeared" effect can be seen on other platforms -- even
platforms / OSs that provide much more information than the above PPC64 system.
This is an unfortunate side-effect of how operating systems report information
to hwloc.

Note that upgrading the Linux kernel on the same PPC64 system mentioned above
to 2.6.34, hwloc is able to discover all the topology information. The
following picture shows the entire topology layout when SMT is enabled:

[ppc64-full]

Developers using the hwloc API or XML output for portable applications should
therefore be extremely careful to not make any assumptions about the structure
of data that is returned. For example, per the above reported PPC topology, it
is not safe to assume that PUs will always be descendants of cores.

Additionally, future hardware may insert new topology elements that are not
available in this version of hwloc. Long-lived applications that are meant to
span multiple different hardware platforms should also be careful about making
structure assumptions. For example, a new element may someday exist between a
core and a PU.

API Example

The following small C example (available in the source tree as ``doc/examples/
hwloc-hello.c'') prints the topology of the machine and performs some thread
and memory binding. More examples are available in the doc/examples/ directory
of the source tree.
/* Example hwloc API program.
 *
 * See other examples under doc/examples/ in the source tree
 * for more details.
 *
 * Copyright (c) 2009-2016 Inria. All rights reserved.
 * Copyright (c) 2009-2011 Université Bordeaux
 * Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved.
 * See COPYING in top-level directory.
 *
 * hwloc-hello.c
 */
#include "hwloc.h"
#include <errno.h>
#include <stdio.h>
#include <string.h>

static void print_children(hwloc_topology_t topology, hwloc_obj_t obj,
                           int depth)
{
    char type[32], attr[1024];
    unsigned i;
    hwloc_obj_type_snprintf(type, sizeof(type), obj, 0);
    printf("%*s%s", 2*depth, "", type);
    if (obj->os_index != (unsigned) -1)
        printf("#%u", obj->os_index);
    hwloc_obj_attr_snprintf(attr, sizeof(attr), obj, " ", 0);
    if (*attr)
        printf("(%s)", attr);
    printf("\n");
    for (i = 0; i < obj->arity; i++) {
        print_children(topology, obj->children[i], depth + 1);
    }
}

int main(void)
{
    int depth;
    unsigned i, n;
    unsigned long size;
    int levels;
    char string[128];
    int topodepth;
    void *m;
    hwloc_topology_t topology;
    hwloc_cpuset_t cpuset;
    hwloc_obj_t obj;

    /* Allocate and initialize topology object. */
    hwloc_topology_init(&topology);

    /* ... Optionally, put detection configuration here to ignore
       some objects types, define a synthetic topology, etc....
       The default is to detect all the objects of the machine that
       the caller is allowed to access. See Configure Topology
       Detection. */

    /* Perform the topology detection. */
    hwloc_topology_load(topology);

    /* Optionally, get some additional topology information
       in case we need the topology depth later. */
    topodepth = hwloc_topology_get_depth(topology);

    /*****************************************************************
     * First example:
     * Walk the topology with an array style, from level 0 (always
     * the system level) to the lowest level (always the proc level).
     *****************************************************************/
    for (depth = 0; depth < topodepth; depth++) {
        printf("*** Objects at level %d\n", depth);
        for (i = 0; i < hwloc_get_nbobjs_by_depth(topology, depth);
             i++) {
            hwloc_obj_type_snprintf(string, sizeof(string),
                                    hwloc_get_obj_by_depth(topology, depth, i), 0);
            printf("Index %u: %s\n", i, string);
        }
    }

    /*****************************************************************
     * Second example:
     * Walk the topology with a tree style.
     *****************************************************************/
    printf("*** Printing overall tree\n");
    print_children(topology, hwloc_get_root_obj(topology), 0);

    /*****************************************************************
     * Third example:
     * Print the number of packages.
     *****************************************************************/
    depth = hwloc_get_type_depth(topology, HWLOC_OBJ_PACKAGE);
    if (depth == HWLOC_TYPE_DEPTH_UNKNOWN) {
        printf("*** The number of packages is unknown\n");
    } else {
        printf("*** %u package(s)\n",
               hwloc_get_nbobjs_by_depth(topology, depth));
    }

    /*****************************************************************
     * Fourth example:
     * Compute the amount of cache that the first logical processor
     * has above it.
     *****************************************************************/
    levels = 0;
    size = 0;
    for (obj = hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 0);
         obj;
         obj = obj->parent)
        if (hwloc_obj_type_is_cache(obj->type)) {
            levels++;
            size += obj->attr->cache.size;
        }
    printf("*** Logical processor 0 has %d caches totaling %luKB\n",
           levels, size / 1024);

    /*****************************************************************
     * Fifth example:
     * Bind to only one thread of the last core of the machine.
     *
     * First find out where cores are, or else smaller sets of CPUs if
     * the OS doesn't have the notion of a "core".
     *****************************************************************/
    depth = hwloc_get_type_or_below_depth(topology, HWLOC_OBJ_CORE);

    /* Get last core. */
    obj = hwloc_get_obj_by_depth(topology, depth,
                                 hwloc_get_nbobjs_by_depth(topology, depth) - 1);
    if (obj) {
        /* Get a copy of its cpuset that we may modify. */
        cpuset = hwloc_bitmap_dup(obj->cpuset);

        /* Get only one logical processor (in case the core is
           SMT/hyper-threaded). */
        hwloc_bitmap_singlify(cpuset);

        /* And try to bind ourself there. */
        if (hwloc_set_cpubind(topology, cpuset, 0)) {
            char *str;
            int error = errno;
            hwloc_bitmap_asprintf(&str, obj->cpuset);
            printf("Couldn't bind to cpuset %s: %s\n", str, strerror(error));
            free(str);
        }

        /* Free our cpuset copy */
        hwloc_bitmap_free(cpuset);
    }

    /*****************************************************************
     * Sixth example:
     * Allocate some memory on the last NUMA node, bind some existing
     * memory to the last NUMA node.
     *****************************************************************/
    /* Get last node. There's always at least one. */
    n = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_NUMANODE);
    obj = hwloc_get_obj_by_type(topology, HWLOC_OBJ_NUMANODE, n - 1);

    size = 1024*1024;
    m = hwloc_alloc_membind(topology, size, obj->nodeset,
                            HWLOC_MEMBIND_BIND, HWLOC_MEMBIND_BYNODESET);
    hwloc_free(topology, m, size);

    m = malloc(size);
    hwloc_set_area_membind(topology, m, size, obj->nodeset,
                           HWLOC_MEMBIND_BIND, HWLOC_MEMBIND_BYNODESET);
    free(m);

    /* Destroy topology object. */
    hwloc_topology_destroy(topology);

    return 0;
}
hwloc provides a pkg-config executable to obtain relevant compiler and linker
flags. For example, it can be used thusly to compile applications that utilize
the hwloc library (assuming GNU Make):

CFLAGS += $(shell pkg-config --cflags hwloc)
LDLIBS += $(shell pkg-config --libs hwloc)

hwloc-hello: hwloc-hello.c
        $(CC) hwloc-hello.c $(CFLAGS) -o hwloc-hello $(LDLIBS)

On a machine 2 processor packages -- each package of which has two processing
cores -- the output from running hwloc-hello could be something like the
following:

shell$ ./hwloc-hello
*** Objects at level 0
Index 0: Machine
*** Objects at level 1
Index 0: Package#0
Index 1: Package#1
*** Objects at level 2
Index 0: Core#0
Index 1: Core#1
Index 2: Core#3
Index 3: Core#2
*** Objects at level 3
Index 0: PU#0
Index 1: PU#1
Index 2: PU#2
Index 3: PU#3
*** Printing overall tree
Machine
  Package#0
    Core#0
      PU#0
    Core#1
      PU#1
  Package#1
    Core#3
      PU#2
    Core#2
      PU#3
*** 2 package(s)
*** Logical processor 0 has 0 caches totaling 0KB
shell$

Questions and Bugs
@@ -80,6 +474,20 @@ www.open-mpi.org/community/lists/hwloc.php).

There is also a #hwloc IRC channel on Libera Chat (irc.libera.chat).

History / Credits

hwloc is the evolution and merger of the libtopology project and the Portable
Linux Processor Affinity (PLPA) (https://www.open-mpi.org/projects/plpa/)
project. Because of functional and ideological overlap, these two code bases
and ideas were merged and released under the name "hwloc" as an Open MPI
sub-project.

libtopology was initially developed by the Inria Runtime Team-Project. PLPA was
initially developed by the Open MPI development team as a sub-project. Both are
now deprecated in favor of hwloc, which is distributed as an Open MPI
sub-project.


See https://www.open-mpi.org/projects/hwloc/doc/ for more hwloc documentation.
See https://www.open-mpi.org/projects/hwloc/doc/ for more hwloc documentation,
actual links to related pages, images, etc.
src/3rdparty/hwloc/VERSION (7 changes, vendored)

@@ -8,7 +8,7 @@
# Please update HWLOC_VERSION* in contrib/windows/hwloc_config.h too.

major=2
minor=9
minor=10
release=0

# greek is used for alpha or beta release tags. If it is non-empty,

@@ -22,7 +22,7 @@ greek=

# The date when this release was created

date="Dec 14, 2022"
date="Dec 04, 2023"

# If snapshot=1, then use the value from snapshot_version as the
# entire hwloc version (i.e., ignore major, minor, release, and

@@ -41,7 +41,6 @@ snapshot_version=${major}.${minor}.${release}${greek}-git
# 2. Version numbers are described in the Libtool current:revision:age
# format.

libhwloc_so_version=21:1:6
libnetloc_so_version=0:0:0
libhwloc_so_version=22:0:7

# Please also update the <TargetName> lines in contrib/windows/libhwloc.vcxproj
@@ -1,6 +1,6 @@
/*
 * Copyright © 2009 CNRS
 * Copyright © 2009-2022 Inria. All rights reserved.
 * Copyright © 2009-2023 Inria. All rights reserved.
 * Copyright © 2009-2012 Université Bordeaux
 * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved.
 * See COPYING in top-level directory.

@@ -11,9 +11,9 @@
#ifndef HWLOC_CONFIG_H
#define HWLOC_CONFIG_H

#define HWLOC_VERSION "2.9.0"
#define HWLOC_VERSION "2.10.0"
#define HWLOC_VERSION_MAJOR 2
#define HWLOC_VERSION_MINOR 9
#define HWLOC_VERSION_MINOR 10
#define HWLOC_VERSION_RELEASE 0
#define HWLOC_VERSION_GREEK ""
src/3rdparty/hwloc/include/hwloc/bitmap.h (51 changes, vendored)

@@ -1,6 +1,6 @@
/*
 * Copyright © 2009 CNRS
 * Copyright © 2009-2022 Inria. All rights reserved.
 * Copyright © 2009-2023 Inria. All rights reserved.
 * Copyright © 2009-2012 Université Bordeaux
 * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved.
 * See COPYING in top-level directory.

@@ -50,9 +50,10 @@ extern "C" {
 * hwloc_bitmap_free(set);
 * \endcode
 *
 * \note Most functions below return an int that may be negative in case of
 * error. The usual error case would be an internal failure to realloc/extend
 * \note Most functions below return 0 on success and -1 on error.
 * The usual error case would be an internal failure to realloc/extend
 * the storage of the bitmap (\p errno would be set to \c ENOMEM).
 * See also \ref hwlocality_api_error_reporting.
 *
 * \note Several examples of using the bitmap API are available under the
 * doc/examples/ directory in the source tree.

@@ -83,7 +84,13 @@ typedef const struct hwloc_bitmap_s * hwloc_const_bitmap_t;
 */
HWLOC_DECLSPEC hwloc_bitmap_t hwloc_bitmap_alloc(void) __hwloc_attribute_malloc;

/** \brief Allocate a new full bitmap. */
/** \brief Allocate a new full bitmap.
 *
 * \returns A valid bitmap or \c NULL.
 *
 * The bitmap should be freed by a corresponding call to
 * hwloc_bitmap_free().
 */
HWLOC_DECLSPEC hwloc_bitmap_t hwloc_bitmap_alloc_full(void) __hwloc_attribute_malloc;

/** \brief Free bitmap \p bitmap.

@@ -119,11 +126,13 @@ HWLOC_DECLSPEC int hwloc_bitmap_snprintf(char * __hwloc_restrict buf, size_t buf

/** \brief Stringify a bitmap into a newly allocated string.
 *
 * \return -1 on error.
 * \return 0 on success, -1 on error.
 */
HWLOC_DECLSPEC int hwloc_bitmap_asprintf(char ** strp, hwloc_const_bitmap_t bitmap);

/** \brief Parse a bitmap string and stores it in bitmap \p bitmap.
 *
 * \return 0 on success, -1 on error.
 */
HWLOC_DECLSPEC int hwloc_bitmap_sscanf(hwloc_bitmap_t bitmap, const char * __hwloc_restrict string);

@@ -144,11 +153,13 @@ HWLOC_DECLSPEC int hwloc_bitmap_list_snprintf(char * __hwloc_restrict buf, size_

/** \brief Stringify a bitmap into a newly allocated list string.
 *
 * \return -1 on error.
 * \return 0 on success, -1 on error.
 */
HWLOC_DECLSPEC int hwloc_bitmap_list_asprintf(char ** strp, hwloc_const_bitmap_t bitmap);

/** \brief Parse a list string and stores it in bitmap \p bitmap.
 *
 * \return 0 on success, -1 on error.
 */
HWLOC_DECLSPEC int hwloc_bitmap_list_sscanf(hwloc_bitmap_t bitmap, const char * __hwloc_restrict string);

@@ -168,11 +179,13 @@ HWLOC_DECLSPEC int hwloc_bitmap_taskset_snprintf(char * __hwloc_restrict buf, si

/** \brief Stringify a bitmap into a newly allocated taskset-specific string.
 *
 * \return -1 on error.
 * \return 0 on success, -1 on error.
 */
HWLOC_DECLSPEC int hwloc_bitmap_taskset_asprintf(char ** strp, hwloc_const_bitmap_t bitmap);

/** \brief Parse a taskset-specific bitmap string and stores it in bitmap \p bitmap.
 *
 * \return 0 on success, -1 on error.
 */
HWLOC_DECLSPEC int hwloc_bitmap_taskset_sscanf(hwloc_bitmap_t bitmap, const char * __hwloc_restrict string);

@@ -279,6 +292,7 @@ HWLOC_DECLSPEC int hwloc_bitmap_to_ulongs(hwloc_const_bitmap_t bitmap, unsigned
 * When called on the output of hwloc_topology_get_topology_cpuset(),
 * the returned number is large enough for all cpusets of the topology.
 *
 * \return the number of unsigned longs required.
 * \return -1 if \p bitmap is infinite.
 */
HWLOC_DECLSPEC int hwloc_bitmap_nr_ulongs(hwloc_const_bitmap_t bitmap) __hwloc_attribute_pure;

@@ -305,21 +319,23 @@ HWLOC_DECLSPEC int hwloc_bitmap_isfull(hwloc_const_bitmap_t bitmap) __hwloc_attr

/** \brief Compute the first index (least significant bit) in bitmap \p bitmap
 *
 * \return -1 if no index is set in \p bitmap.
 * \return the first index set in \p bitmap.
 * \return -1 if \p bitmap is empty.
 */
HWLOC_DECLSPEC int hwloc_bitmap_first(hwloc_const_bitmap_t bitmap) __hwloc_attribute_pure;

/** \brief Compute the next index in bitmap \p bitmap which is after index \p prev
 *
 * If \p prev is -1, the first index is returned.
 *
 * \return the first index set in \p bitmap if \p prev is \c -1.
 * \return the next index set in \p bitmap if \p prev is not \c -1.
 * \return -1 if no index with higher index is set in \p bitmap.
 */
HWLOC_DECLSPEC int hwloc_bitmap_next(hwloc_const_bitmap_t bitmap, int prev) __hwloc_attribute_pure;

/** \brief Compute the last index (most significant bit) in bitmap \p bitmap
 *
 * \return -1 if no index is set in \p bitmap, or if \p bitmap is infinitely set.
 * \return the last index set in \p bitmap.
 * \return -1 if \p bitmap is empty, or if \p bitmap is infinitely set.
 */
HWLOC_DECLSPEC int hwloc_bitmap_last(hwloc_const_bitmap_t bitmap) __hwloc_attribute_pure;

@@ -327,28 +343,29 @@ HWLOC_DECLSPEC int hwloc_bitmap_last(hwloc_const_bitmap_t bitmap) __hwloc_attrib
 * indexes that are in the bitmap).
 *
 * \return the number of indexes that are in the bitmap.
 *
 * \return -1 if \p bitmap is infinitely set.
 */
HWLOC_DECLSPEC int hwloc_bitmap_weight(hwloc_const_bitmap_t bitmap) __hwloc_attribute_pure;

/** \brief Compute the first unset index (least significant bit) in bitmap \p bitmap
 *
 * \return -1 if no index is unset in \p bitmap.
 * \return the first unset index in \p bitmap.
 * \return -1 if \p bitmap is full.
 */
HWLOC_DECLSPEC int hwloc_bitmap_first_unset(hwloc_const_bitmap_t bitmap) __hwloc_attribute_pure;

/** \brief Compute the next unset index in bitmap \p bitmap which is after index \p prev
 *
 * If \p prev is -1, the first unset index is returned.
 *
 * \return the first index unset in \p bitmap if \p prev is \c -1.
 * \return the next index unset in \p bitmap if \p prev is not \c -1.
 * \return -1 if no index with higher index is unset in \p bitmap.
 */
HWLOC_DECLSPEC int hwloc_bitmap_next_unset(hwloc_const_bitmap_t bitmap, int prev) __hwloc_attribute_pure;

/** \brief Compute the last unset index (most significant bit) in bitmap \p bitmap
 *
 * \return -1 if no index is unset in \p bitmap, or if \p bitmap is infinitely set.
 * \return the last index unset in \p bitmap.
 * \return -1 if \p bitmap is full, or if \p bitmap is not infinitely set.
 */
HWLOC_DECLSPEC int hwloc_bitmap_last_unset(hwloc_const_bitmap_t bitmap) __hwloc_attribute_pure;

@@ -428,6 +445,8 @@ HWLOC_DECLSPEC int hwloc_bitmap_not (hwloc_bitmap_t res, hwloc_const_bitmap_t bi
/** \brief Test whether bitmaps \p bitmap1 and \p bitmap2 intersects.
 *
 * \return 1 if bitmaps intersect, 0 otherwise.
 *
 * \note The empty bitmap does not intersect any other bitmap.
 */
HWLOC_DECLSPEC int hwloc_bitmap_intersects (hwloc_const_bitmap_t bitmap1, hwloc_const_bitmap_t bitmap2) __hwloc_attribute_pure;
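A minimal usage sketch of the return conventions documented in the bitmap.h comments above (assumes a standard hwloc 2.x installation; error handling is abbreviated and the printed values are illustrative only):

/* Sketch: iterating a bitmap and checking the documented -1 return values. */
#include <hwloc.h>
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
    hwloc_bitmap_t set = hwloc_bitmap_alloc();      /* may return NULL on ENOMEM */
    if (!set)
        return 1;

    hwloc_bitmap_set(set, 1);                       /* single index */
    hwloc_bitmap_set_range(set, 4, 7);              /* indexes 4..7 */

    /* hwloc_bitmap_first()/hwloc_bitmap_next() return -1 when nothing (more) is set. */
    for (int i = hwloc_bitmap_first(set); i != -1; i = hwloc_bitmap_next(set, i))
        printf("index %d is set\n", i);

    /* hwloc_bitmap_weight() returns the number of set indexes,
     * or -1 for an infinitely set bitmap (e.g. hwloc_bitmap_alloc_full()). */
    printf("weight = %d\n", hwloc_bitmap_weight(set));

    char *str;
    if (hwloc_bitmap_list_asprintf(&str, set) != -1) {   /* negative only on error */
        printf("list form: %s\n", str);
        free(str);
    }

    hwloc_bitmap_free(set);
    return 0;
}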
src/3rdparty/hwloc/include/hwloc/cuda.h (8 changes, vendored)

@@ -1,5 +1,5 @@
/*
 * Copyright © 2010-2021 Inria. All rights reserved.
 * Copyright © 2010-2023 Inria. All rights reserved.
 * Copyright © 2010-2011 Université Bordeaux
 * Copyright © 2011 Cisco Systems, Inc. All rights reserved.
 * See COPYING in top-level directory.

@@ -42,6 +42,9 @@ extern "C" {
/** \brief Return the domain, bus and device IDs of the CUDA device \p cudevice.
 *
 * Device \p cudevice must match the local machine.
 *
 * \return 0 on success.
 * \return -1 on error, for instance if device information could not be found.
 */
static __hwloc_inline int
hwloc_cuda_get_device_pci_ids(hwloc_topology_t topology __hwloc_attribute_unused,

@@ -87,6 +90,9 @@ hwloc_cuda_get_device_pci_ids(hwloc_topology_t topology __hwloc_attribute_unused
 *
 * This function is currently only implemented in a meaningful way for
 * Linux; other systems will simply get a full cpuset.
 *
 * \return 0 on success.
 * \return -1 on error, for instance if device information could not be found.
 */
static __hwloc_inline int
hwloc_cuda_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused,
8  src/3rdparty/hwloc/include/hwloc/cudart.h  vendored

@@ -1,5 +1,5 @@
/*
 * Copyright © 2010-2021 Inria. All rights reserved.
 * Copyright © 2010-2023 Inria. All rights reserved.
 * Copyright © 2010-2011 Université Bordeaux
 * Copyright © 2011 Cisco Systems, Inc. All rights reserved.
 * See COPYING in top-level directory.

@@ -43,6 +43,9 @@ extern "C" {
/** \brief Return the domain, bus and device IDs of the CUDA device whose index is \p idx.
 *
 * Device index \p idx must match the local machine.
 *
 * \return 0 on success.
 * \return -1 on error, for instance if device information could not be found.
 */
static __hwloc_inline int
hwloc_cudart_get_device_pci_ids(hwloc_topology_t topology __hwloc_attribute_unused,

@@ -84,6 +87,9 @@ hwloc_cudart_get_device_pci_ids(hwloc_topology_t topology __hwloc_attribute_unus
 *
 * This function is currently only implemented in a meaningful way for
 * Linux; other systems will simply get a full cpuset.
 *
 * \return 0 on success.
 * \return -1 on error, for instance if device information could not be found.
 */
static __hwloc_inline int
hwloc_cudart_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused,
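A short sketch of the runtime-API variant: print the PCI location of the CUDA runtime device at index 0 (the index is an example value).

```c
#include <hwloc.h>
#include <hwloc/cudart.h>
#include <stdio.h>

/* Print the PCI location of CUDA runtime device 0 (sketch). */
static void print_cudart_device0_location(hwloc_topology_t topology)
{
    int domain, bus, dev;
    if (hwloc_cudart_get_device_pci_ids(topology, 0, &domain, &bus, &dev) == 0)
        printf("cudart device 0 is at %04x:%02x:%02x\n", domain, bus, dev);
}
```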
17  src/3rdparty/hwloc/include/hwloc/diff.h  vendored

@@ -1,5 +1,5 @@
/*
 * Copyright © 2013-2020 Inria. All rights reserved.
 * Copyright © 2013-2023 Inria. All rights reserved.
 * See COPYING in top-level directory.
 */

@@ -222,6 +222,8 @@ enum hwloc_topology_diff_apply_flags_e {
HWLOC_DECLSPEC int hwloc_topology_diff_apply(hwloc_topology_t topology, hwloc_topology_diff_t diff, unsigned long flags);

/** \brief Destroy a list of topology differences.
 *
 * \return 0.
 */
HWLOC_DECLSPEC int hwloc_topology_diff_destroy(hwloc_topology_diff_t diff);

@@ -233,6 +235,8 @@ HWLOC_DECLSPEC int hwloc_topology_diff_destroy(hwloc_topology_diff_t diff);
 * This identifier is usually the name of the other XML file
 * that contains the reference topology.
 *
 * \return 0 on success, -1 on error.
 *
 * \note the pointer returned in refname should later be freed
 * by the caller.
 */

@@ -246,10 +250,17 @@ HWLOC_DECLSPEC int hwloc_topology_diff_load_xml(const char *xmlpath, hwloc_topol
 * This identifier is usually the name of the other XML file
 * that contains the reference topology.
 * This attribute is given back when reading the diff from XML.
 *
 * \return 0 on success, -1 on error.
 */
HWLOC_DECLSPEC int hwloc_topology_diff_export_xml(hwloc_topology_diff_t diff, const char *refname, const char *xmlpath);

/** \brief Load a list of topology differences from a XML buffer.
 *
 * Build a list of differences from the XML memory buffer given
 * at \p xmlbuffer and of length \p buflen (including an ending \0).
 * This buffer may have been filled earlier with
 * hwloc_topology_diff_export_xmlbuffer().
 *
 * If not \c NULL, \p refname will be filled with the identifier
 * string of the reference topology for the difference file,

@@ -257,6 +268,8 @@ HWLOC_DECLSPEC int hwloc_topology_diff_export_xml(hwloc_topology_diff_t diff, co
 * This identifier is usually the name of the other XML file
 * that contains the reference topology.
 *
 * \return 0 on success, -1 on error.
 *
 * \note the pointer returned in refname should later be freed
 * by the caller.
 */

@@ -274,6 +287,8 @@ HWLOC_DECLSPEC int hwloc_topology_diff_load_xmlbuffer(const char *xmlbuffer, int
 * The returned buffer ends with a \0 that is included in the returned
 * length.
 *
 * \return 0 on success, -1 on error.
 *
 * \note The XML buffer should later be freed with hwloc_free_xmlbuffer().
 */
HWLOC_DECLSPEC int hwloc_topology_diff_export_xmlbuffer(hwloc_topology_diff_t diff, const char *refname, char **xmlbuffer, int *buflen);
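A hedged sketch of how the XML-buffer export path fits together, assuming two already-loaded topologies and an arbitrary "reference-topology" identifier as refname:

```c
#include <hwloc.h>
#include <hwloc/diff.h>
#include <stdio.h>

/* Export the differences between two loaded topologies as XML (sketch). */
static int dump_topology_diff(hwloc_topology_t newtopo, hwloc_topology_t oldtopo)
{
    hwloc_topology_diff_t diff;
    char *xmlbuffer;
    int buflen;

    if (hwloc_topology_diff_build(newtopo, oldtopo, 0, &diff) < 0)
        return -1;
    if (!diff)
        return 0;                                  /* topologies are identical */

    /* refname is given back when the diff is re-loaded from XML */
    if (hwloc_topology_diff_export_xmlbuffer(diff, "reference-topology", &xmlbuffer, &buflen) == 0) {
        printf("%s\n", xmlbuffer);                 /* ends with a \0 included in buflen */
        hwloc_free_xmlbuffer(oldtopo, xmlbuffer);
    }
    hwloc_topology_diff_destroy(diff);
    return 0;
}
```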
37  src/3rdparty/hwloc/include/hwloc/distances.h  vendored

@@ -1,5 +1,5 @@
/*
 * Copyright © 2010-2022 Inria. All rights reserved.
 * Copyright © 2010-2023 Inria. All rights reserved.
 * See COPYING in top-level directory.
 */

@@ -131,6 +131,8 @@ enum hwloc_distances_kind_e {
 *
 * Each distance matrix returned in the \p distances array should be released
 * by the caller using hwloc_distances_release().
 *
 * \return 0 on success, -1 on error.
 */
HWLOC_DECLSPEC int
hwloc_distances_get(hwloc_topology_t topology,

@@ -140,6 +142,8 @@ hwloc_distances_get(hwloc_topology_t topology,
/** \brief Retrieve distance matrices for object at a specific depth in the topology.
 *
 * Identical to hwloc_distances_get() with the additional \p depth filter.
 *
 * \return 0 on success, -1 on error.
 */
HWLOC_DECLSPEC int
hwloc_distances_get_by_depth(hwloc_topology_t topology, int depth,

@@ -149,6 +153,8 @@ hwloc_distances_get_by_depth(hwloc_topology_t topology, int depth,
/** \brief Retrieve distance matrices for object of a specific type.
 *
 * Identical to hwloc_distances_get() with the additional \p type filter.
 *
 * \return 0 on success, -1 on error.
 */
HWLOC_DECLSPEC int
hwloc_distances_get_by_type(hwloc_topology_t topology, hwloc_obj_type_t type,

@@ -162,6 +168,8 @@ hwloc_distances_get_by_type(hwloc_topology_t topology, hwloc_obj_type_t type,
 * The name of the most common structure is "NUMALatency".
 * Others include "XGMIBandwidth", "XGMIHops", "XeLinkBandwidth",
 * and "NVLinkBandwidth".
 *
 * \return 0 on success, -1 on error.
 */
HWLOC_DECLSPEC int
hwloc_distances_get_by_name(hwloc_topology_t topology, const char *name,

@@ -171,7 +179,12 @@ hwloc_distances_get_by_name(hwloc_topology_t topology, const char *name,
/** \brief Get a description of what a distances structure contains.
 *
 * For instance "NUMALatency" for hardware-provided NUMA distances (ACPI SLIT),
 * or NULL if unknown.
 * or \c NULL if unknown.
 *
 * \return the constant string with the name of the distance structure.
 *
 * \note The returned name should not be freed by the caller,
 * it belongs to the hwloc library.
 */
HWLOC_DECLSPEC const char *
hwloc_distances_get_name(hwloc_topology_t topology, struct hwloc_distances_s *distances);

@@ -252,6 +265,8 @@ enum hwloc_distances_transform_e {
 *
 * \p flags must be \c 0 for now.
 *
 * \return 0 on success, -1 on error for instance if flags are invalid.
 *
 * \note Objects in distances array \p objs may be directly modified
 * in place without using hwloc_distances_transform().
 * One may use hwloc_get_obj_with_same_locality() to easily convert

@@ -272,6 +287,7 @@ HWLOC_DECLSPEC int hwloc_distances_transform(hwloc_topology_t topology, struct h

/** \brief Find the index of an object in a distances structure.
 *
 * \return the index of the object in the distances structure if any.
 * \return -1 if object \p obj is not involved in structure \p distances.
 */
static __hwloc_inline int

@@ -289,6 +305,7 @@ hwloc_distances_obj_index(struct hwloc_distances_s *distances, hwloc_obj_t obj)
 * The distance from \p obj1 to \p obj2 is stored in the value pointed by
 * \p value1to2 and reciprocally.
 *
 * \return 0 on success.
 * \return -1 if object \p obj1 or \p obj2 is not involved in structure \p distances.
 */
static __hwloc_inline int

@@ -374,8 +391,8 @@ hwloc_distances_add_create(hwloc_topology_t topology,
 *
 * \p flags must be \c 0 for now.
 *
 * \return \c 0 on success.
 * \return \c -1 on error.
 * \return 0 on success.
 * \return -1 on error.
 */
HWLOC_DECLSPEC int hwloc_distances_add_values(hwloc_topology_t topology,
                                              hwloc_distances_add_handle_t handle,

@@ -411,8 +428,8 @@ enum hwloc_distances_add_flag_e {
 *
 * On error, the temporary distances structure and its content are destroyed.
 *
 * \return \c 0 on success.
 * \return \c -1 on error.
 * \return 0 on success.
 * \return -1 on error.
 */
HWLOC_DECLSPEC int hwloc_distances_add_commit(hwloc_topology_t topology,
                                              hwloc_distances_add_handle_t handle,

@@ -433,18 +450,24 @@ HWLOC_DECLSPEC int hwloc_distances_add_commit(hwloc_topology_t topology,
 *
 * If these distances were used to group objects, these additional
 * Group objects are not removed from the topology.
 *
 * \return 0 on success, -1 on error.
 */
HWLOC_DECLSPEC int hwloc_distances_remove(hwloc_topology_t topology);

/** \brief Remove distance matrices for objects at a specific depth in the topology.
 *
 * Identical to hwloc_distances_remove() but only applies to one level of the topology.
 *
 * \return 0 on success, -1 on error.
 */
HWLOC_DECLSPEC int hwloc_distances_remove_by_depth(hwloc_topology_t topology, int depth);

/** \brief Remove distance matrices for objects of a specific type in the topology.
 *
 * Identical to hwloc_distances_remove() but only applies to one level of the topology.
 *
 * \return 0 on success, -1 on error.
 */
static __hwloc_inline int
hwloc_distances_remove_by_type(hwloc_topology_t topology, hwloc_obj_type_t type)

@@ -458,6 +481,8 @@ hwloc_distances_remove_by_type(hwloc_topology_t topology, hwloc_obj_type_t type)
/** \brief Release and remove the given distance matrice from the topology.
 *
 * This function includes a call to hwloc_distances_release().
 *
 * \return 0 on success, -1 on error.
 */
HWLOC_DECLSPEC int hwloc_distances_release_remove(hwloc_topology_t topology, struct hwloc_distances_s *distances);
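A sketch of the lookup path documented above: fetch the "NUMALatency" matrix by name and read one pair of values. The choice of the first two NUMA nodes is an example assumption.

```c
#include <hwloc.h>
#include <hwloc/distances.h>
#include <stdio.h>

/* Print the NUMA latency between the first two NUMA nodes, if available (sketch). */
static void print_numa_latency(hwloc_topology_t topology)
{
    struct hwloc_distances_s *dist;
    unsigned nr = 1;                        /* we only want one matching matrix */

    if (hwloc_distances_get_by_name(topology, "NUMALatency", &nr, &dist, 0) < 0 || nr == 0)
        return;

    hwloc_obj_t n0 = hwloc_get_obj_by_type(topology, HWLOC_OBJ_NUMANODE, 0);
    hwloc_obj_t n1 = hwloc_get_obj_by_type(topology, HWLOC_OBJ_NUMANODE, 1);
    hwloc_uint64_t v01, v10;
    if (n0 && n1 && hwloc_distances_obj_pair_values(dist, n0, n1, &v01, &v10) == 0)
        printf("latency node0->node1 = %llu, node1->node0 = %llu\n",
               (unsigned long long) v01, (unsigned long long) v10);

    hwloc_distances_release(topology, dist);
}
```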
16  src/3rdparty/hwloc/include/hwloc/export.h  vendored

@@ -55,7 +55,7 @@ enum hwloc_topology_export_xml_flags_e {
 *
 * \p flags is a OR'ed set of ::hwloc_topology_export_xml_flags_e.
 *
 * \return -1 if a failure occured.
 * \return 0 on success, or -1 on error.
 *
 * \note See also hwloc_topology_set_userdata_export_callback()
 * for exporting application-specific object userdata.

@@ -91,7 +91,7 @@ HWLOC_DECLSPEC int hwloc_topology_export_xml(hwloc_topology_t topology, const ch
 *
 * \p flags is a OR'ed set of ::hwloc_topology_export_xml_flags_e.
 *
 * \return -1 if a failure occured.
 * \return 0 on success, or -1 on error.
 *
 * \note See also hwloc_topology_set_userdata_export_callback()
 * for exporting application-specific object userdata.

@@ -145,13 +145,15 @@ HWLOC_DECLSPEC void hwloc_topology_set_userdata_export_callback(hwloc_topology_t
 * that were given to the export callback.
 *
 * Only printable characters may be exported to XML string attributes.
 * If a non-printable character is passed in \p name or \p buffer,
 * the function returns -1 with errno set to EINVAL.
 *
 * If exporting binary data, the application should first encode into
 * printable characters only (or use hwloc_export_obj_userdata_base64()).
 * It should also take care of portability issues if the export may
 * be reimported on a different architecture.
 *
 * \return 0 on success.
 * \return -1 with errno set to \c EINVAL if a non-printable character is
 * passed in \p name or \b buffer.
 */
HWLOC_DECLSPEC int hwloc_export_obj_userdata(void *reserved, hwloc_topology_t topology, hwloc_obj_t obj, const char *name, const void *buffer, size_t length);

@@ -165,8 +167,14 @@ HWLOC_DECLSPEC int hwloc_export_obj_userdata(void *reserved, hwloc_topology_t to
 * This function may only be called from within the export() callback passed
 * to hwloc_topology_set_userdata_export_callback().
 *
 * The name must be made of printable characters for export to XML string attributes.
 *
 * The function does not take care of portability issues if the export
 * may be reimported on a different architecture.
 *
 * \return 0 on success.
 * \return -1 with errno set to \c EINVAL if a non-printable character is
 * passed in \p name.
 */
HWLOC_DECLSPEC int hwloc_export_obj_userdata_base64(void *reserved, hwloc_topology_t topology, hwloc_obj_t obj, const char *name, const void *buffer, size_t length);
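For the exporter side of this API, a minimal sketch that writes the topology into an in-memory XML buffer and releases it afterwards:

```c
#include <hwloc.h>
#include <stdio.h>

/* Export the current topology to an in-memory XML buffer (sketch). */
static int export_topology(hwloc_topology_t topology)
{
    char *xmlbuffer;
    int buflen;

    if (hwloc_topology_export_xmlbuffer(topology, &xmlbuffer, &buflen, 0) < 0)
        return -1;                         /* 0 on success, -1 on error */
    printf("exported %d bytes of XML\n", buflen);
    hwloc_free_xmlbuffer(topology, xmlbuffer);
    return 0;
}
```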
5  src/3rdparty/hwloc/include/hwloc/gl.h  vendored

@@ -1,6 +1,6 @@
/*
 * Copyright © 2012 Blue Brain Project, EPFL. All rights reserved.
 * Copyright © 2012-2021 Inria. All rights reserved.
 * Copyright © 2012-2023 Inria. All rights reserved.
 * See COPYING in top-level directory.
 */

@@ -102,7 +102,8 @@ hwloc_gl_get_display_osdev_by_name(hwloc_topology_t topology,
 * Retrieves the OpenGL display port (server) in \p port and device (screen)
 * in \p screen that correspond to the given hwloc OS device object.
 *
 * \return \c -1 if none could be found.
 * \return 0 on success.
 * \return -1 if none could be found.
 *
 * The topology \p topology does not necessarily have to match the current
 * machine. For instance the topology may be an XML import of a remote host.
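A sketch of querying the display behind GPU OS devices with the function documented in this hunk (only GL-capable GPU osdevs will actually report a display):

```c
#include <hwloc.h>
#include <hwloc/gl.h>
#include <stdio.h>

/* Print the ":port.screen" display behind each GPU OS device (sketch). */
static void print_gl_displays(hwloc_topology_t topology)
{
    hwloc_obj_t osdev = NULL;
    while ((osdev = hwloc_get_next_osdev(topology, osdev)) != NULL) {
        unsigned port, screen;
        if (osdev->attr->osdev.type == HWLOC_OBJ_OSDEV_GPU
            && hwloc_gl_get_display_by_osdev(topology, osdev, &port, &screen) == 0)
            printf("%s runs display :%u.%u\n", osdev->name, port, screen);
    }
}
```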
13  src/3rdparty/hwloc/include/hwloc/glibc-sched.h  vendored

@@ -1,6 +1,6 @@
/*
 * Copyright © 2009 CNRS
 * Copyright © 2009-2020 Inria. All rights reserved.
 * Copyright © 2009-2023 Inria. All rights reserved.
 * Copyright © 2009-2011 Université Bordeaux
 * Copyright © 2011 Cisco Systems, Inc. All rights reserved.
 * See COPYING in top-level directory.

@@ -52,6 +52,8 @@ extern "C" {
 * that takes a cpu_set_t as input parameter.
 *
 * \p schedsetsize should be sizeof(cpu_set_t) unless \p schedset was dynamically allocated with CPU_ALLOC
 *
 * \return 0.
 */
static __hwloc_inline int
hwloc_cpuset_to_glibc_sched_affinity(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_const_cpuset_t hwlocset,

@@ -80,6 +82,9 @@ hwloc_cpuset_to_glibc_sched_affinity(hwloc_topology_t topology __hwloc_attribute
 * that takes a cpu_set_t as input parameter.
 *
 * \p schedsetsize should be sizeof(cpu_set_t) unless \p schedset was dynamically allocated with CPU_ALLOC
 *
 * \return 0 on success.
 * \return -1 with errno set to \c ENOMEM if some internal reallocation failed.
 */
static __hwloc_inline int
hwloc_cpuset_from_glibc_sched_affinity(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_cpuset_t hwlocset,

@@ -95,7 +100,8 @@ hwloc_cpuset_from_glibc_sched_affinity(hwloc_topology_t topology __hwloc_attribu
  cpu = 0;
  while (count) {
    if (CPU_ISSET_S(cpu, schedsetsize, schedset)) {
      hwloc_bitmap_set(hwlocset, cpu);
      if (hwloc_bitmap_set(hwlocset, cpu) < 0)
        return -1;
      count--;
    }
    cpu++;

@@ -107,7 +113,8 @@ hwloc_cpuset_from_glibc_sched_affinity(hwloc_topology_t topology __hwloc_attribu
  assert(schedsetsize == sizeof(cpu_set_t));
  for(cpu=0; cpu<CPU_SETSIZE; cpu++)
    if (CPU_ISSET(cpu, schedset))
      hwloc_bitmap_set(hwlocset, cpu);
      if (hwloc_bitmap_set(hwlocset, cpu) < 0)
        return -1;
#endif /* !CPU_ZERO_S */
  return 0;
}
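A sketch of the forward conversion in practice: translate a hwloc cpuset into a glibc cpu_set_t and apply it with pthread_setaffinity_np(). Binding to the first core is an example choice.

```c
#define _GNU_SOURCE
#include <hwloc.h>
#include <hwloc/glibc-sched.h>
#include <pthread.h>
#include <sched.h>

/* Bind the calling thread to the cpuset of the first core, via glibc (sketch). */
static int bind_to_first_core(hwloc_topology_t topology)
{
    hwloc_obj_t core = hwloc_get_obj_by_type(topology, HWLOC_OBJ_CORE, 0);
    cpu_set_t schedset;
    if (!core)
        return -1;
    /* schedsetsize is sizeof(cpu_set_t) since schedset is statically allocated */
    hwloc_cpuset_to_glibc_sched_affinity(topology, core->cpuset, &schedset, sizeof(schedset));
    return pthread_setaffinity_np(pthread_self(), sizeof(schedset), &schedset);
}
```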
1039  src/3rdparty/hwloc/include/hwloc/helper.h  vendored
(file diff suppressed because it is too large)
10  src/3rdparty/hwloc/include/hwloc/levelzero.h  vendored

@@ -1,5 +1,5 @@
/*
 * Copyright © 2021 Inria. All rights reserved.
 * Copyright © 2021-2023 Inria. All rights reserved.
 * See COPYING in top-level directory.
 */

@@ -44,8 +44,9 @@ extern "C" {
 * the Level Zero device \p device.
 *
 * Topology \p topology and device \p device must match the local machine.
 * The Level Zero must have been initialized with Sysman enabled
 * (ZES_ENABLE_SYSMAN=1 in the environment).
 * The Level Zero library must have been initialized with Sysman enabled
 * (by calling zesInit(0) if supported,
 * or by setting ZES_ENABLE_SYSMAN=1 in the environment).
 * I/O devices detection and the Level Zero component are not needed in the
 * topology.
 *

@@ -55,6 +56,9 @@ extern "C" {
 *
 * This function is currently only implemented in a meaningful way for
 * Linux; other systems will simply get a full cpuset.
 *
 * \return 0 on success.
 * \return -1 on error, for instance if device information could not be found.
 */
static __hwloc_inline int
hwloc_levelzero_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused,
34  src/3rdparty/hwloc/include/hwloc/linux-libnuma.h  vendored

@@ -1,6 +1,6 @@
/*
 * Copyright © 2009 CNRS
 * Copyright © 2009-2017 Inria. All rights reserved.
 * Copyright © 2009-2023 Inria. All rights reserved.
 * Copyright © 2009-2010, 2012 Université Bordeaux
 * See COPYING in top-level directory.
 */

@@ -50,6 +50,8 @@ extern "C" {
 * This function may be used before calling set_mempolicy, mbind, migrate_pages
 * or any other function that takes an array of unsigned long and a maximal
 * node number as input parameter.
 *
 * \return 0.
 */
static __hwloc_inline int
hwloc_cpuset_to_linux_libnuma_ulongs(hwloc_topology_t topology, hwloc_const_cpuset_t cpuset,

@@ -84,6 +86,8 @@ hwloc_cpuset_to_linux_libnuma_ulongs(hwloc_topology_t topology, hwloc_const_cpus
 * This function may be used before calling set_mempolicy, mbind, migrate_pages
 * or any other function that takes an array of unsigned long and a maximal
 * node number as input parameter.
 *
 * \return 0.
 */
static __hwloc_inline int
hwloc_nodeset_to_linux_libnuma_ulongs(hwloc_topology_t topology, hwloc_const_nodeset_t nodeset,

@@ -119,6 +123,9 @@ hwloc_nodeset_to_linux_libnuma_ulongs(hwloc_topology_t topology, hwloc_const_nod
 * This function may be used after calling get_mempolicy or any other function
 * that takes an array of unsigned long as output parameter (and possibly
 * a maximal node number as input parameter).
 *
 * \return 0 on success.
 * \return -1 on error, for instance if failing an internal reallocation.
 */
static __hwloc_inline int
hwloc_cpuset_from_linux_libnuma_ulongs(hwloc_topology_t topology, hwloc_cpuset_t cpuset,

@@ -130,7 +137,8 @@ hwloc_cpuset_from_linux_libnuma_ulongs(hwloc_topology_t topology, hwloc_cpuset_t
  while ((node = hwloc_get_next_obj_by_depth(topology, depth, node)) != NULL)
    if (node->os_index < maxnode
        && (mask[node->os_index/sizeof(*mask)/8] & (1UL << (node->os_index % (sizeof(*mask)*8)))))
      hwloc_bitmap_or(cpuset, cpuset, node->cpuset);
      if (hwloc_bitmap_or(cpuset, cpuset, node->cpuset) < 0)
        return -1;
  return 0;
}

@@ -142,6 +150,9 @@ hwloc_cpuset_from_linux_libnuma_ulongs(hwloc_topology_t topology, hwloc_cpuset_t
 * This function may be used after calling get_mempolicy or any other function
 * that takes an array of unsigned long as output parameter (and possibly
 * a maximal node number as input parameter).
 *
 * \return 0 on success.
 * \return -1 with errno set to \c ENOMEM if some internal reallocation failed.
 */
static __hwloc_inline int
hwloc_nodeset_from_linux_libnuma_ulongs(hwloc_topology_t topology, hwloc_nodeset_t nodeset,

@@ -153,7 +164,8 @@ hwloc_nodeset_from_linux_libnuma_ulongs(hwloc_topology_t topology, hwloc_nodeset
  while ((node = hwloc_get_next_obj_by_depth(topology, depth, node)) != NULL)
    if (node->os_index < maxnode
        && (mask[node->os_index/sizeof(*mask)/8] & (1UL << (node->os_index % (sizeof(*mask)*8)))))
      hwloc_bitmap_set(nodeset, node->os_index);
      if (hwloc_bitmap_set(nodeset, node->os_index) < 0)
        return -1;
  return 0;
}

@@ -184,7 +196,7 @@ hwloc_nodeset_from_linux_libnuma_ulongs(hwloc_topology_t topology, hwloc_nodeset
 * This function may be used before calling many numa_ functions
 * that use a struct bitmask as an input parameter.
 *
 * \return newly allocated struct bitmask.
 * \return newly allocated struct bitmask, or \c NULL on error.
 */
static __hwloc_inline struct bitmask *
hwloc_cpuset_to_linux_libnuma_bitmask(hwloc_topology_t topology, hwloc_const_cpuset_t cpuset) __hwloc_attribute_malloc;

@@ -209,7 +221,7 @@ hwloc_cpuset_to_linux_libnuma_bitmask(hwloc_topology_t topology, hwloc_const_cpu
 * This function may be used before calling many numa_ functions
 * that use a struct bitmask as an input parameter.
 *
 * \return newly allocated struct bitmask.
 * \return newly allocated struct bitmask, or \c NULL on error.
 */
static __hwloc_inline struct bitmask *
hwloc_nodeset_to_linux_libnuma_bitmask(hwloc_topology_t topology, hwloc_const_nodeset_t nodeset) __hwloc_attribute_malloc;

@@ -231,6 +243,9 @@ hwloc_nodeset_to_linux_libnuma_bitmask(hwloc_topology_t topology, hwloc_const_no
 *
 * This function may be used after calling many numa_ functions
 * that use a struct bitmask as an output parameter.
 *
 * \return 0 on success.
 * \return -1 with errno set to \c ENOMEM if some internal reallocation failed.
 */
static __hwloc_inline int
hwloc_cpuset_from_linux_libnuma_bitmask(hwloc_topology_t topology, hwloc_cpuset_t cpuset,

@@ -241,7 +256,8 @@ hwloc_cpuset_from_linux_libnuma_bitmask(hwloc_topology_t topology, hwloc_cpuset_
  hwloc_bitmap_zero(cpuset);
  while ((node = hwloc_get_next_obj_by_depth(topology, depth, node)) != NULL)
    if (numa_bitmask_isbitset(bitmask, node->os_index))
      hwloc_bitmap_or(cpuset, cpuset, node->cpuset);
      if (hwloc_bitmap_or(cpuset, cpuset, node->cpuset) < 0)
        return -1;
  return 0;
}

@@ -249,6 +265,9 @@ hwloc_cpuset_from_linux_libnuma_bitmask(hwloc_topology_t topology, hwloc_cpuset_
 *
 * This function may be used after calling many numa_ functions
 * that use a struct bitmask as an output parameter.
 *
 * \return 0 on success.
 * \return -1 with errno set to \c ENOMEM if some internal reallocation failed.
 */
static __hwloc_inline int
hwloc_nodeset_from_linux_libnuma_bitmask(hwloc_topology_t topology, hwloc_nodeset_t nodeset,

@@ -259,7 +278,8 @@ hwloc_nodeset_from_linux_libnuma_bitmask(hwloc_topology_t topology, hwloc_nodese
  hwloc_bitmap_zero(nodeset);
  while ((node = hwloc_get_next_obj_by_depth(topology, depth, node)) != NULL)
    if (numa_bitmask_isbitset(bitmask, node->os_index))
      hwloc_bitmap_set(nodeset, node->os_index);
      if (hwloc_bitmap_set(nodeset, node->os_index) < 0)
        return -1;
  return 0;
}
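A sketch of the bitmask conversion in use: restrict the current thread's memory allocations to the NUMA nodes local to a given object. The object argument and the omitted numa_available() check are assumptions of this example.

```c
#include <hwloc.h>
#include <hwloc/linux-libnuma.h>
#include <numa.h>

/* Restrict memory allocations to the NUMA nodes local to an object (sketch). */
static int membind_near_obj(hwloc_topology_t topology, hwloc_obj_t obj)
{
    struct bitmask *mask = hwloc_nodeset_to_linux_libnuma_bitmask(topology, obj->nodeset);
    if (!mask)
        return -1;                 /* NULL on error, as documented above */
    numa_set_membind(mask);
    numa_bitmask_free(mask);
    return 0;
}
```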
10  src/3rdparty/hwloc/include/hwloc/linux.h  vendored

@@ -1,6 +1,6 @@
/*
 * Copyright © 2009 CNRS
 * Copyright © 2009-2021 Inria. All rights reserved.
 * Copyright © 2009-2023 Inria. All rights reserved.
 * Copyright © 2009-2011 Université Bordeaux
 * See COPYING in top-level directory.
 */

@@ -38,6 +38,8 @@ extern "C" {
 * The behavior is exactly the same as the Linux sched_setaffinity system call,
 * but uses a hwloc cpuset.
 *
 * \return 0 on success, -1 on error.
 *
 * \note This is equivalent to calling hwloc_set_proc_cpubind() with
 * HWLOC_CPUBIND_THREAD as flags.
 */

@@ -52,6 +54,8 @@ HWLOC_DECLSPEC int hwloc_linux_set_tid_cpubind(hwloc_topology_t topology, pid_t
 * The behavior is exactly the same as the Linux sched_getaffinity system call,
 * but uses a hwloc cpuset.
 *
 * \return 0 on success, -1 on error.
 *
 * \note This is equivalent to calling hwloc_get_proc_cpubind() with
 * ::HWLOC_CPUBIND_THREAD as flags.
 */

@@ -62,6 +66,8 @@ HWLOC_DECLSPEC int hwloc_linux_get_tid_cpubind(hwloc_topology_t topology, pid_t
 * The CPU-set \p set (previously allocated by the caller)
 * is filled with the PU which the thread last ran on.
 *
 * \return 0 on success, -1 on error.
 *
 * \note This is equivalent to calling hwloc_get_proc_last_cpu_location() with
 * ::HWLOC_CPUBIND_THREAD as flags.
 */

@@ -72,6 +78,8 @@ HWLOC_DECLSPEC int hwloc_linux_get_tid_last_cpu_location(hwloc_topology_t topolo
 * Might be used when reading CPU set from sysfs attributes such as topology
 * and caches for processors, or local_cpus for devices.
 *
 * \return 0 on success, -1 on error.
 *
 * \note This function ignores the HWLOC_FSROOT environment variable.
 */
HWLOC_DECLSPEC int hwloc_linux_read_path_as_cpumask(const char *path, hwloc_bitmap_t set);
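A sketch of the tid-based binding helper documented in this hunk, using the raw gettid syscall and the first PU as example choices:

```c
#include <hwloc.h>
#include <hwloc/linux.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Bind the current Linux thread (by tid) to the first PU (sketch). */
static int bind_tid_to_first_pu(hwloc_topology_t topology)
{
    hwloc_obj_t pu = hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 0);
    pid_t tid = (pid_t) syscall(SYS_gettid);
    if (!pu)
        return -1;
    /* equivalent to hwloc_set_proc_cpubind() with HWLOC_CPUBIND_THREAD, as noted above */
    return hwloc_linux_set_tid_cpubind(topology, tid, pu->cpuset);
}
```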
44  src/3rdparty/hwloc/include/hwloc/memattrs.h  vendored

@@ -1,5 +1,5 @@
/*
 * Copyright © 2019-2022 Inria. All rights reserved.
 * Copyright © 2019-2023 Inria. All rights reserved.
 * See COPYING in top-level directory.
 */

@@ -54,6 +54,10 @@ extern "C" {
 * Attribute values for these nodes, if any, may then be obtained with
 * hwloc_memattr_get_value() and manually compared with the desired criteria.
 *
 * Memory attributes are also used internally to build Memory Tiers which provide
 * an easy way to distinguish NUMA nodes of different kinds, as explained
 * in \ref heteromem.
 *
 * \sa An example is available in doc/examples/memory-attributes.c in the source tree.
 *
 * \note The API also supports specific objects as initiator,

@@ -178,6 +182,9 @@ enum hwloc_memattr_id_e {
typedef unsigned hwloc_memattr_id_t;

/** \brief Return the identifier of the memory attribute with the given name.
 *
 * \return 0 on success.
 * \return -1 with errno set to \c EINVAL if no such attribute exists.
 */
HWLOC_DECLSPEC int
hwloc_memattr_get_by_name(hwloc_topology_t topology,

@@ -247,6 +254,8 @@ enum hwloc_local_numanode_flag_e {
 * or the number of nodes that would have been stored if there were
 * enough room.
 *
 * \return 0 on success or -1 on error.
 *
 * \note Some of these NUMA nodes may not have any memory attribute
 * values and hence not be reported as actual targets in other functions.
 *

@@ -276,6 +285,10 @@ hwloc_get_local_numanode_objs(hwloc_topology_t topology,
 *
 * \p flags must be \c 0 for now.
 *
 * \return 0 on success.
 * \return -1 on error, for instance with errno set to \c EINVAL if flags
 * are invalid or no such attribute exists.
 *
 * \note The initiator \p initiator should be of type ::HWLOC_LOCATION_TYPE_CPUSET
 * when refering to accesses performed by CPU cores.
 * ::HWLOC_LOCATION_TYPE_OBJECT is currently unused internally by hwloc,

@@ -307,7 +320,10 @@ hwloc_memattr_get_value(hwloc_topology_t topology,
 *
 * \p flags must be \c 0 for now.
 *
 * If there are no matching targets, \c -1 is returned with \p errno set to \c ENOENT;
 * \return 0 on success.
 * \return -1 with errno set to \c ENOENT if there are no matching targets.
 * \return -1 with errno set to \c EINVAL if flags are invalid,
 * or no such attribute exists.
 *
 * \note The initiator \p initiator should be of type ::HWLOC_LOCATION_TYPE_CPUSET
 * when refering to accesses performed by CPU cores.

@@ -323,10 +339,6 @@ hwloc_memattr_get_best_target(hwloc_topology_t topology,
                              hwloc_obj_t *best_target, hwloc_uint64_t *value);

/** \brief Return the best initiator for the given attribute and target NUMA node.
 *
 * If the attribute does not relate to a specific initiator
 * (it does not have the flag ::HWLOC_MEMATTR_FLAG_NEED_INITIATOR),
 * \c -1 is returned and \p errno is set to \c EINVAL.
 *
 * If \p value is non \c NULL, the corresponding value is returned there.
 *

@@ -342,7 +354,10 @@ hwloc_memattr_get_best_target(hwloc_topology_t topology,
 *
 * \p flags must be \c 0 for now.
 *
 * If there are no matching initiators, \c -1 is returned with \p errno set to \c ENOENT;
 * \return 0 on success.
 * \return -1 with errno set to \c ENOENT if there are no matching initiators.
 * \return -1 with errno set to \c EINVAL if the attribute does not relate to a specific initiator
 * (it does not have the flag ::HWLOC_MEMATTR_FLAG_NEED_INITIATOR).
 */
HWLOC_DECLSPEC int
hwloc_memattr_get_best_initiator(hwloc_topology_t topology,

@@ -359,6 +374,9 @@ hwloc_memattr_get_best_initiator(hwloc_topology_t topology,
 */

/** \brief Return the name of a memory attribute.
 *
 * \return 0 on success.
 * \return -1 with errno set to \c EINVAL if the attribute does not exist.
 */
HWLOC_DECLSPEC int
hwloc_memattr_get_name(hwloc_topology_t topology,

@@ -368,6 +386,9 @@ hwloc_memattr_get_name(hwloc_topology_t topology,
/** \brief Return the flags of the given attribute.
 *
 * Flags are a OR'ed set of ::hwloc_memattr_flag_e.
 *
 * \return 0 on success.
 * \return -1 with errno set to \c EINVAL if the attribute does not exist.
 */
HWLOC_DECLSPEC int
hwloc_memattr_get_flags(hwloc_topology_t topology,

@@ -397,6 +418,9 @@ enum hwloc_memattr_flag_e {
 * Add a specific memory attribute that is not defined in ::hwloc_memattr_id_e.
 * Flags are a OR'ed set of ::hwloc_memattr_flag_e. It must contain at least
 * one of ::HWLOC_MEMATTR_FLAG_HIGHER_FIRST or ::HWLOC_MEMATTR_FLAG_LOWER_FIRST.
 *
 * \return 0 on success.
 * \return -1 with errno set to \c EBUSY if another attribute already uses this name.
 */
HWLOC_DECLSPEC int
hwloc_memattr_register(hwloc_topology_t topology,

@@ -421,6 +445,8 @@ hwloc_memattr_register(hwloc_topology_t topology,
 * ::HWLOC_LOCATION_TYPE_OBJECT is currently unused internally by hwloc,
 * but users may for instance use it to provide custom information about
 * host memory accesses performed by GPUs.
 *
 * \return 0 on success or -1 on error.
 */
HWLOC_DECLSPEC int
hwloc_memattr_set_value(hwloc_topology_t topology,

@@ -460,6 +486,8 @@ hwloc_memattr_set_value(hwloc_topology_t topology,
 * NUMA nodes with hwloc_get_local_numanode_objs() and then look at their attribute
 * values.
 *
 * \return 0 on success or -1 on error.
 *
 * \note The initiator \p initiator should be of type ::HWLOC_LOCATION_TYPE_CPUSET
 * when referring to accesses performed by CPU cores.
 * ::HWLOC_LOCATION_TYPE_OBJECT is currently unused internally by hwloc,

@@ -497,6 +525,8 @@ hwloc_memattr_get_targets(hwloc_topology_t topology,
 * (it does not have the flag ::HWLOC_MEMATTR_FLAG_NEED_INITIATOR),
 * no initiator is returned.
 *
 * \return 0 on success or -1 on error.
 *
 * \note This function is meant for tools and debugging (listing internal information)
 * rather than for application queries. Applications should rather select useful
 * NUMA nodes with hwloc_get_local_numanode_objs() and then look at their attribute
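A hedged sketch of the "best target" query documented above, using the whole machine's CPUs as initiator and the predefined bandwidth attribute (both are example choices of this sketch):

```c
#include <hwloc.h>
#include <hwloc/memattrs.h>
#include <stdio.h>

/* Find the NUMA node with the best bandwidth for the whole machine's CPUs (sketch). */
static void print_best_bandwidth_node(hwloc_topology_t topology)
{
    struct hwloc_location initiator;
    hwloc_obj_t best;
    hwloc_uint64_t value;

    initiator.type = HWLOC_LOCATION_TYPE_CPUSET;
    initiator.location.cpuset = hwloc_get_root_obj(topology)->cpuset;

    if (hwloc_memattr_get_best_target(topology, HWLOC_MEMATTR_ID_BANDWIDTH,
                                      &initiator, 0, &best, &value) < 0)
        return;                            /* ENOENT if no matching target */
    printf("best node: L#%u (bandwidth %llu)\n",
           best->logical_index, (unsigned long long) value);
}
```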
5  src/3rdparty/hwloc/include/hwloc/nvml.h  vendored

@@ -1,5 +1,5 @@
/*
 * Copyright © 2012-2021 Inria. All rights reserved.
 * Copyright © 2012-2023 Inria. All rights reserved.
 * See COPYING in top-level directory.
 */

@@ -51,6 +51,9 @@ extern "C" {
 *
 * This function is currently only implemented in a meaningful way for
 * Linux; other systems will simply get a full cpuset.
 *
 * \return 0 on success.
 * \return -1 on error, for instance if device information could not be found.
 */
static __hwloc_inline int
hwloc_nvml_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused,
8  src/3rdparty/hwloc/include/hwloc/opencl.h  vendored

@@ -1,5 +1,5 @@
/*
 * Copyright © 2012-2021 Inria. All rights reserved.
 * Copyright © 2012-2023 Inria. All rights reserved.
 * Copyright © 2013, 2018 Université Bordeaux. All right reserved.
 * See COPYING in top-level directory.
 */

@@ -69,6 +69,9 @@ typedef union {
/** \brief Return the domain, bus and device IDs of the OpenCL device \p device.
 *
 * Device \p device must match the local machine.
 *
 * \return 0 on success.
 * \return -1 on error, for instance if device information could not be found.
 */
static __hwloc_inline int
hwloc_opencl_get_device_pci_busid(cl_device_id device,

@@ -126,6 +129,9 @@ hwloc_opencl_get_device_pci_busid(cl_device_id device,
 * This function is currently only implemented in a meaningful way for
 * Linux with the AMD or NVIDIA OpenCL implementation; other systems will simply
 * get a full cpuset.
 *
 * \return 0 on success.
 * \return -1 on error, for instance if the device could not be found.
 */
static __hwloc_inline int
hwloc_opencl_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused,
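A sketch of the PCI lookup documented in this hunk, for an OpenCL device handle obtained elsewhere (the surrounding platform/device enumeration is left out as an assumption of this example):

```c
#include <hwloc.h>
#include <hwloc/opencl.h>
#include <stdio.h>

/* Print the PCI bus ID of an already-obtained OpenCL device (sketch). */
static void print_opencl_device_location(cl_device_id device)
{
    unsigned domain, bus, dev, func;
    /* only meaningful with AMD/NVIDIA implementations exposing PCI info */
    if (hwloc_opencl_get_device_pci_busid(device, &domain, &bus, &dev, &func) == 0)
        printf("OpenCL device at %04x:%02x:%02x.%x\n", domain, bus, dev, func);
}
```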
src/3rdparty/hwloc/include/hwloc/openfabrics-verbs.h  vendored

@@ -1,6 +1,6 @@
/*
 * Copyright © 2009 CNRS
 * Copyright © 2009-2021 Inria. All rights reserved.
 * Copyright © 2009-2023 Inria. All rights reserved.
 * Copyright © 2009-2010 Université Bordeaux
 * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved.
 * See COPYING in top-level directory.

@@ -57,6 +57,9 @@ extern "C" {
 *
 * This function is currently only implemented in a meaningful way for
 * Linux; other systems will simply get a full cpuset.
 *
 * \return 0 on success.
 * \return -1 on error, for instance if device information could not be found.
 */
static __hwloc_inline int
hwloc_ibv_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused,
2  src/3rdparty/hwloc/include/hwloc/plugins.h  vendored

@@ -164,7 +164,7 @@ struct hwloc_disc_status {
   */
  unsigned excluded_phases;

  /** \brief OR'ed set of hwloc_disc_status_flag_e */
  /** \brief OR'ed set of ::hwloc_disc_status_flag_e */
  unsigned long flags;
};
1  src/3rdparty/hwloc/include/hwloc/rename.h  vendored

@@ -176,6 +176,7 @@ extern "C" {

#define hwloc_topology_insert_misc_object HWLOC_NAME(topology_insert_misc_object)
#define hwloc_topology_alloc_group_object HWLOC_NAME(topology_alloc_group_object)
#define hwloc_topology_free_group_object HWLOC_NAME(topology_free_group_object)
#define hwloc_topology_insert_group_object HWLOC_NAME(topology_insert_group_object)
#define hwloc_obj_add_other_obj_sets HWLOC_NAME(obj_add_other_obj_sets)
#define hwloc_topology_refresh HWLOC_NAME(topology_refresh)
5  src/3rdparty/hwloc/include/hwloc/rsmi.h  vendored

@@ -1,5 +1,5 @@
/*
 * Copyright © 2012-2021 Inria. All rights reserved.
 * Copyright © 2012-2023 Inria. All rights reserved.
 * Copyright (c) 2020, Advanced Micro Devices, Inc. All rights reserved.
 * Written by Advanced Micro Devices,
 * See COPYING in top-level directory.

@@ -55,6 +55,9 @@ extern "C" {
 *
 * This function is currently only implemented in a meaningful way for
 * Linux; other systems will simply get a full cpuset.
 *
 * \return 0 on success.
 * \return -1 on error, for instance if device information could not be found.
 */
static __hwloc_inline int
hwloc_rsmi_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused,
17  src/3rdparty/hwloc/include/hwloc/shmem.h  vendored

@@ -1,5 +1,5 @@
/*
 * Copyright © 2013-2018 Inria. All rights reserved.
 * Copyright © 2013-2023 Inria. All rights reserved.
 * See COPYING in top-level directory.
 */

@@ -48,6 +48,8 @@ extern "C" {
 * This length (in bytes) must be used in hwloc_shmem_topology_write()
 * and hwloc_shmem_topology_adopt() later.
 *
 * \return the length, or -1 on error, for instance if flags are invalid.
 *
 * \note Flags \p flags are currently unused, must be 0.
 */
HWLOC_DECLSPEC int hwloc_shmem_topology_get_length(hwloc_topology_t topology,

@@ -74,9 +76,10 @@ HWLOC_DECLSPEC int hwloc_shmem_topology_get_length(hwloc_topology_t topology,
 * is not. However the caller may also allocate it manually in shared memory
 * to share it as well.
 *
 * \return -1 with errno set to EBUSY if the virtual memory mapping defined
 * \return 0 on success.
 * \return -1 with errno set to \c EBUSY if the virtual memory mapping defined
 * by \p mmap_address and \p length isn't available in the process.
 * \return -1 with errno set to EINVAL if \p fileoffset, \p mmap_address
 * \return -1 with errno set to \c EINVAL if \p fileoffset, \p mmap_address
 * or \p length aren't page-aligned.
 */
HWLOC_DECLSPEC int hwloc_shmem_topology_write(hwloc_topology_t topology,

@@ -112,14 +115,16 @@ HWLOC_DECLSPEC int hwloc_shmem_topology_write(hwloc_topology_t topology,
 *
 * \note This function takes care of calling hwloc_topology_abi_check().
 *
 * \return -1 with errno set to EBUSY if the virtual memory mapping defined
 * \return 0 on success.
 *
 * \return -1 with errno set to \c EBUSY if the virtual memory mapping defined
 * by \p mmap_address and \p length isn't available in the process.
 *
 * \return -1 with errno set to EINVAL if \p fileoffset, \p mmap_address
 * \return -1 with errno set to \c EINVAL if \p fileoffset, \p mmap_address
 * or \p length aren't page-aligned, or do not match what was given to
 * hwloc_shmem_topology_write() earlier.
 *
 * \return -1 with errno set to EINVAL if the layout of the topology structure
 * \return -1 with errno set to \c EINVAL if the layout of the topology structure
 * is different between the writer process and the adopter process.
 */
HWLOC_DECLSPEC int hwloc_shmem_topology_adopt(hwloc_topology_t *topologyp,
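A hedged sketch of the writer side of this API, assuming a hypothetical "topo.shmem" file name and a free virtual address range discovered via an anonymous mmap probe:

```c
#include <hwloc.h>
#include <hwloc/shmem.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>

/* Write the topology into a file backing a fixed shared-memory mapping (sketch). */
static int write_shmem_topology(hwloc_topology_t topology)
{
    size_t length;
    if (hwloc_shmem_topology_get_length(topology, &length, 0) < 0)
        return -1;

    /* pick a free, page-aligned virtual address range, then release it so
     * hwloc_shmem_topology_write() can map the file there */
    void *addr = mmap(NULL, length, PROT_READ | PROT_WRITE,
                      MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (addr == MAP_FAILED)
        return -1;
    munmap(addr, length);

    int fd = open("topo.shmem", O_CREAT | O_RDWR, 0600);
    if (fd < 0)
        return -1;
    int err = hwloc_shmem_topology_write(topology, fd, 0, addr, length, 0);
    close(fd);
    return err;     /* -1 with EBUSY/EINVAL as documented above */
}
```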
578  src/3rdparty/hwloc/include/private/netloc.h  vendored  (file removed)

@@ -1,578 +0,0 @@
/*
 * Copyright © 2014 Cisco Systems, Inc. All rights reserved.
 * Copyright © 2013-2014 University of Wisconsin-La Crosse.
 * All rights reserved.
 * Copyright © 2015-2017 Inria. All rights reserved.
 *
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 * See COPYING in top-level directory.
 *
 * $HEADER$
 */

#ifndef _NETLOC_PRIVATE_H_
#define _NETLOC_PRIVATE_H_

#include <hwloc.h>
#include <netloc.h>
#include <netloc/uthash.h>
#include <netloc/utarray.h>
#include <private/autogen/config.h>

#define NETLOCFILE_VERSION 1

#ifdef NETLOC_SCOTCH
#include <stdint.h>
#include <scotch.h>
#define NETLOC_int SCOTCH_Num
#else
#define NETLOC_int int
#endif

/*
 * "Import" a few things from hwloc
 */
#define __netloc_attribute_unused __hwloc_attribute_unused
#define __netloc_attribute_malloc __hwloc_attribute_malloc
#define __netloc_attribute_const __hwloc_attribute_const
#define __netloc_attribute_pure __hwloc_attribute_pure
#define __netloc_attribute_deprecated __hwloc_attribute_deprecated
#define __netloc_attribute_may_alias __hwloc_attribute_may_alias
#define NETLOC_DECLSPEC HWLOC_DECLSPEC

/**********************************************************************
 * Types
 **********************************************************************/

/**
 * Definitions for Comparators
 * \sa These are the return values from the following functions:
 * netloc_network_compare, netloc_dt_edge_t_compare, netloc_dt_node_t_compare
 */
typedef enum {
    NETLOC_CMP_SAME = 0,     /**< Compared as the Same */
    NETLOC_CMP_SIMILAR = -1, /**< Compared as Similar, but not the Same */
    NETLOC_CMP_DIFF = -2     /**< Compared as Different */
} netloc_compare_type_t;

/**
 * Enumerated type for the various types of supported networks
 */
typedef enum {
    NETLOC_NETWORK_TYPE_ETHERNET = 1,   /**< Ethernet network */
    NETLOC_NETWORK_TYPE_INFINIBAND = 2, /**< InfiniBand network */
    NETLOC_NETWORK_TYPE_INVALID = 3     /**< Invalid network */
} netloc_network_type_t;

/**
 * Enumerated type for the various types of supported topologies
 */
typedef enum {
    NETLOC_TOPOLOGY_TYPE_INVALID = -1, /**< Invalid */
    NETLOC_TOPOLOGY_TYPE_TREE = 1,     /**< Tree */
} netloc_topology_type_t;

/**
 * Enumerated type for the various types of nodes
 */
typedef enum {
    NETLOC_NODE_TYPE_HOST = 0,   /**< Host (a.k.a., network addressable endpoint - e.g., MAC Address) node */
    NETLOC_NODE_TYPE_SWITCH = 1, /**< Switch node */
    NETLOC_NODE_TYPE_INVALID = 2 /**< Invalid node */
} netloc_node_type_t;

typedef enum {
    NETLOC_ARCH_TREE = 0, /* Fat tree */
} netloc_arch_type_t;

/* Pre declarations to avoid inter dependency problems */
/** \cond IGNORE */
struct netloc_topology_t;
typedef struct netloc_topology_t netloc_topology_t;
struct netloc_node_t;
typedef struct netloc_node_t netloc_node_t;
struct netloc_edge_t;
typedef struct netloc_edge_t netloc_edge_t;
struct netloc_physical_link_t;
typedef struct netloc_physical_link_t netloc_physical_link_t;
struct netloc_path_t;
typedef struct netloc_path_t netloc_path_t;

struct netloc_arch_tree_t;
typedef struct netloc_arch_tree_t netloc_arch_tree_t;
struct netloc_arch_node_t;
typedef struct netloc_arch_node_t netloc_arch_node_t;
struct netloc_arch_node_slot_t;
typedef struct netloc_arch_node_slot_t netloc_arch_node_slot_t;
struct netloc_arch_t;
typedef struct netloc_arch_t netloc_arch_t;
/** \endcond */

/**
 * \struct netloc_topology_t
 * \brief Netloc Topology Context
 *
 * An opaque data structure used to reference a network topology.
 *
 * \note Must be initialized with \ref netloc_topology_construct()
 */
struct netloc_topology_t {
    /** Topology path */
    char *topopath;
    /** Subnet ID */
    char *subnet_id;

    /** Node List */
    netloc_node_t *nodes;           /* Hash table of nodes by physical_id */
    netloc_node_t *nodesByHostname; /* Hash table of nodes by hostname */

    netloc_physical_link_t *physical_links; /* Hash table with physcial links */

    /** Partition List */
    UT_array *partitions;

    /** Hwloc topology List */
    char *hwlocpath;
    UT_array *topos;
    hwloc_topology_t *hwloc_topos;

    /** Type of the graph */
    netloc_topology_type_t type;
};

/**
 * \brief Netloc Node Type
 *
 * Represents the concept of a node (a.k.a., vertex, endpoint) within a network
 * graph. This could be a server or a network switch. The \ref node_type parameter
 * will distinguish the exact type of node this represents in the graph.
 */
struct netloc_node_t {
    UT_hash_handle hh;  /* makes this structure hashable with physical_id */
    UT_hash_handle hh2; /* makes this structure hashable with hostname */

    /** Physical ID of the node */
    char physical_id[20];

    /** Logical ID of the node (if any) */
    int logical_id;

    /** Type of the node */
    netloc_node_type_t type;

    /* Pointer to physical_links */
    UT_array *physical_links;

    /** Description information from discovery (if any) */
    char *description;

    /**
     * Application-given private data pointer.
     * Initialized to NULL, and not used by the netloc library.
     */
    void * userdata;

    /** Outgoing edges from this node */
    netloc_edge_t *edges;

    UT_array *subnodes; /* the group of nodes for the virtual nodes */

    netloc_path_t *paths;

    char *hostname;

    UT_array *partitions; /* index in the list from the topology */

    hwloc_topology_t hwlocTopo;
    int hwlocTopoIdx;
};

/**
 * \brief Netloc Edge Type
 *
 * Represents the concept of a directed edge within a network graph.
 *
 * \note We do not point to the netloc_node_t structure directly to
 * simplify the representation, and allow the information to more easily
 * be entered into the data store without circular references.
 * \todo JJH Is the note above still true?
 */
struct netloc_edge_t {
    UT_hash_handle hh; /* makes this structure hashable */

    netloc_node_t *dest;

    int id;

    /** Pointers to the parent node */
    netloc_node_t *node;

    /* Pointer to physical_links */
    UT_array *physical_links;

    /** total gbits of the links */
    float total_gbits;

    UT_array *partitions; /* index in the list from the topology */

    UT_array *subnode_edges; /* for edges going to virtual nodes */

    struct netloc_edge_t *other_way;

    /**
     * Application-given private data pointer.
     * Initialized to NULL, and not used by the netloc library.
     */
    void * userdata;
};

struct netloc_physical_link_t {
    UT_hash_handle hh; /* makes this structure hashable */

    int id; // TODO long long
    netloc_node_t *src;
    netloc_node_t *dest;
    int ports[2];
    char *width;
    char *speed;

    netloc_edge_t *edge;

    int other_way_id;
    struct netloc_physical_link_t *other_way;

    UT_array *partitions; /* index in the list from the topology */

    /** gbits of the link from speed and width */
    float gbits;

    /** Description information from discovery (if any) */
    char *description;
};

struct netloc_path_t {
    UT_hash_handle hh; /* makes this structure hashable */
    char dest_id[20];
    UT_array *links;
};

/**********************************************************************
 * Architecture structures
 **********************************************************************/
struct netloc_arch_tree_t {
    NETLOC_int num_levels;
    NETLOC_int *degrees;
    NETLOC_int *cost;
};

struct netloc_arch_node_t {
    UT_hash_handle hh;   /* makes this structure hashable */
    char *name;          /* Hash key */
    netloc_node_t *node; /* Corresponding node */
    int idx_in_topo;     /* idx with ghost hosts to have complete topo */
    int num_slots;       /* it is not the real number of slots but the maximum slot idx */
    int *slot_idx;       /* corresponding idx in slot_tree */
    int *slot_os_idx;    /* corresponding os index for each leaf in tree */
    netloc_arch_tree_t *slot_tree; /* Tree built from hwloc */
    int num_current_slots;         /* Number of PUs */
    NETLOC_int *current_slots;     /* indices in the complete tree */
    int *slot_ranks;               /* corresponding MPI rank for each leaf in tree */
};

struct netloc_arch_node_slot_t {
    netloc_arch_node_t *node;
    int slot;
};

struct netloc_arch_t {
    netloc_topology_t *topology;
    int has_slots; /* if slots are included in the architecture */
    netloc_arch_type_t type;
    union {
        netloc_arch_tree_t *node_tree;
        netloc_arch_tree_t *global_tree;
    } arch;
    netloc_arch_node_t *nodes_by_name;
    netloc_arch_node_slot_t *node_slot_by_idx; /* node_slot by index in complete topo */
    NETLOC_int num_current_hosts; /* if has_slots, host is a slot, else host is a node */
    NETLOC_int *current_hosts;    /* indices in the complete topology */
};

/**********************************************************************
 * Topology Functions
 **********************************************************************/
/**
 * Allocate a topology handle.
 *
 * User is responsible for calling \ref netloc_detach on the topology handle.
 * The network parameter information is deep copied into the topology handle, so the
 * user may destruct the network handle after calling this function and/or reuse
 * the network handle.
 *
 * \returns NETLOC_SUCCESS on success
 * \returns NETLOC_ERROR upon an error.
 */
netloc_topology_t *netloc_topology_construct(char *path);

/**
 * Destruct a topology handle
 *
 * \param topology A valid pointer to a \ref netloc_topology_t handle created
 * from a prior call to \ref netloc_topology_construct.
 *
 * \returns NETLOC_SUCCESS on success
 * \returns NETLOC_ERROR upon an error.
 */
int netloc_topology_destruct(netloc_topology_t *topology);

int netloc_topology_find_partition_idx(netloc_topology_t *topology, char *partition_name);

int netloc_topology_read_hwloc(netloc_topology_t *topology, int num_nodes,
        netloc_node_t **node_list);

#define netloc_topology_iter_partitions(topology,partition) \
    for ((partition) = (char **)utarray_front(topology->partitions); \
            (partition) != NULL; \
            (partition) = (char **)utarray_next(topology->partitions, partition))

#define netloc_topology_iter_hwloctopos(topology,hwloctopo) \
    for ((hwloctopo) = (char **)utarray_front(topology->topos); \
            (hwloctopo) != NULL; \
            (hwloctopo) = (char **)utarray_next(topology->topos, hwloctopo))

#define netloc_topology_find_node(topology,node_id,node) \
    HASH_FIND_STR(topology->nodes, node_id, node)

#define netloc_topology_iter_nodes(topology,node,_tmp) \
    HASH_ITER(hh, topology->nodes, node, _tmp)

#define netloc_topology_num_nodes(topology) \
    HASH_COUNT(topology->nodes)

/*************************************************/

/**
 * Constructor for netloc_node_t
 *
 * User is responsible for calling the destructor on the handle.
 *
 * Returns
 *   A newly allocated pointer to the network information.
 */
netloc_node_t *netloc_node_construct(void);

/**
 * Destructor for netloc_node_t
 *
 * \param node A valid node handle
 *
 * Returns
 *   NETLOC_SUCCESS on success
 *   NETLOC_ERROR on error
 */
int netloc_node_destruct(netloc_node_t *node);

char *netloc_node_pretty_print(netloc_node_t* node);

#define netloc_node_get_num_subnodes(node) \
    utarray_len((node)->subnodes)

#define netloc_node_get_subnode(node,i) \
    (*(netloc_node_t **)utarray_eltptr((node)->subnodes, (i)))

#define netloc_node_get_num_edges(node) \
    utarray_len((node)->edges)

#define netloc_node_get_edge(node,i) \
    (*(netloc_edge_t **)utarray_eltptr((node)->edges, (i)))

#define netloc_node_iter_edges(node,edge,_tmp) \
    HASH_ITER(hh, node->edges, edge, _tmp)

#define netloc_node_iter_paths(node,path,_tmp) \
    HASH_ITER(hh, node->paths, path, _tmp)

#define netloc_node_is_host(node) \
    (node->type == NETLOC_NODE_TYPE_HOST)

#define netloc_node_is_switch(node) \
    (node->type == NETLOC_NODE_TYPE_SWITCH)

#define netloc_node_iter_paths(node, path,_tmp) \
    HASH_ITER(hh, node->paths, path, _tmp)

int netloc_node_is_in_partition(netloc_node_t *node, int partition);

/*************************************************/

/**
 * Constructor for netloc_edge_t
 *
 * User is responsible for calling the destructor on the handle.
 *
 * Returns
 *   A newly allocated pointer to the edge information.
 */
netloc_edge_t *netloc_edge_construct(void);

/**
 * Destructor for netloc_edge_t
 *
 * \param edge A valid edge handle
 *
 * Returns
 *   NETLOC_SUCCESS on success
 *   NETLOC_ERROR on error
 */
int netloc_edge_destruct(netloc_edge_t *edge);

char * netloc_edge_pretty_print(netloc_edge_t* edge);

void netloc_edge_reset_uid(void);

int netloc_edge_is_in_partition(netloc_edge_t *edge, int partition);

#define netloc_edge_get_num_links(edge) \
    utarray_len((edge)->physical_links)

#define netloc_edge_get_link(edge,i) \
    (*(netloc_physical_link_t **)utarray_eltptr((edge)->physical_links, (i)))

#define netloc_edge_get_num_subedges(edge) \
    utarray_len((edge)->subnode_edges)

#define netloc_edge_get_subedge(edge,i) \
    (*(netloc_edge_t **)utarray_eltptr((edge)->subnode_edges, (i)))

/*************************************************/

/**
 * Constructor for netloc_physical_link_t
 *
 * User is responsible for calling the destructor on the handle.
 *
 * Returns
 *   A newly allocated pointer to the physical link information.
 */
netloc_physical_link_t * netloc_physical_link_construct(void);

/**
 * Destructor for netloc_physical_link_t
 *
 * Returns
 *   NETLOC_SUCCESS on success
 *   NETLOC_ERROR on error
 */
int netloc_physical_link_destruct(netloc_physical_link_t *link);

char * netloc_link_pretty_print(netloc_physical_link_t* link);

/*************************************************/

netloc_path_t *netloc_path_construct(void);
int netloc_path_destruct(netloc_path_t *path);

/**********************************************************************
 * Architecture functions
 **********************************************************************/

netloc_arch_t * netloc_arch_construct(void);

int netloc_arch_destruct(netloc_arch_t *arch);

int netloc_arch_build(netloc_arch_t *arch, int add_slots);

int netloc_arch_set_current_resources(netloc_arch_t *arch);

int netloc_arch_set_global_resources(netloc_arch_t *arch);

int netloc_arch_node_get_hwloc_info(netloc_arch_node_t *arch);

void netloc_arch_tree_complete(netloc_arch_tree_t *tree, UT_array **down_degrees_by_level,
        int num_hosts, int **parch_idx);

NETLOC_int netloc_arch_tree_num_leaves(netloc_arch_tree_t *tree);

/**********************************************************************
 * Access functions of various elements of the topology
 **********************************************************************/

#define netloc_get_num_partitions(object) \
    utarray_len((object)->partitions)

#define netloc_get_partition(object,i) \
    (*(int *)utarray_eltptr((object)->partitions, (i)))

#define netloc_path_iter_links(path,link) \
    for ((link) = (netloc_physical_link_t **)utarray_front(path->links); \
            (link) != NULL; \
            (link) = (netloc_physical_link_t **)utarray_next(path->links, link))

/**********************************************************************
 * Misc functions
 **********************************************************************/

/**
 * Decode the network type
 *
 * \param net_type A valid member of the \ref netloc_network_type_t type
 *
 * \returns NULL if the type is invalid
 * \returns A string for that \ref netloc_network_type_t type
 */
static inline const char * netloc_network_type_decode(netloc_network_type_t net_type) {
    if( NETLOC_NETWORK_TYPE_ETHERNET == net_type ) {
        return "ETH";
    }
    else if( NETLOC_NETWORK_TYPE_INFINIBAND == net_type ) {
        return "IB";
    }
    else {
        return NULL;
    }
}

/**
 * Decode the node type
 *
 * \param node_type A valid member of the \ref netloc_node_type_t type
 *
 * \returns NULL if the type is invalid
 * \returns A string for that \ref netloc_node_type_t type
 */
static inline const char * netloc_node_type_decode(netloc_node_type_t node_type) {
    if( NETLOC_NODE_TYPE_SWITCH == node_type ) {
        return "SW";
    }
    else if( NETLOC_NODE_TYPE_HOST == node_type ) {
        return "CA";
    }
    else {
        return NULL;
    }
}

ssize_t netloc_line_get(char **lineptr, size_t *n, FILE *stream);

char *netloc_line_get_next_token(char **string, char c);

int netloc_build_comm_mat(char *filename, int *pn, double ***pmat);

#define STRDUP_IF_NOT_NULL(str) (NULL == str ? NULL : strdup(str))
#define STR_EMPTY_IF_NULL(str) (NULL == str ? "" : str)

#endif // _NETLOC_PRIVATE_H_
11
src/3rdparty/hwloc/include/private/private.h
vendored
@@ -1,6 +1,6 @@
/*
* Copyright © 2009 CNRS
* Copyright © 2009-2022 Inria. All rights reserved.
* Copyright © 2009-2023 Inria. All rights reserved.
* Copyright © 2009-2012, 2020 Université Bordeaux
* Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved.
*
@@ -245,6 +245,12 @@ struct hwloc_topology {
* temporary variables during discovery
*/
/* set to 1 at the beginning of load() if the filter of any cpu cache type (L1 to L3i) is not NONE,
* may be checked by backends before querying caches
* (when they don't know the level of caches they are querying).
*/
int want_some_cpu_caches;
/* machine-wide memory.
* temporarily stored there by OSes that only provide this without NUMA information,
* and actually used later by the core.
@@ -420,7 +426,7 @@ extern void hwloc_internal_memattrs_need_refresh(hwloc_topology_t topology);
extern void hwloc_internal_memattrs_refresh(hwloc_topology_t topology);
extern int hwloc_internal_memattrs_dup(hwloc_topology_t new, hwloc_topology_t old);
extern int hwloc_internal_memattr_set_value(hwloc_topology_t topology, hwloc_memattr_id_t id, hwloc_obj_type_t target_type, hwloc_uint64_t target_gp_index, unsigned target_os_index, struct hwloc_internal_location_s *initiator, hwloc_uint64_t value);
extern int hwloc_internal_memattrs_guess_memory_tiers(hwloc_topology_t topology);
extern int hwloc_internal_memattrs_guess_memory_tiers(hwloc_topology_t topology, int force_subtype);
extern void hwloc_internal_cpukinds_init(hwloc_topology_t topology);
extern int hwloc_internal_cpukinds_rank(hwloc_topology_t topology);
@@ -477,6 +483,7 @@ extern char * hwloc_progname(struct hwloc_topology *topology);
#define HWLOC_GROUP_KIND_INTEL_DIE 104 /* no subkind */
#define HWLOC_GROUP_KIND_S390_BOOK 110 /* subkind 0 is book, subkind 1 is drawer (group of books) */
#define HWLOC_GROUP_KIND_AMD_COMPUTE_UNIT 120 /* no subkind */
#define HWLOC_GROUP_KIND_AMD_COMPLEX 121 /* no subkind */
/* then, OS-specific groups */
#define HWLOC_GROUP_KIND_SOLARIS_PG_HW_PERF 200 /* subkind is group width */
#define HWLOC_GROUP_KIND_AIX_SDL_UNKNOWN 210 /* subkind is SDL level */
8
src/3rdparty/hwloc/include/private/xml.h
vendored
@@ -19,13 +19,14 @@ HWLOC_DECLSPEC int hwloc__xml_verbose(void);
typedef struct hwloc__xml_import_state_s {
struct hwloc__xml_import_state_s *parent;
/* globals shared because the entire stack of states during import */
/* globals shared between the entire stack of states during import */
struct hwloc_xml_backend_data_s *global;
/* opaque data used to store backend-specific data.
* statically allocated to allow stack-allocation by the common code without knowing actual backend needs.
* libxml is 3 ptrs. nolibxml is 3 ptr + one int.
*/
char data[32];
char data[4 * SIZEOF_VOID_P];
} * hwloc__xml_import_state_t;
struct hwloc__xml_imported_v1distances_s {
@@ -74,8 +75,9 @@ typedef struct hwloc__xml_export_state_s {
/* opaque data used to store backend-specific data.
* statically allocated to allow stack-allocation by the common code without knowing actual backend needs.
* libxml is 1 ptr. nolibxml is 1 ptr + 2 size_t + 3 ints.
*/
char data[40];
char data[6 * SIZEOF_VOID_P];
} * hwloc__xml_export_state_t;
HWLOC_DECLSPEC void hwloc__xml_export_topology(hwloc__xml_export_state_t parentstate, hwloc_topology_t topology, unsigned long flags);
3
src/3rdparty/hwloc/src/components.c
vendored
@@ -94,8 +94,7 @@ static hwloc_dlhandle hwloc_dlopenext(const char *_filename)
{
hwloc_dlhandle handle;
char *filename = NULL;
(void) asprintf(&filename, "%s.so", _filename);
if (!filename)
if (asprintf(&filename, "%s.so", _filename) < 0)
return NULL;
handle = dlopen(filename, RTLD_NOW|RTLD_LOCAL);
free(filename);
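The components.c hunk above switches from testing the output pointer to testing asprintf()'s return value, since the pointer is left undefined when asprintf() fails. A minimal standalone sketch of that pattern (not hwloc code; make_plugin_name is a hypothetical helper):

#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>

static char *make_plugin_name(const char *base)
{
  char *filename;
  if (asprintf(&filename, "%s.so", base) < 0) /* negative return means allocation/format failure */
    return NULL;
  return filename; /* caller must free() */
}

int main(void)
{
  char *name = make_plugin_name("hwloc_pci");
  if (name) {
    puts(name);
    free(name);
  }
  return 0;
}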
26
src/3rdparty/hwloc/src/diff.c
vendored
@@ -1,5 +1,5 @@
/*
* Copyright © 2013-2022 Inria. All rights reserved.
* Copyright © 2013-2023 Inria. All rights reserved.
* See COPYING in top-level directory.
*/
@@ -411,6 +411,30 @@ int hwloc_topology_diff_build(hwloc_topology_t topo1,
}
}
if (!err) {
/* cpukinds */
if (topo1->nr_cpukinds != topo2->nr_cpukinds)
goto roottoocomplex;
for(i=0; i<topo1->nr_cpukinds; i++) {
struct hwloc_internal_cpukind_s *ic1 = &topo1->cpukinds[i];
struct hwloc_internal_cpukind_s *ic2 = &topo2->cpukinds[i];
unsigned j;
if (!hwloc_bitmap_isequal(ic1->cpuset, ic2->cpuset)
|| ic1->efficiency != ic2->efficiency
|| ic1->forced_efficiency != ic2->forced_efficiency
|| ic1->ranking_value != ic2->ranking_value
|| ic1->nr_infos != ic2->nr_infos)
goto roottoocomplex;
for(j=0; j<ic1->nr_infos; j++) {
struct hwloc_info_s *info1 = &ic1->infos[j], *info2 = &ic2->infos[j];
if (strcmp(info1->name, info2->name)
|| strcmp(info1->value, info2->value)) {
goto roottoocomplex;
}
}
}
}
return err;
roottoocomplex:
758
src/3rdparty/hwloc/src/memattrs.c
vendored
@@ -1,5 +1,5 @@
/*
* Copyright © 2020-2022 Inria. All rights reserved.
* Copyright © 2020-2023 Inria. All rights reserved.
* See COPYING in top-level directory.
*/
@@ -1219,24 +1219,82 @@ hwloc_get_local_numanode_objs(hwloc_topology_t topology,
* Using memattrs to identify HBM/DRAM
*/
enum hwloc_memory_tier_type_e {
/* WARNING: keep higher BW types first for compare_tiers_by_bw_and_type() when BW info is missing */
HWLOC_MEMORY_TIER_HBM = 1UL<<0,
HWLOC_MEMORY_TIER_DRAM = 1UL<<1,
HWLOC_MEMORY_TIER_GPU = 1UL<<2,
HWLOC_MEMORY_TIER_SPM = 1UL<<3, /* Specific-Purpose Memory is usually HBM, we'll use BW to confirm or force*/
HWLOC_MEMORY_TIER_NVM = 1UL<<4,
HWLOC_MEMORY_TIER_CXL = 1UL<<5
};
typedef unsigned long hwloc_memory_tier_type_t;
#define HWLOC_MEMORY_TIER_UNKNOWN 0UL
static const char * hwloc_memory_tier_type_snprintf(hwloc_memory_tier_type_t type)
{
switch (type) {
case HWLOC_MEMORY_TIER_DRAM: return "DRAM";
case HWLOC_MEMORY_TIER_HBM: return "HBM";
case HWLOC_MEMORY_TIER_GPU: return "GPUMemory";
case HWLOC_MEMORY_TIER_SPM: return "SPM";
case HWLOC_MEMORY_TIER_NVM: return "NVM";
case HWLOC_MEMORY_TIER_CXL:
case HWLOC_MEMORY_TIER_CXL|HWLOC_MEMORY_TIER_DRAM: return "CXL-DRAM";
case HWLOC_MEMORY_TIER_CXL|HWLOC_MEMORY_TIER_HBM: return "CXL-HBM";
case HWLOC_MEMORY_TIER_CXL|HWLOC_MEMORY_TIER_GPU: return "CXL-GPUMemory";
case HWLOC_MEMORY_TIER_CXL|HWLOC_MEMORY_TIER_SPM: return "CXL-SPM";
case HWLOC_MEMORY_TIER_CXL|HWLOC_MEMORY_TIER_NVM: return "CXL-NVM";
default: return NULL;
}
}
static hwloc_memory_tier_type_t hwloc_memory_tier_type_sscanf(const char *name)
{
if (!strcasecmp(name, "DRAM"))
return HWLOC_MEMORY_TIER_DRAM;
if (!strcasecmp(name, "HBM"))
return HWLOC_MEMORY_TIER_HBM;
if (!strcasecmp(name, "GPUMemory"))
return HWLOC_MEMORY_TIER_GPU;
if (!strcasecmp(name, "SPM"))
return HWLOC_MEMORY_TIER_SPM;
if (!strcasecmp(name, "NVM"))
return HWLOC_MEMORY_TIER_NVM;
if (!strcasecmp(name, "CXL-DRAM"))
return HWLOC_MEMORY_TIER_CXL|HWLOC_MEMORY_TIER_DRAM;
if (!strcasecmp(name, "CXL-HBM"))
return HWLOC_MEMORY_TIER_CXL|HWLOC_MEMORY_TIER_HBM;
if (!strcasecmp(name, "CXL-GPUMemory"))
return HWLOC_MEMORY_TIER_CXL|HWLOC_MEMORY_TIER_GPU;
if (!strcasecmp(name, "CXL-SPM"))
return HWLOC_MEMORY_TIER_CXL|HWLOC_MEMORY_TIER_SPM;
if (!strcasecmp(name, "CXL-NVM"))
return HWLOC_MEMORY_TIER_CXL|HWLOC_MEMORY_TIER_NVM;
return 0;
}
/* factorized tier, grouping multiple nodes */
struct hwloc_memory_tier_s {
hwloc_obj_t node;
uint64_t local_bw;
enum hwloc_memory_tier_type_e {
/* warning the order is important for guess_memory_tiers() after qsort() */
HWLOC_MEMORY_TIER_UNKNOWN,
HWLOC_MEMORY_TIER_DRAM,
HWLOC_MEMORY_TIER_HBM,
HWLOC_MEMORY_TIER_SPM, /* Specific-Purpose Memory is usually HBM, we'll use BW to confirm */
HWLOC_MEMORY_TIER_NVM,
HWLOC_MEMORY_TIER_GPU,
} type;
hwloc_nodeset_t nodeset;
uint64_t local_bw_min, local_bw_max;
uint64_t local_lat_min, local_lat_max;
hwloc_memory_tier_type_t type;
};
static int compare_tiers(const void *_a, const void *_b)
/* early tier discovery, one entry per node */
struct hwloc_memory_node_info_s {
hwloc_obj_t node;
uint64_t local_bw;
uint64_t local_lat;
hwloc_memory_tier_type_t type;
unsigned rank;
};
static int compare_node_infos_by_type_and_bw(const void *_a, const void *_b)
{
const struct hwloc_memory_tier_s *a = _a, *b = _b;
/* sort by type of tier first */
const struct hwloc_memory_node_info_s *a = _a, *b = _b;
/* sort by type of node first */
if (a->type != b->type)
return a->type - b->type;
/* then by bandwidth */
@@ -1247,180 +1305,560 @@ static int compare_tiers(const void *_a, const void *_b)
return 0;
}
int
hwloc_internal_memattrs_guess_memory_tiers(hwloc_topology_t topology)
static int compare_tiers_by_bw_and_type(const void *_a, const void *_b)
{
struct hwloc_internal_memattr_s *imattr;
struct hwloc_memory_tier_s *tiers;
unsigned i, j, n;
const char *env;
int spm_is_hbm = -1; /* -1 will guess from BW, 0 no, 1 forced */
int mark_dram = 1;
unsigned first_spm, first_nvm;
hwloc_uint64_t max_unknown_bw, min_spm_bw;
env = getenv("HWLOC_MEMTIERS_GUESS");
if (env) {
if (!strcmp(env, "none")) {
return 0;
} else if (!strcmp(env, "default")) {
/* nothing */
} else if (!strcmp(env, "spm_is_hbm")) {
hwloc_debug("Assuming SPM-tier is HBM, ignore bandwidth\n");
spm_is_hbm = 1;
} else if (HWLOC_SHOW_CRITICAL_ERRORS()) {
fprintf(stderr, "hwloc: Failed to recognize HWLOC_MEMTIERS_GUESS value %s\n", env);
}
const struct hwloc_memory_tier_s *a = _a, *b = _b;
/* sort by (average) BW first */
if (a->local_bw_min && b->local_bw_min) {
if (a->local_bw_min + a->local_bw_max > b->local_bw_min + b->local_bw_max)
return -1;
else if (a->local_bw_min + a->local_bw_max < b->local_bw_min + b->local_bw_max)
return 1;
}
/* then by tier type */
if (a->type != b->type)
return a->type - b->type;
return 0;
}
imattr = &topology->memattrs[HWLOC_MEMATTR_ID_BANDWIDTH];
if (!(imattr->iflags & HWLOC_IMATTR_FLAG_CACHE_VALID))
hwloc__imattr_refresh(topology, imattr);
static struct hwloc_memory_tier_s *
hwloc__group_memory_tiers(hwloc_topology_t topology,
unsigned *nr_tiers_p)
{
struct hwloc_internal_memattr_s *imattr_bw, *imattr_lat;
struct hwloc_memory_node_info_s *nodeinfos;
struct hwloc_memory_tier_s *tiers;
unsigned nr_tiers;
float bw_threshold = 0.1;
float lat_threshold = 0.1;
const char *env;
unsigned i, j, n;
n = hwloc_get_nbobjs_by_depth(topology, HWLOC_TYPE_DEPTH_NUMANODE);
assert(n);
tiers = malloc(n * sizeof(*tiers));
if (!tiers)
return -1;
env = getenv("HWLOC_MEMTIERS_BANDWIDTH_THRESHOLD");
if (env)
bw_threshold = atof(env);
env = getenv("HWLOC_MEMTIERS_LATENCY_THRESHOLD");
if (env)
lat_threshold = atof(env);
imattr_bw = &topology->memattrs[HWLOC_MEMATTR_ID_BANDWIDTH];
imattr_lat = &topology->memattrs[HWLOC_MEMATTR_ID_LATENCY];
if (!(imattr_bw->iflags & HWLOC_IMATTR_FLAG_CACHE_VALID))
hwloc__imattr_refresh(topology, imattr_bw);
if (!(imattr_lat->iflags & HWLOC_IMATTR_FLAG_CACHE_VALID))
hwloc__imattr_refresh(topology, imattr_lat);
nodeinfos = malloc(n * sizeof(*nodeinfos));
if (!nodeinfos)
return NULL;
for(i=0; i<n; i++) {
hwloc_obj_t node;
const char *daxtype;
struct hwloc_internal_location_s iloc;
struct hwloc_internal_memattr_target_s *imtg = NULL;
struct hwloc_internal_memattr_initiator_s *imi;
struct hwloc_internal_memattr_target_s *imtg;
node = hwloc_get_obj_by_depth(topology, HWLOC_TYPE_DEPTH_NUMANODE, i);
assert(node);
tiers[i].node = node;
nodeinfos[i].node = node;
/* defaults */
tiers[i].type = HWLOC_MEMORY_TIER_UNKNOWN;
tiers[i].local_bw = 0; /* unknown */
/* defaults to unknown */
nodeinfos[i].type = HWLOC_MEMORY_TIER_UNKNOWN;
nodeinfos[i].local_bw = 0;
nodeinfos[i].local_lat = 0;
daxtype = hwloc_obj_get_info_by_name(node, "DAXType");
/* mark NVM, SPM and GPU nodes */
if (daxtype && !strcmp(daxtype, "NVM"))
tiers[i].type = HWLOC_MEMORY_TIER_NVM;
if (daxtype && !strcmp(daxtype, "SPM"))
tiers[i].type = HWLOC_MEMORY_TIER_SPM;
if (node->subtype && !strcmp(node->subtype, "GPUMemory"))
tiers[i].type = HWLOC_MEMORY_TIER_GPU;
nodeinfos[i].type = HWLOC_MEMORY_TIER_GPU;
else if (daxtype && !strcmp(daxtype, "NVM"))
nodeinfos[i].type = HWLOC_MEMORY_TIER_NVM;
else if (daxtype && !strcmp(daxtype, "SPM"))
nodeinfos[i].type = HWLOC_MEMORY_TIER_SPM;
/* add CXL flag */
if (hwloc_obj_get_info_by_name(node, "CXLDevice") != NULL) {
/* CXL is always SPM for now. HBM and DRAM not possible here yet.
* Hence remove all but NVM first.
*/
nodeinfos[i].type &= HWLOC_MEMORY_TIER_NVM;
nodeinfos[i].type |= HWLOC_MEMORY_TIER_CXL;
}
if (spm_is_hbm == -1) {
for(j=0; j<imattr->nr_targets; j++)
if (imattr->targets[j].obj == node) {
imtg = &imattr->targets[j];
break;
}
if (imtg && !hwloc_bitmap_iszero(node->cpuset)) {
iloc.type = HWLOC_LOCATION_TYPE_CPUSET;
iloc.location.cpuset = node->cpuset;
imi = hwloc__memattr_target_get_initiator(imtg, &iloc, 0);
if (imi)
tiers[i].local_bw = imi->value;
/* get local bandwidth */
imtg = NULL;
for(j=0; j<imattr_bw->nr_targets; j++)
if (imattr_bw->targets[j].obj == node) {
imtg = &imattr_bw->targets[j];
break;
}
if (imtg && !hwloc_bitmap_iszero(node->cpuset)) {
struct hwloc_internal_memattr_initiator_s *imi;
iloc.type = HWLOC_LOCATION_TYPE_CPUSET;
iloc.location.cpuset = node->cpuset;
imi = hwloc__memattr_target_get_initiator(imtg, &iloc, 0);
if (imi)
nodeinfos[i].local_bw = imi->value;
}
/* get local latency */
imtg = NULL;
for(j=0; j<imattr_lat->nr_targets; j++)
if (imattr_lat->targets[j].obj == node) {
imtg = &imattr_lat->targets[j];
break;
}
if (imtg && !hwloc_bitmap_iszero(node->cpuset)) {
struct hwloc_internal_memattr_initiator_s *imi;
iloc.type = HWLOC_LOCATION_TYPE_CPUSET;
iloc.location.cpuset = node->cpuset;
imi = hwloc__memattr_target_get_initiator(imtg, &iloc, 0);
if (imi)
nodeinfos[i].local_lat = imi->value;
}
}
/* Sort nodes.
* We could also sort by the existing subtype.
* KNL is the only case where subtypes are set in backends, but we set memattrs as well there.
* Also HWLOC_MEMTIERS_REFRESH would be a special value to ignore existing subtypes.
*/
hwloc_debug("Sorting memory node infos...\n");
qsort(nodeinfos, n, sizeof(*nodeinfos), compare_node_infos_by_type_and_bw);
#ifdef HWLOC_DEBUG
for(i=0; i<n; i++)
hwloc_debug(" node info %u = node L#%u P#%u with info type %lx and local BW %llu lat %llu\n",
i,
nodeinfos[i].node->logical_index, nodeinfos[i].node->os_index,
nodeinfos[i].type,
(unsigned long long) nodeinfos[i].local_bw,
(unsigned long long) nodeinfos[i].local_lat);
#endif
/* now we have UNKNOWN nodes (sorted by BW only), then known ones */
/* iterate among them and add a rank value.
* start from rank 0 and switch to next rank when the type changes or when the BW or latendy difference is > threshold */
hwloc_debug("Starting memory tier #0 and iterating over nodes...\n");
nodeinfos[0].rank = 0;
for(i=1; i<n; i++) {
/* reuse the same rank by default */
nodeinfos[i].rank = nodeinfos[i-1].rank;
/* comparing type */
if (nodeinfos[i].type != nodeinfos[i-1].type) {
hwloc_debug(" Switching to memory tier #%u starting with node L#%u P#%u because of type\n",
nodeinfos[i].rank, nodeinfos[i].node->logical_index, nodeinfos[i].node->os_index);
nodeinfos[i].rank++;
continue;
}
/* comparing bandwidth */
if (nodeinfos[i].local_bw && nodeinfos[i-1].local_bw) {
float bw_ratio = (float)nodeinfos[i].local_bw/(float)nodeinfos[i-1].local_bw;
if (bw_ratio < 1.)
bw_ratio = 1./bw_ratio;
if (bw_ratio > 1.0 + bw_threshold) {
nodeinfos[i].rank++;
hwloc_debug(" Switching to memory tier #%u starting with node L#%u P#%u because of bandwidth\n",
nodeinfos[i].rank, nodeinfos[i].node->logical_index, nodeinfos[i].node->os_index);
continue;
}
}
/* comparing latency */
if (nodeinfos[i].local_lat && nodeinfos[i-1].local_lat) {
float lat_ratio = (float)nodeinfos[i].local_lat/(float)nodeinfos[i-1].local_lat;
if (lat_ratio < 1.)
lat_ratio = 1./lat_ratio;
if (lat_ratio > 1.0 + lat_threshold) {
hwloc_debug(" Switching to memory tier #%u starting with node L#%u P#%u because of latency\n",
nodeinfos[i].rank, nodeinfos[i].node->logical_index, nodeinfos[i].node->os_index);
nodeinfos[i].rank++;
continue;
}
}
}
/* FIXME: if there are cpuset-intersecting nodes in same tier, split again? */
hwloc_debug(" Found %u tiers total\n", nodeinfos[n-1].rank + 1);
/* sort tiers */
qsort(tiers, n, sizeof(*tiers), compare_tiers);
hwloc_debug("Sorting memory tiers...\n");
for(i=0; i<n; i++)
hwloc_debug(" tier %u = node L#%u P#%u with tier type %d and local BW #%llu\n",
i,
tiers[i].node->logical_index, tiers[i].node->os_index,
tiers[i].type, (unsigned long long) tiers[i].local_bw);
/* now we have UNKNOWN tiers (sorted by BW), then SPM tiers (sorted by BW), then NVM, then GPU */
/* iterate over UNKNOWN tiers, and find their BW */
/* now group nodeinfos into factorized tiers */
nr_tiers = nodeinfos[n-1].rank + 1;
tiers = calloc(nr_tiers, sizeof(*tiers));
if (!tiers)
goto out_with_nodeinfos;
for(i=0; i<nr_tiers; i++) {
tiers[i].nodeset = hwloc_bitmap_alloc();
if (!tiers[i].nodeset)
goto out_with_tiers;
tiers[i].local_bw_min = tiers[i].local_bw_max = 0;
tiers[i].local_lat_min = tiers[i].local_lat_max = 0;
tiers[i].type = HWLOC_MEMORY_TIER_UNKNOWN;
}
for(i=0; i<n; i++) {
if (tiers[i].type > HWLOC_MEMORY_TIER_UNKNOWN)
break;
}
first_spm = i;
/* get max BW from first */
if (first_spm > 0)
max_unknown_bw = tiers[0].local_bw;
else
max_unknown_bw = 0;
/* there are no DRAM or HBM tiers yet */
/* iterate over SPM tiers, and find their BW */
for(i=first_spm; i<n; i++) {
if (tiers[i].type > HWLOC_MEMORY_TIER_SPM)
break;
}
first_nvm = i;
/* get min BW from last */
if (first_nvm > first_spm)
min_spm_bw = tiers[first_nvm-1].local_bw;
else
min_spm_bw = 0;
/* FIXME: if there's more than 10% between some sets of nodes inside a tier, split it? */
/* FIXME: if there are cpuset-intersecting nodes in same tier, abort? */
if (spm_is_hbm == -1) {
/* if we have BW for all SPM and UNKNOWN
* and all SPM BW are 2x superior to all UNKNOWN BW
*/
hwloc_debug("UNKNOWN-memory-tier max bandwidth %llu\n", (unsigned long long) max_unknown_bw);
hwloc_debug("SPM-memory-tier min bandwidth %llu\n", (unsigned long long) min_spm_bw);
if (max_unknown_bw > 0 && min_spm_bw > 0 && max_unknown_bw*2 < min_spm_bw) {
hwloc_debug("assuming SPM means HBM and !SPM means DRAM since bandwidths are very different\n");
spm_is_hbm = 1;
} else {
hwloc_debug("cannot assume SPM means HBM\n");
spm_is_hbm = 0;
}
unsigned rank = nodeinfos[i].rank;
assert(rank < nr_tiers);
hwloc_bitmap_set(tiers[rank].nodeset, nodeinfos[i].node->os_index);
assert(tiers[rank].type == HWLOC_MEMORY_TIER_UNKNOWN
|| tiers[rank].type == nodeinfos[i].type);
tiers[rank].type = nodeinfos[i].type;
/* nodeinfos are sorted in BW order, no need to compare */
if (!tiers[rank].local_bw_min)
tiers[rank].local_bw_min = nodeinfos[i].local_bw;
tiers[rank].local_bw_max = nodeinfos[i].local_bw;
/* compare latencies to update min/max */
if (!tiers[rank].local_lat_min || nodeinfos[i].local_lat < tiers[rank].local_lat_min)
tiers[rank].local_lat_min = nodeinfos[i].local_lat;
if (!tiers[rank].local_lat_max || nodeinfos[i].local_lat > tiers[rank].local_lat_max)
tiers[rank].local_lat_max = nodeinfos[i].local_lat;
}
if (spm_is_hbm) {
for(i=0; i<first_spm; i++)
tiers[i].type = HWLOC_MEMORY_TIER_DRAM;
for(i=first_spm; i<first_nvm; i++)
tiers[i].type = HWLOC_MEMORY_TIER_HBM;
}
if (first_spm == n)
mark_dram = 0;
/* now apply subtypes */
for(i=0; i<n; i++) {
const char *type = NULL;
if (tiers[i].node->subtype) /* don't overwrite the existing subtype */
continue;
switch (tiers[i].type) {
case HWLOC_MEMORY_TIER_DRAM:
if (mark_dram)
type = "DRAM";
break;
case HWLOC_MEMORY_TIER_HBM:
type = "HBM";
break;
case HWLOC_MEMORY_TIER_SPM:
type = "SPM";
break;
case HWLOC_MEMORY_TIER_NVM:
type = "NVM";
break;
default:
/* GPU memory is already marked with subtype="GPUMemory",
* UNKNOWN doesn't deserve any subtype
*/
break;
}
if (type) {
hwloc_debug("Marking node L#%u P#%u as %s\n", tiers[i].node->logical_index, tiers[i].node->os_index, type);
tiers[i].node->subtype = strdup(type);
}
}
free(nodeinfos);
*nr_tiers_p = nr_tiers;
return tiers;
out_with_tiers:
for(i=0; i<nr_tiers; i++)
hwloc_bitmap_free(tiers[i].nodeset);
free(tiers);
out_with_nodeinfos:
free(nodeinfos);
return NULL;
}
enum hwloc_guess_memtiers_flag {
HWLOC_GUESS_MEMTIERS_FLAG_NODE0_IS_DRAM = 1<<0,
HWLOC_GUESS_MEMTIERS_FLAG_SPM_IS_HBM = 1<<1
};
static int
hwloc__guess_dram_hbm_tiers(struct hwloc_memory_tier_s *tier1,
struct hwloc_memory_tier_s *tier2,
unsigned long flags)
{
struct hwloc_memory_tier_s *tmp;
if (!tier1->local_bw_min || !tier2->local_bw_min) {
hwloc_debug(" Missing BW info\n");
return -1;
}
/* reorder tiers by BW */
if (tier1->local_bw_min > tier2->local_bw_min) {
tmp = tier1; tier1 = tier2; tier2 = tmp;
}
/* tier1 < tier2 */
hwloc_debug(" tier1 BW %llu-%llu vs tier2 BW %llu-%llu\n",
(unsigned long long) tier1->local_bw_min,
(unsigned long long) tier1->local_bw_max,
(unsigned long long) tier2->local_bw_min,
(unsigned long long) tier2->local_bw_max);
if (tier2->local_bw_min <= tier1->local_bw_max * 2) {
/* tier2 BW isn't 2x tier1, we cannot guess HBM */
hwloc_debug(" BW difference isn't >2x\n");
return -1;
}
/* tier2 BW is >2x tier1 */
if ((flags & HWLOC_GUESS_MEMTIERS_FLAG_NODE0_IS_DRAM)
&& hwloc_bitmap_isset(tier2->nodeset, 0)) {
/* node0 is not DRAM, and we assume that's not possible */
hwloc_debug(" node0 shouldn't have HBM BW\n");
return -1;
}
/* assume tier1 == DRAM and tier2 == HBM */
tier1->type = HWLOC_MEMORY_TIER_DRAM;
tier2->type = HWLOC_MEMORY_TIER_HBM;
hwloc_debug(" Success\n");
return 0;
}
static int
hwloc__guess_memory_tiers_types(hwloc_topology_t topology __hwloc_attribute_unused,
unsigned nr_tiers,
struct hwloc_memory_tier_s *tiers)
{
unsigned long flags;
const char *env;
unsigned nr_unknown, nr_spm;
struct hwloc_memory_tier_s *unknown_tier[2], *spm_tier;
unsigned i;
flags = 0;
env = getenv("HWLOC_MEMTIERS_GUESS");
if (env) {
if (!strcmp(env, "none"))
return 0;
/* by default, we don't guess anything unsure */
if (!strcmp(env, "all"))
/* enable all typical cases */
flags = ~0UL;
if (strstr(env, "spm_is_hbm")) {
hwloc_debug("Assuming SPM-tier is HBM, ignore bandwidth\n");
flags |= HWLOC_GUESS_MEMTIERS_FLAG_SPM_IS_HBM;
}
if (strstr(env, "node0_is_dram")) {
hwloc_debug("Assuming node0 is DRAM\n");
flags |= HWLOC_GUESS_MEMTIERS_FLAG_NODE0_IS_DRAM;
}
}
if (nr_tiers == 1)
/* Likely DRAM only, but could also be HBM-only in non-SPM mode.
* We cannot be sure, but it doesn't matter since there's a single tier.
*/
return 0;
nr_unknown = nr_spm = 0;
unknown_tier[0] = unknown_tier[1] = spm_tier = NULL;
for(i=0; i<nr_tiers; i++) {
switch (tiers[i].type) {
case HWLOC_MEMORY_TIER_UNKNOWN:
if (nr_unknown < 2)
unknown_tier[nr_unknown] = &tiers[i];
nr_unknown++;
break;
case HWLOC_MEMORY_TIER_SPM:
spm_tier = &tiers[i];
nr_spm++;
break;
case HWLOC_MEMORY_TIER_DRAM:
case HWLOC_MEMORY_TIER_HBM:
/* not possible */
abort();
default:
/* ignore HBM, NVM, ... */
break;
}
}
hwloc_debug("Found %u unknown memory tiers and %u SPM\n",
nr_unknown, nr_spm);
/* Try to guess DRAM + HBM common cases.
* Other things we'd like to detect:
* single unknown => DRAM or HBM? HBM won't be SPM on HBM-only CPUs
* unknown + CXL DRAM => DRAM or HBM?
*/
if (nr_unknown == 2 && !nr_spm) {
/* 2 unknown, could be DRAM + non-SPM HBM */
hwloc_debug(" Trying to guess 2 unknown tiers using BW\n");
hwloc__guess_dram_hbm_tiers(unknown_tier[0], unknown_tier[1], flags);
} else if (nr_unknown == 1 && nr_spm == 1) {
/* 1 unknown + 1 SPM, could be DRAM + SPM HBM */
hwloc_debug(" Trying to guess 1 unknown + 1 SPM tiers using BW\n");
hwloc__guess_dram_hbm_tiers(unknown_tier[0], spm_tier, flags);
}
if (flags & HWLOC_GUESS_MEMTIERS_FLAG_SPM_IS_HBM) {
/* force mark SPM as HBM */
for(i=0; i<nr_tiers; i++)
if (tiers[i].type == HWLOC_MEMORY_TIER_SPM) {
hwloc_debug("Forcing SPM tier to HBM");
tiers[i].type = HWLOC_MEMORY_TIER_HBM;
}
}
if (flags & HWLOC_GUESS_MEMTIERS_FLAG_NODE0_IS_DRAM) {
/* force mark node0's tier as DRAM if we couldn't guess it */
for(i=0; i<nr_tiers; i++)
if (hwloc_bitmap_isset(tiers[i].nodeset, 0)
&& tiers[i].type == HWLOC_MEMORY_TIER_UNKNOWN) {
hwloc_debug("Forcing node0 tier to DRAM");
tiers[i].type = HWLOC_MEMORY_TIER_DRAM;
break;
}
}
return 0;
}
/* parses something like 0xf=HBM;0x0f=DRAM;0x00f=CXL-DRAM */
static struct hwloc_memory_tier_s *
hwloc__force_memory_tiers(hwloc_topology_t topology __hwloc_attribute_unused,
unsigned *nr_tiers_p,
const char *_env)
{
struct hwloc_memory_tier_s *tiers = NULL;
unsigned nr_tiers, i;
hwloc_bitmap_t nodeset = NULL;
char *env;
const char *tmp;
env = strdup(_env);
if (!env) {
fprintf(stderr, "[hwloc/memtiers] failed to duplicate HWLOC_MEMTIERS envvar\n");
goto out;
}
tmp = env;
nr_tiers = 1;
while (1) {
tmp = strchr(tmp, ';');
if (!tmp)
break;
tmp++;
nr_tiers++;
}
nodeset = hwloc_bitmap_alloc();
if (!nodeset) {
fprintf(stderr, "[hwloc/memtiers] failed to allocated forced tiers' nodeset\n");
goto out_with_envvar;
}
tiers = calloc(nr_tiers, sizeof(*tiers));
if (!tiers) {
fprintf(stderr, "[hwloc/memtiers] failed to allocated forced tiers\n");
goto out_with_nodeset;
}
nr_tiers = 0;
tmp = env;
while (1) {
char *end;
char *equal;
hwloc_memory_tier_type_t type;
end = strchr(tmp, ';');
if (end)
*end = '\0';
equal = strchr(tmp, '=');
if (!equal) {
fprintf(stderr, "[hwloc/memtiers] missing `=' before end of forced tier description at `%s'\n", tmp);
goto out_with_tiers;
}
*equal = '\0';
hwloc_bitmap_sscanf(nodeset, tmp);
if (hwloc_bitmap_iszero(nodeset)) {
fprintf(stderr, "[hwloc/memtiers] empty forced tier nodeset `%s', aborting\n", tmp);
goto out_with_tiers;
}
type = hwloc_memory_tier_type_sscanf(equal+1);
if (!type)
hwloc_debug("failed to recognize forced tier type `%s'\n", equal+1);
tiers[nr_tiers].nodeset = hwloc_bitmap_dup(nodeset);
tiers[nr_tiers].type = type;
tiers[nr_tiers].local_bw_min = tiers[nr_tiers].local_bw_max = 0;
tiers[nr_tiers].local_lat_min = tiers[nr_tiers].local_lat_max = 0;
nr_tiers++;
if (!end)
break;
tmp = end+1;
}
free(env);
hwloc_bitmap_free(nodeset);
hwloc_debug("Forcing %u memory tiers\n", nr_tiers);
#ifdef HWLOC_DEBUG
for(i=0; i<nr_tiers; i++) {
char *s;
hwloc_bitmap_asprintf(&s, tiers[i].nodeset);
hwloc_debug(" tier #%u type %lx nodeset %s\n", i, tiers[i].type, s);
free(s);
}
#endif
*nr_tiers_p = nr_tiers;
return tiers;
out_with_tiers:
for(i=0; i<nr_tiers; i++)
hwloc_bitmap_free(tiers[i].nodeset);
free(tiers);
out_with_nodeset:
hwloc_bitmap_free(nodeset);
out_with_envvar:
free(env);
out:
return NULL;
}
static void
hwloc__apply_memory_tiers_subtypes(hwloc_topology_t topology,
unsigned nr_tiers,
struct hwloc_memory_tier_s *tiers,
int force)
{
hwloc_obj_t node = NULL;
hwloc_debug("Marking node tiers\n");
while ((node = hwloc_get_next_obj_by_type(topology, HWLOC_OBJ_NUMANODE, node)) != NULL) {
unsigned j;
for(j=0; j<nr_tiers; j++) {
if (hwloc_bitmap_isset(tiers[j].nodeset, node->os_index)) {
const char *subtype = hwloc_memory_tier_type_snprintf(tiers[j].type);
if (!node->subtype || force) { /* don't overwrite the existing subtype unless forced */
if (subtype) { /* don't set a subtype for unknown tiers */
hwloc_debug(" marking node L#%u P#%u as %s (was %s)\n", node->logical_index, node->os_index, subtype, node->subtype);
free(node->subtype);
node->subtype = strdup(subtype);
}
} else
hwloc_debug(" node L#%u P#%u already marked as %s, not setting %s\n",
node->logical_index, node->os_index, node->subtype, subtype);
if (nr_tiers > 1) {
char tmp[20];
snprintf(tmp, sizeof(tmp), "%u", j);
hwloc__add_info_nodup(&node->infos, &node->infos_count, "MemoryTier", tmp, 1);
}
break; /* each node is in a single tier */
}
}
}
}
int
hwloc_internal_memattrs_guess_memory_tiers(hwloc_topology_t topology, int force_subtype)
{
struct hwloc_memory_tier_s *tiers;
unsigned nr_tiers;
unsigned i;
const char *env;
env = getenv("HWLOC_MEMTIERS");
if (env) {
if (!strcmp(env, "none"))
goto out;
tiers = hwloc__force_memory_tiers(topology, &nr_tiers, env);
if (tiers) {
assert(nr_tiers > 0);
force_subtype = 1;
goto ready;
}
}
tiers = hwloc__group_memory_tiers(topology, &nr_tiers);
if (!tiers)
goto out;
hwloc__guess_memory_tiers_types(topology, nr_tiers, tiers);
/* sort tiers by BW first, then by type */
hwloc_debug("Sorting memory tiers...\n");
qsort(tiers, nr_tiers, sizeof(*tiers), compare_tiers_by_bw_and_type);
ready:
#ifdef HWLOC_DEBUG
for(i=0; i<nr_tiers; i++) {
char *s;
hwloc_bitmap_asprintf(&s, tiers[i].nodeset);
hwloc_debug(" tier %u = nodes %s with type %lx and local BW %llu-%llu lat %llu-%llu\n",
i,
s, tiers[i].type,
(unsigned long long) tiers[i].local_bw_min,
(unsigned long long) tiers[i].local_bw_max,
(unsigned long long) tiers[i].local_lat_min,
(unsigned long long) tiers[i].local_lat_max);
free(s);
}
#endif
hwloc__apply_memory_tiers_subtypes(topology, nr_tiers, tiers, force_subtype);
for(i=0; i<nr_tiers; i++)
hwloc_bitmap_free(tiers[i].nodeset);
free(tiers);
out:
return 0;
}
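The hwloc__force_memory_tiers() code above accepts an HWLOC_MEMTIERS value made of semicolon-separated "nodeset-mask=tier-type" entries, e.g. 0xf=HBM;0x0f=DRAM. A standalone sketch (not hwloc code) illustrating only that splitting step, using POSIX strtok_r:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main(void)
{
  char spec[] = "0xf=HBM;0x0f=DRAM;0x00f=CXL-DRAM"; /* example value from the comment above */
  char *saveptr = NULL;
  for (char *entry = strtok_r(spec, ";", &saveptr);
       entry != NULL;
       entry = strtok_r(NULL, ";", &saveptr)) {
    char *equal = strchr(entry, '=');
    if (!equal) {
      fprintf(stderr, "missing '=' in '%s'\n", entry);
      return EXIT_FAILURE;
    }
    *equal = '\0';
    printf("nodeset mask %s -> tier type %s\n", entry, equal + 1); /* hwloc would parse these with hwloc_bitmap_sscanf() and the type scanner above */
  }
  return EXIT_SUCCESS;
}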
11
src/3rdparty/hwloc/src/shmem.c
vendored
@@ -23,6 +23,7 @@ struct hwloc_shmem_header {
uint32_t header_length; /* where the actual topology starts in the file/mapping */
uint64_t mmap_address; /* virtual address to pass to mmap */
uint64_t mmap_length; /* length to pass to mmap (includes the header) */
/* we will pad the end to a multiple of pointer size so that the topology is well aligned */
};
#define HWLOC_SHMEM_MALLOC_ALIGN 8UL
@@ -85,6 +86,7 @@ hwloc_shmem_topology_write(hwloc_topology_t topology,
hwloc_topology_t new;
struct hwloc_tma tma;
struct hwloc_shmem_header header;
uint32_t header_length = (sizeof(header) + sizeof(void*) - 1) & ~(sizeof(void*) - 1); /* pad to a multiple of pointer size */
void *mmap_res;
int err;
@@ -100,7 +102,7 @@ hwloc_shmem_topology_write(hwloc_topology_t topology,
hwloc_internal_memattrs_refresh(topology);
header.header_version = HWLOC_SHMEM_HEADER_VERSION;
header.header_length = sizeof(header);
header.header_length = header_length;
header.mmap_address = (uintptr_t) mmap_address;
header.mmap_length = length;
@@ -127,7 +129,7 @@ hwloc_shmem_topology_write(hwloc_topology_t topology,
tma.malloc = tma_shmem_malloc;
tma.dontfree = 1;
tma.data = (char *)mmap_res + sizeof(header);
tma.data = (char *)mmap_res + header_length;
err = hwloc__topology_dup(&new, topology, &tma);
if (err < 0)
return err;
@@ -154,6 +156,7 @@ hwloc_shmem_topology_adopt(hwloc_topology_t *topologyp,
{
hwloc_topology_t new, old;
struct hwloc_shmem_header header;
uint32_t header_length = (sizeof(header) + sizeof(void*) - 1) & ~(sizeof(void*) - 1); /* pad to a multiple of pointer size */
void *mmap_res;
int err;
@@ -171,7 +174,7 @@ hwloc_shmem_topology_adopt(hwloc_topology_t *topologyp,
return -1;
if (header.header_version != HWLOC_SHMEM_HEADER_VERSION
|| header.header_length != sizeof(header)
|| header.header_length != header_length
|| header.mmap_address != (uintptr_t) mmap_address
|| header.mmap_length != length) {
errno = EINVAL;
@@ -186,7 +189,7 @@ hwloc_shmem_topology_adopt(hwloc_topology_t *topologyp,
goto out_with_mmap;
}
old = (hwloc_topology_t)((char*)mmap_address + sizeof(header));
old = (hwloc_topology_t)((char*)mmap_address + header_length);
if (hwloc_topology_abi_check(old) < 0) {
errno = EINVAL;
goto out_with_mmap;
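The shmem.c hunks above replace sizeof(header) with header_length, which rounds the header size up to a multiple of the pointer size so that the adopted topology is well aligned. A standalone sketch of that round-up expression (sample_header only mirrors the fields shown above for illustration):

#include <stdio.h>
#include <stdint.h>

struct sample_header {
  uint32_t header_version;
  uint32_t header_length;
  uint64_t mmap_address;
  uint64_t mmap_length;
};

int main(void)
{
  size_t raw = sizeof(struct sample_header);
  /* same round-up as in hwloc_shmem_topology_write()/adopt() above */
  size_t padded = (raw + sizeof(void*) - 1) & ~(sizeof(void*) - 1);
  printf("raw=%zu padded=%zu (multiple of %zu)\n", raw, padded, sizeof(void*));
  return 0;
}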
154
src/3rdparty/hwloc/src/topology-synthetic.c
vendored
@@ -1,6 +1,6 @@
/*
* Copyright © 2009 CNRS
* Copyright © 2009-2022 Inria. All rights reserved.
* Copyright © 2009-2023 Inria. All rights reserved.
* Copyright © 2009-2010 Université Bordeaux
* Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved.
* See COPYING in top-level directory.
@@ -23,6 +23,7 @@ struct hwloc_synthetic_attr_s {
unsigned depth; /* For caches/groups */
hwloc_obj_cache_type_t cachetype; /* For caches */
hwloc_uint64_t memorysize; /* For caches/memory */
hwloc_uint64_t memorysidecachesize; /* Single level of memory-side-cache in-front of a NUMA node */
};
struct hwloc_synthetic_indexes_s {
@@ -380,6 +381,9 @@ hwloc_synthetic_parse_attrs(const char *attrs, const char **next_posp,
} else if (!iscache && !strncmp("memory=", attrs, 7)) {
memorysize = hwloc_synthetic_parse_memory_attr(attrs+7, &attrs);
} else if (!strncmp("memorysidecachesize=", attrs, 20)) {
sattr->memorysidecachesize = hwloc_synthetic_parse_memory_attr(attrs+20, &attrs);
} else if (!strncmp("indexes=", attrs, 8)) {
index_string = attrs+8;
attrs += 8;
@@ -387,10 +391,9 @@ hwloc_synthetic_parse_attrs(const char *attrs, const char **next_posp,
attrs += index_string_length;
} else {
if (verbose)
fprintf(stderr, "Unknown attribute at '%s'\n", attrs);
errno = EINVAL;
return -1;
size_t length = strcspn(attrs, " )");
fprintf(stderr, "hwloc/synthetic: Ignoring unknown attribute at '%s'\n", attrs);
attrs += length;
}
if (' ' == *attrs)
@@ -416,6 +419,32 @@ hwloc_synthetic_parse_attrs(const char *attrs, const char **next_posp,
return 0;
}
static void
hwloc_synthetic_set_default_attrs(struct hwloc_synthetic_attr_s *sattr,
int *type_count)
{
hwloc_obj_type_t type = sattr->type;
if (type == HWLOC_OBJ_GROUP) {
if (sattr->depth == (unsigned)-1)
sattr->depth = type_count[HWLOC_OBJ_GROUP]--;
} else if (hwloc__obj_type_is_cache(type)) {
if (!sattr->memorysize) {
if (1 == sattr->depth)
/* 32KiB in L1 */
sattr->memorysize = 32*1024;
else
/* *4 at each level, starting from 1MiB for L2, unified */
sattr->memorysize = 256ULL*1024 << (2*sattr->depth);
}
} else if (type == HWLOC_OBJ_NUMANODE && !sattr->memorysize) {
/* 1GiB in memory nodes. */
sattr->memorysize = 1024*1024*1024;
}
}
/* frees level until arity = 0 */
static void
hwloc_synthetic_free_levels(struct hwloc_synthetic_backend_data_s *data)
@@ -465,6 +494,7 @@ hwloc_backend_synthetic_init(struct hwloc_synthetic_backend_data_s *data,
data->level[0].indexes.string = NULL;
data->level[0].indexes.array = NULL;
data->level[0].attr.memorysize = 0;
data->level[0].attr.memorysidecachesize = 0;
data->level[0].attached = NULL;
type_count[HWLOC_OBJ_MACHINE] = 1;
if (*description == '(') {
@@ -514,6 +544,7 @@ hwloc_backend_synthetic_init(struct hwloc_synthetic_backend_data_s *data,
if (attached) {
attached->attr.type = type;
attached->attr.memorysize = 0;
attached->attr.memorysidecachesize = 0;
/* attached->attr.depth and .cachetype unused */
attached->next = NULL;
pprev = &data->level[count-1].attached;
@@ -601,7 +632,7 @@ hwloc_backend_synthetic_init(struct hwloc_synthetic_backend_data_s *data,
}
if (!item) {
if (verbose)
fprintf(stderr,"Synthetic string with disallow 0 number of objects at '%s'\n", pos);
fprintf(stderr,"Synthetic string with disallowed 0 number of objects at '%s'\n", pos);
errno = EINVAL;
goto error;
}
@@ -611,6 +642,7 @@ hwloc_backend_synthetic_init(struct hwloc_synthetic_backend_data_s *data,
data->level[count].indexes.string = NULL;
data->level[count].indexes.array = NULL;
data->level[count].attr.memorysize = 0;
data->level[count].attr.memorysidecachesize = 0;
if (*next_pos == '(') {
err = hwloc_synthetic_parse_attrs(next_pos+1, &next_pos, &data->level[count].attr, &data->level[count].indexes, verbose);
if (err < 0)
@@ -796,6 +828,7 @@ hwloc_backend_synthetic_init(struct hwloc_synthetic_backend_data_s *data,
data->level[1].indexes.string = NULL;
data->level[1].indexes.array = NULL;
data->level[1].attr.memorysize = 0;
data->level[1].attr.memorysidecachesize = 0;
data->level[1].totalwidth = data->level[0].totalwidth;
/* update arity to insert a single NUMA node per parent */
data->level[1].arity = data->level[0].arity;
@@ -803,30 +836,14 @@ hwloc_backend_synthetic_init(struct hwloc_synthetic_backend_data_s *data,
count++;
}
/* set default attributes that depend on the depth/hierarchy of levels */
for (i=0; i<count; i++) {
struct hwloc_synthetic_attached_s *attached;
struct hwloc_synthetic_level_data_s *curlevel = &data->level[i];
hwloc_obj_type_t type = curlevel->attr.type;
if (type == HWLOC_OBJ_GROUP) {
if (curlevel->attr.depth == (unsigned)-1)
curlevel->attr.depth = type_count[HWLOC_OBJ_GROUP]--;
} else if (hwloc__obj_type_is_cache(type)) {
if (!curlevel->attr.memorysize) {
if (1 == curlevel->attr.depth)
/* 32KiB in L1 */
curlevel->attr.memorysize = 32*1024;
else
/* *4 at each level, starting from 1MiB for L2, unified */
curlevel->attr.memorysize = 256ULL*1024 << (2*curlevel->attr.depth);
}
} else if (type == HWLOC_OBJ_NUMANODE && !curlevel->attr.memorysize) {
/* 1GiB in memory nodes. */
curlevel->attr.memorysize = 1024*1024*1024;
}
hwloc_synthetic_process_indexes(data, &data->level[i].indexes, data->level[i].totalwidth, verbose);
hwloc_synthetic_set_default_attrs(&curlevel->attr, type_count);
for(attached = curlevel->attached; attached != NULL; attached = attached->next)
hwloc_synthetic_set_default_attrs(&attached->attr, type_count);
hwloc_synthetic_process_indexes(data, &curlevel->indexes, curlevel->totalwidth, verbose);
}
hwloc_synthetic_process_indexes(data, &data->numa_attached_indexes, data->numa_attached_nr, verbose);
@ -859,6 +876,12 @@ hwloc_synthetic_set_attr(struct hwloc_synthetic_attr_s *sattr,
|
|||
obj->attr->numanode.page_types[0].size = 4096;
|
||||
obj->attr->numanode.page_types[0].count = sattr->memorysize / 4096;
|
||||
break;
|
||||
case HWLOC_OBJ_MEMCACHE:
|
||||
obj->attr->cache.depth = 1;
|
||||
obj->attr->cache.linesize = 64;
|
||||
obj->attr->cache.type = HWLOC_OBJ_CACHE_UNIFIED;
|
||||
obj->attr->cache.size = sattr->memorysidecachesize;
|
||||
break;
|
||||
case HWLOC_OBJ_PACKAGE:
|
||||
case HWLOC_OBJ_DIE:
|
||||
break;
|
||||
|
@ -926,6 +949,14 @@ hwloc_synthetic_insert_attached(struct hwloc_topology *topology,
|
|||
|
||||
hwloc__insert_object_by_cpuset(topology, NULL, child, "synthetic:attached");
|
||||
|
||||
if (attached->attr.memorysidecachesize) {
|
||||
hwloc_obj_t mscachechild = hwloc_alloc_setup_object(topology, HWLOC_OBJ_MEMCACHE, HWLOC_UNKNOWN_INDEX);
|
||||
mscachechild->cpuset = hwloc_bitmap_dup(set);
|
||||
mscachechild->nodeset = hwloc_bitmap_dup(child->nodeset);
|
||||
hwloc_synthetic_set_attr(&attached->attr, mscachechild);
|
||||
hwloc__insert_object_by_cpuset(topology, NULL, mscachechild, "synthetic:attached:mscache");
|
||||
}
|
||||
|
||||
hwloc_synthetic_insert_attached(topology, data, attached->next, set);
|
||||
}
|
||||
|
||||
|
@ -977,6 +1008,14 @@ hwloc__look_synthetic(struct hwloc_topology *topology,
|
|||
hwloc_synthetic_set_attr(&curlevel->attr, obj);
|
||||
|
||||
hwloc__insert_object_by_cpuset(topology, NULL, obj, "synthetic");
|
||||
|
||||
if (type == HWLOC_OBJ_NUMANODE && curlevel->attr.memorysidecachesize) {
|
||||
hwloc_obj_t mscachechild = hwloc_alloc_setup_object(topology, HWLOC_OBJ_MEMCACHE, HWLOC_UNKNOWN_INDEX);
|
||||
mscachechild->cpuset = hwloc_bitmap_dup(set);
|
||||
mscachechild->nodeset = hwloc_bitmap_dup(obj->nodeset);
|
||||
hwloc_synthetic_set_attr(&curlevel->attr, mscachechild);
|
||||
hwloc__insert_object_by_cpuset(topology, NULL, mscachechild, "synthetic:mscache");
|
||||
}
|
||||
}
|
||||
|
||||
hwloc_synthetic_insert_attached(topology, data, curlevel->attached, set);
|
||||
|
@ -1217,6 +1256,7 @@ hwloc__export_synthetic_indexes(hwloc_obj_t *level, unsigned total,
|
|||
|
||||
static int
|
||||
hwloc__export_synthetic_obj_attr(struct hwloc_topology * topology,
|
||||
unsigned long flags,
|
||||
hwloc_obj_t obj,
|
||||
char *buffer, size_t buflen)
|
||||
{
|
||||
|
@ -1224,6 +1264,7 @@ hwloc__export_synthetic_obj_attr(struct hwloc_topology * topology,
|
|||
const char * prefix = "(";
|
||||
char cachesize[64] = "";
|
||||
char memsize[64] = "";
|
||||
char memorysidecachesize[64] = "";
|
||||
int needindexes = 0;
|
||||
|
||||
if (hwloc__obj_type_is_cache(obj->type) && obj->attr->cache.size) {
|
||||
|
@ -1236,6 +1277,19 @@ hwloc__export_synthetic_obj_attr(struct hwloc_topology * topology,
|
|||
prefix, (unsigned long long) obj->attr->numanode.local_memory);
|
||||
prefix = separator;
|
||||
}
|
||||
if (obj->type == HWLOC_OBJ_NUMANODE && !(flags & HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_V1)) {
|
||||
hwloc_obj_t memorysidecache = obj->parent;
|
||||
hwloc_uint64_t size = 0;
|
||||
while (memorysidecache && memorysidecache->type == HWLOC_OBJ_MEMCACHE) {
|
||||
size += memorysidecache->attr->cache.size;
|
||||
memorysidecache = memorysidecache->parent;
|
||||
}
|
||||
if (size) {
|
||||
snprintf(memorysidecachesize, sizeof(memorysidecachesize), "%smemorysidecachesize=%llu",
|
||||
prefix, (unsigned long long) size);
|
||||
prefix = separator;
|
||||
}
|
||||
}
|
||||
if (!obj->logical_index /* only display indexes once per level (not for non-first NUMA children, etc.) */
|
||||
&& (obj->type == HWLOC_OBJ_PU || obj->type == HWLOC_OBJ_NUMANODE)) {
|
||||
hwloc_obj_t cur = obj;
|
||||
|
@ -1247,12 +1301,12 @@ hwloc__export_synthetic_obj_attr(struct hwloc_topology * topology,
|
|||
cur = cur->next_cousin;
|
||||
}
|
||||
}
|
||||
if (*cachesize || *memsize || needindexes) {
|
||||
if (*cachesize || *memsize || *memorysidecachesize || needindexes) {
|
||||
ssize_t tmplen = buflen;
|
||||
char *tmp = buffer;
|
||||
int res, ret = 0;
|
||||
|
||||
res = hwloc_snprintf(tmp, tmplen, "%s%s%s", cachesize, memsize, needindexes ? "" : ")");
|
||||
res = hwloc_snprintf(tmp, tmplen, "%s%s%s%s", cachesize, memsize, memorysidecachesize, needindexes ? "" : ")");
|
||||
if (hwloc__export_synthetic_update_status(&ret, &tmp, &tmplen, res) < 0)
|
||||
return -1;
|
||||
|
||||
|
@ -1326,7 +1380,7 @@ hwloc__export_synthetic_obj(struct hwloc_topology * topology, unsigned long flag
|
|||
|
||||
if (!(flags & HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_NO_ATTRS)) {
|
||||
/* obj attributes */
|
||||
res = hwloc__export_synthetic_obj_attr(topology, obj, tmp, tmplen);
|
||||
res = hwloc__export_synthetic_obj_attr(topology, flags, obj, tmp, tmplen);
|
||||
if (hwloc__export_synthetic_update_status(&ret, &tmp, &tmplen, res) < 0)
|
||||
return -1;
|
||||
}
|
||||
|
@ -1351,7 +1405,7 @@ hwloc__export_synthetic_memory_children(struct hwloc_topology * topology, unsign
|
|||
|
||||
if (flags & HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_V1) {
|
||||
/* v1: export a single NUMA child */
|
||||
if (parent->memory_arity > 1 || mchild->type != HWLOC_OBJ_NUMANODE) {
|
||||
if (parent->memory_arity > 1) {
|
||||
/* not supported */
|
||||
if (verbose)
|
||||
fprintf(stderr, "Cannot export to synthetic v1 if multiple memory children are attached to the same location.\n");
|
||||
|
@ -1362,6 +1416,9 @@ hwloc__export_synthetic_memory_children(struct hwloc_topology * topology, unsign
|
|||
if (needprefix)
|
||||
hwloc__export_synthetic_add_char(&ret, &tmp, &tmplen, ' ');
|
||||
|
||||
/* ignore memcaches and export the NUMA node */
while (mchild->type != HWLOC_OBJ_NUMANODE)
mchild = mchild->memory_first_child;
res = hwloc__export_synthetic_obj(topology, flags, mchild, 1, tmp, tmplen);
if (hwloc__export_synthetic_update_status(&ret, &tmp, &tmplen, res) < 0)
return -1;

@@ -1369,16 +1426,25 @@ hwloc__export_synthetic_memory_children(struct hwloc_topology * topology, unsign
}

while (mchild) {
/* FIXME: really recurse to export memcaches and numanode,
/* The core doesn't support shared memcache for now (because ACPI and Linux don't).
 * So, for each mchild here, recurse only in the first children at each level.
 *
 * FIXME: whenever supported by the core, really recurse to export memcaches and numanode,
 * but it requires clever parsing of [ memcache [numa] [numa] ] during import,
 * better attaching of things to describe the hierarchy.
 */
hwloc_obj_t numanode = mchild;
/* only export the first NUMA node leaf of each memory child
 * FIXME: This assumes mscache aren't shared between nodes, that's true in current platforms
/* Only export the first NUMA node leaf of each memory child.
 * Memcaches are ignored here, they will be summed and exported as a single attribute
 * of the NUMA node in hwloc__export_synthetic_obj().
 */
while (numanode && numanode->type != HWLOC_OBJ_NUMANODE) {
assert(numanode->arity == 1);
if (verbose && numanode->memory_arity > 1) {
static int warned = 0;
if (!warned)
fprintf(stderr, "Ignoring non-first memory children at non-first level of memory hierarchy.\n");
warned = 1;
}
numanode = numanode->memory_first_child;
}
assert(numanode); /* there's always a numanode at the bottom of the memory tree */

@@ -1511,17 +1577,21 @@ hwloc_topology_export_synthetic(struct hwloc_topology * topology,
if (flags & HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_V1) {
/* v1 requires all NUMA at the same level */
hwloc_obj_t node;
hwloc_obj_t node, parent;
signed pdepth;

node = hwloc_get_obj_by_type(topology, HWLOC_OBJ_NUMANODE, 0);
assert(node);
assert(hwloc__obj_type_is_normal(node->parent->type)); /* only depth-1 memory children for now */
pdepth = node->parent->depth;
parent = node->parent;
while (!hwloc__obj_type_is_normal(parent->type))
parent = parent->parent;
pdepth = parent->depth;

while ((node = node->next_cousin) != NULL) {
assert(hwloc__obj_type_is_normal(node->parent->type)); /* only depth-1 memory children for now */
if (node->parent->depth != pdepth) {
parent = node->parent;
while (!hwloc__obj_type_is_normal(parent->type))
parent = parent->parent;
if (parent->depth != pdepth) {
if (verbose)
fprintf(stderr, "Cannot export to synthetic v1 if memory is attached to parents at different depths.\n");
errno = EINVAL;

@@ -1534,7 +1604,7 @@ hwloc_topology_export_synthetic(struct hwloc_topology * topology,
if (!(flags & HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_NO_ATTRS)) {
/* obj attributes */
res = hwloc__export_synthetic_obj_attr(topology, obj, tmp, tmplen);
res = hwloc__export_synthetic_obj_attr(topology, flags, obj, tmp, tmplen);
if (res > 0)
needprefix = 1;
if (hwloc__export_synthetic_update_status(&ret, &tmp, &tmplen, res) < 0)
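The v1 export path touched above is only reached when the legacy flag is requested. As a minimal, hedged sketch of driving that code path from the public hwloc API (the function and flag names are the real entry points, the error handling is illustrative only):

    #include <hwloc.h>
    #include <stdio.h>

    /* Export the current machine as a synthetic description string.
     * With the V1 flag, the export fails (EINVAL) when NUMA nodes are
     * attached to parents at different depths, as enforced above. */
    int main(void)
    {
        hwloc_topology_t topo;
        char buf[1024];

        hwloc_topology_init(&topo);
        hwloc_topology_load(topo);

        if (hwloc_topology_export_synthetic(topo, buf, sizeof(buf),
                                            HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_V1) < 0)
            perror("synthetic v1 export not possible");
        else
            printf("%s\n", buf);

        hwloc_topology_destroy(topo);
        return 0;
    }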
26 src/3rdparty/hwloc/src/topology-windows.c (vendored)

@@ -1,6 +1,6 @@
/*
 * Copyright © 2009 CNRS
 * Copyright © 2009-2022 Inria. All rights reserved.
 * Copyright © 2009-2023 Inria. All rights reserved.
 * Copyright © 2009-2012, 2020 Université Bordeaux
 * Copyright © 2011 Cisco Systems, Inc. All rights reserved.
 * See COPYING in top-level directory.

@@ -367,7 +367,7 @@ hwloc_win_get_processor_groups(void)
if (nr_processor_groups > 1 && SIZEOF_VOID_P == 4) {
if (HWLOC_SHOW_ALL_ERRORS())
fprintf(stderr, "hwloc: multiple processor groups found on 32bits Windows, topology may be invalid/incomplete.\n");
fprintf(stderr, "hwloc/windows: multiple processor groups found on 32bits Windows, topology may be invalid/incomplete.\n");
}

length = 0;

@@ -987,7 +987,11 @@ hwloc_look_windows(struct hwloc_backend *backend, struct hwloc_disc_status *dsta
OSVERSIONINFOEX osvi;
char versionstr[20];
char hostname[122] = "";
unsigned hostname_size = sizeof(hostname);
#if !defined(__CYGWIN__)
DWORD hostname_size = sizeof(hostname);
#else
size_t hostname_size = sizeof(hostname);
#endif
int has_efficiencyclass = 0;
struct hwloc_win_efficiency_classes eclasses;
char *env = getenv("HWLOC_WINDOWS_PROCESSOR_GROUP_OBJS");

@@ -1051,12 +1055,16 @@ hwloc_look_windows(struct hwloc_backend *backend, struct hwloc_disc_status *dsta
unsigned efficiency_class = 0;
GROUP_AFFINITY *GroupMask;

/* Ignore unknown caches */
if (procInfo->Relationship == RelationCache
&& procInfo->Cache.Type != CacheUnified
&& procInfo->Cache.Type != CacheData
&& procInfo->Cache.Type != CacheInstruction)
continue;
if (procInfo->Relationship == RelationCache) {
if (!topology->want_some_cpu_caches)
/* TODO: check if RelationAll&~RelationCache works? */
continue;
if (procInfo->Cache.Type != CacheUnified
&& procInfo->Cache.Type != CacheData
&& procInfo->Cache.Type != CacheInstruction)
/* Ignore unknown caches */
continue;
}

id = HWLOC_UNKNOWN_INDEX;
switch (procInfo->Relationship) {
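For context on the Windows hunk above: cache entries are now skipped entirely when no CPU cache filter is enabled, and unknown cache types are still ignored. A rough standalone sketch of the underlying Windows enumeration (not hwloc or XMRig code, buffer sizing and error handling trimmed):

    #include <windows.h>
    #include <stdio.h>
    #include <stdlib.h>

    /* Enumerate processor relationships and keep only unified/data/instruction
     * caches, mirroring the filter in the hunk above. */
    static void list_caches(void)
    {
        DWORD len = 0;
        SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *buf;
        char *p, *end;

        GetLogicalProcessorInformationEx(RelationAll, NULL, &len); /* query required size */
        buf = malloc(len);
        if (!buf)
            return;
        if (!GetLogicalProcessorInformationEx(RelationAll, buf, &len)) {
            free(buf);
            return;
        }
        for (p = (char *)buf, end = p + len; p < end;
             p += ((SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)p)->Size) {
            SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *info = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)p;
            if (info->Relationship != RelationCache)
                continue;
            if (info->Cache.Type != CacheUnified && info->Cache.Type != CacheData
                && info->Cache.Type != CacheInstruction)
                continue; /* ignore unknown caches */
            printf("L%u cache: %lu bytes\n", info->Cache.Level, (unsigned long)info->Cache.CacheSize);
        }
        free(buf);
    }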
308 src/3rdparty/hwloc/src/topology-x86.c (vendored)

@@ -1,5 +1,5 @@
/*
 * Copyright © 2010-2022 Inria. All rights reserved.
 * Copyright © 2010-2023 Inria. All rights reserved.
 * Copyright © 2010-2013 Université Bordeaux
 * Copyright © 2010-2011 Cisco Systems, Inc. All rights reserved.
 * See COPYING in top-level directory.

@@ -38,6 +38,12 @@ struct hwloc_x86_backend_data_s {
int apicid_unique;
char *src_cpuiddump_path;
int is_knl;
int is_hybrid;
int found_die_ids;
int found_complex_ids;
int found_unit_ids;
int found_module_ids;
int found_tile_ids;
};

/************************************

@@ -80,7 +86,7 @@ cpuiddump_read(const char *dirpath, unsigned idx)
cpuiddump = malloc(sizeof(*cpuiddump));
if (!cpuiddump) {
fprintf(stderr, "Failed to allocate cpuiddump for PU #%u, ignoring cpuiddump.\n", idx);
fprintf(stderr, "hwloc/x86: Failed to allocate cpuiddump for PU #%u, ignoring cpuiddump.\n", idx);
goto out;
}

@@ -91,7 +97,7 @@ cpuiddump_read(const char *dirpath, unsigned idx)
snprintf(filename, filenamelen, "%s/pu%u", dirpath, idx);
file = fopen(filename, "r");
if (!file) {
fprintf(stderr, "Could not read dumped cpuid file %s, ignoring cpuiddump.\n", filename);
fprintf(stderr, "hwloc/x86: Could not read dumped cpuid file %s, ignoring cpuiddump.\n", filename);
goto out_with_filename;
}

@@ -100,7 +106,7 @@ cpuiddump_read(const char *dirpath, unsigned idx)
nr++;
cpuiddump->entries = malloc(nr * sizeof(struct cpuiddump_entry));
if (!cpuiddump->entries) {
fprintf(stderr, "Failed to allocate %u cpuiddump entries for PU #%u, ignoring cpuiddump.\n", nr, idx);
fprintf(stderr, "hwloc/x86: Failed to allocate %u cpuiddump entries for PU #%u, ignoring cpuiddump.\n", nr, idx);
goto out_with_file;
}

@@ -156,7 +162,7 @@ cpuiddump_find_by_input(unsigned *eax, unsigned *ebx, unsigned *ecx, unsigned *e
return;
}

fprintf(stderr, "Couldn't find %x,%x,%x,%x in dumped cpuid, returning 0s.\n",
fprintf(stderr, "hwloc/x86: Couldn't find %x,%x,%x,%x in dumped cpuid, returning 0s.\n",
*eax, *ebx, *ecx, *edx);
*eax = 0;
*ebx = 0;

@@ -210,7 +216,8 @@ struct procinfo {
#define TILE 4
#define MODULE 5
#define DIE 6
#define HWLOC_X86_PROCINFO_ID_NR 7
#define COMPLEX 7
#define HWLOC_X86_PROCINFO_ID_NR 8
unsigned ids[HWLOC_X86_PROCINFO_ID_NR];
unsigned *otherids;
unsigned levels;

@@ -314,7 +321,7 @@ static void read_amd_caches_topoext(struct procinfo *infos, struct cpuiddump *sr
/* the code below doesn't want any other cache yet */
assert(!infos->numcaches);

for (cachenum = 0; ; cachenum++) {
for (cachenum = 0; cachenum<16 /* guard */; cachenum++) {
eax = 0x8000001d;
ecx = cachenum;
cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump);

@@ -325,7 +332,7 @@ static void read_amd_caches_topoext(struct procinfo *infos, struct cpuiddump *sr
cache = infos->cache = malloc(infos->numcaches * sizeof(*infos->cache));
if (cache) {
for (cachenum = 0; ; cachenum++) {
for (cachenum = 0; cachenum<16 /* guard */; cachenum++) {
unsigned long linesize, linepart, ways, sets;
eax = 0x8000001d;
ecx = cachenum;

@@ -378,7 +385,7 @@ static void read_intel_caches(struct hwloc_x86_backend_data_s *data, struct proc
unsigned cachenum;
struct cacheinfo *cache;

for (cachenum = 0; ; cachenum++) {
for (cachenum = 0; cachenum<16 /* guard */; cachenum++) {
eax = 0x04;
ecx = cachenum;
cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump);

@@ -400,7 +407,7 @@ static void read_intel_caches(struct hwloc_x86_backend_data_s *data, struct proc
infos->cache = tmpcaches;
cache = &infos->cache[oldnumcaches];

for (cachenum = 0; ; cachenum++) {
for (cachenum = 0; cachenum<16 /* guard */; cachenum++) {
unsigned long linesize, linepart, ways, sets;
eax = 0x04;
ecx = cachenum;

@@ -480,7 +487,7 @@ static void read_amd_cores_legacy(struct procinfo *infos, struct cpuiddump *src_
}

/* AMD unit/node from CPUID 0x8000001e leaf (topoext) */
static void read_amd_cores_topoext(struct procinfo *infos, unsigned long flags, struct cpuiddump *src_cpuiddump)
static void read_amd_cores_topoext(struct hwloc_x86_backend_data_s *data, struct procinfo *infos, unsigned long flags, struct cpuiddump *src_cpuiddump)
{
unsigned apic_id, nodes_per_proc = 0;
unsigned eax, ebx, ecx, edx;

@@ -510,6 +517,7 @@ static void read_amd_cores_topoext(struct procinfo *infos, unsigned long flags,
unsigned cores_per_unit;
/* coreid was obtained from read_amd_cores_legacy() earlier */
infos->ids[UNIT] = ebx & 0xff;
data->found_unit_ids = 1;
cores_per_unit = ((ebx >> 8) & 0xff) + 1;
hwloc_debug("topoext %08x, %u nodes, node %u, %u cores in unit %u\n", apic_id, nodes_per_proc, infos->ids[NODE], cores_per_unit, infos->ids[UNIT]);
/* coreid and unitid are package-wide (core 0-15 and unit 0-7 on 16-core 2-NUMAnode processor).

@@ -524,20 +532,35 @@ static void read_amd_cores_topoext(struct procinfo *infos, unsigned long flags,
}
}

/* Intel core/thread or even die/module/tile from CPUID 0x0b or 0x1f leaves (v1 and v2 extended topology enumeration) */
static void read_intel_cores_exttopoenum(struct procinfo *infos, unsigned leaf, struct cpuiddump *src_cpuiddump)
/* Intel core/thread or even die/module/tile from CPUID 0x0b or 0x1f leaves (v1 and v2 extended topology enumeration)
 * or AMD complex/ccd from CPUID 0x80000026 (extended CPU topology)
 */
static void read_extended_topo(struct hwloc_x86_backend_data_s *data, struct procinfo *infos, unsigned leaf, enum cpuid_type cpuid_type, struct cpuiddump *src_cpuiddump)
{
unsigned level, apic_nextshift, apic_number, apic_type, apic_id = 0, apic_shift = 0, id;
unsigned level, apic_nextshift, apic_type, apic_id = 0, apic_shift = 0, id;
unsigned threadid __hwloc_attribute_unused = 0; /* shut-up compiler */
unsigned eax, ebx, ecx = 0, edx;
int apic_packageshift = 0;

for (level = 0; ; level++) {
for (level = 0; level<32 /* guard */; level++) {
ecx = level;
eax = leaf;
cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump);
if (!eax && !ebx)
break;
/* Intel specifies that the 0x0b/0x1f loop should stop when we get "invalid domain" (0 in ecx[8:15])
 * (if so, we also get 0 in eax/ebx for invalid subleaves).
 * However AMD rather says that the 0x80000026/0x0b loop should stop when we get "no thread at this level" (0 in ebx[0:15]).
 * Zhaoxin follows the Intel specs but also returns "no thread at this level" for the last *valid* level (at least on KH-4000).
 * From the Linux kernel code, it's very likely that AMD also returns "invalid domain"
 * (because detect_extended_topology() uses that for all x86 CPUs)
 * but keep with the official doc until AMD can clarify that (see #593).
 */
if (cpuid_type == amd) {
if (!(ebx & 0xffff))
break;
} else {
if (!(ecx & 0xff00))
break;
}
apic_packageshift = eax & 0x1f;
}
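The loop above encodes the vendor-specific stop condition the new comment describes. As a hedged, standalone illustration of the same rule using the GCC/Clang <cpuid.h> helper (only __get_cpuid_count is a real library call; the function name and printout are made up for the example):

    #include <cpuid.h>
    #include <stdio.h>

    /* Walk extended-topology subleaves (0x0b/0x1f on Intel and Zhaoxin,
     * 0x80000026 on AMD) and stop according to the rules quoted above:
     *   Intel/Zhaoxin: stop on "invalid domain"      -> level type (ecx[15:8]) == 0
     *   AMD:           stop on "no thread at level"  -> thread count (ebx[15:0]) == 0 */
    static void dump_topo_levels(unsigned leaf, int is_amd)
    {
        for (unsigned subleaf = 0; subleaf < 32; subleaf++) {
            unsigned eax, ebx, ecx, edx;
            if (!__get_cpuid_count(leaf, subleaf, &eax, &ebx, &ecx, &edx))
                break; /* leaf not supported at all */
            if (is_amd ? !(ebx & 0xffff) : !((ecx >> 8) & 0xff))
                break;
            printf("level %u: type %u, shift %u, x2APIC id %u\n",
                   subleaf, (ecx >> 8) & 0xff, eax & 0x1f, edx);
        }
    }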
@@ -545,47 +568,73 @@ static void read_intel_cores_exttopoenum(struct procinfo *infos, unsigned leaf,
infos->otherids = malloc(level * sizeof(*infos->otherids));
if (infos->otherids) {
infos->levels = level;
for (level = 0; ; level++) {
for (level = 0; level<32 /* guard */; level++) {
ecx = level;
eax = leaf;
cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump);
if (!eax && !ebx)
break;
if (cpuid_type == amd) {
if (!(ebx & 0xffff))
break;
} else {
if (!(ecx & 0xff00))
break;
}
apic_nextshift = eax & 0x1f;
apic_number = ebx & 0xffff;
apic_type = (ecx & 0xff00) >> 8;
apic_id = edx;
id = (apic_id >> apic_shift) & ((1 << (apic_packageshift - apic_shift)) - 1);
hwloc_debug("x2APIC %08x %u: nextshift %u num %2u type %u id %2u\n", apic_id, level, apic_nextshift, apic_number, apic_type, id);
hwloc_debug("x2APIC %08x %u: nextshift %u nextnumber %2u type %u id %2u\n",
apic_id,
level,
apic_nextshift,
ebx & 0xffff /* number of threads in next level */,
apic_type,
id);
infos->apicid = apic_id;
infos->otherids[level] = UINT_MAX;
switch (apic_type) {
case 1:
threadid = id;
/* apic_number is the actual number of threads per core */
break;
case 2:
infos->ids[CORE] = id;
/* apic_number is the actual number of threads per die */
break;
case 3:
infos->ids[MODULE] = id;
/* apic_number is the actual number of threads per tile */
break;
case 4:
infos->ids[TILE] = id;
/* apic_number is the actual number of threads per die */
break;
case 5:
infos->ids[DIE] = id;
/* apic_number is the actual number of threads per package */
break;
default:
hwloc_debug("x2APIC %u: unknown type %u\n", level, apic_type);
infos->otherids[level] = apic_id >> apic_shift;
break;
}
apic_shift = apic_nextshift;
switch (apic_type) {
case 1:
threadid = id;
break;
case 2:
infos->ids[CORE] = id;
break;
case 3:
if (leaf == 0x80000026) {
data->found_complex_ids = 1;
infos->ids[COMPLEX] = id;
} else {
data->found_module_ids = 1;
infos->ids[MODULE] = id;
}
break;
case 4:
if (leaf == 0x80000026) {
data->found_die_ids = 1;
infos->ids[DIE] = id;
} else {
data->found_tile_ids = 1;
infos->ids[TILE] = id;
}
break;
case 5:
if (leaf == 0x80000026) {
goto unknown_type;
} else {
data->found_die_ids = 1;
infos->ids[DIE] = id;
}
break;
case 6:
/* TODO: "DieGrp" on Intel */
/* fallthrough */
default:
unknown_type:
hwloc_debug("x2APIC %u: unknown type %u\n", level, apic_type);
infos->otherids[level] = apic_id >> apic_shift;
break;
}
apic_shift = apic_nextshift;
}
infos->apicid = apic_id;
infos->ids[PKG] = apic_id >> apic_shift;

@@ -704,12 +753,13 @@ static void look_proc(struct hwloc_backend *backend, struct procinfo *infos, uns
}

if (highest_cpuid >= 0x1a && has_hybrid(features)) {
/* Get hybrid cpu information from cpuid 0x1a */
/* Get hybrid cpu information from cpuid 0x1a on Intel */
eax = 0x1a;
ecx = 0;
cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump);
infos->hybridcoretype = eax >> 24;
infos->hybridnativemodel = eax & 0xffffff;
data->is_hybrid = 1;
}

/*********************************************************************************

@@ -731,23 +781,30 @@ static void look_proc(struct hwloc_backend *backend, struct procinfo *infos, uns
*
* Only needed when x2apic supported if NUMA nodes are needed.
*/
read_amd_cores_topoext(infos, flags, src_cpuiddump);
read_amd_cores_topoext(data, infos, flags, src_cpuiddump);
}

if ((cpuid_type == intel) && highest_cpuid >= 0x1f) {
if ((cpuid_type == amd) && highest_ext_cpuid >= 0x80000026) {
/* Get socket/die/complex/core/thread information from cpuid 0x80000026
 * (AMD Extended CPU Topology)
 */
read_extended_topo(data, infos, 0x80000026, cpuid_type, src_cpuiddump);

} else if ((cpuid_type == intel || cpuid_type == zhaoxin) && highest_cpuid >= 0x1f) {
/* Get package/die/module/tile/core/thread information from cpuid 0x1f
 * (Intel v2 Extended Topology Enumeration)
 */
read_intel_cores_exttopoenum(infos, 0x1f, src_cpuiddump);
read_extended_topo(data, infos, 0x1f, cpuid_type, src_cpuiddump);

} else if ((cpuid_type == intel || cpuid_type == amd || cpuid_type == zhaoxin)
&& highest_cpuid >= 0x0b && has_x2apic(features)) {
/* Get package/core/thread information from cpuid 0x0b
 * (Intel v1 Extended Topology Enumeration)
 */
read_intel_cores_exttopoenum(infos, 0x0b, src_cpuiddump);
read_extended_topo(data, infos, 0x0b, cpuid_type, src_cpuiddump);
}

if (backend->topology->want_some_cpu_caches) {
/**************************************
 * Get caches from CPU-specific leaves
 */

@@ -845,6 +902,7 @@ static void look_proc(struct hwloc_backend *backend, struct procinfo *infos, uns
}
}
}
}

if (hwloc_bitmap_isset(data->apicid_set, infos->apicid))
data->apicid_unique = 0;

@@ -1046,21 +1104,34 @@ static void summarize(struct hwloc_backend *backend, struct procinfo *infos, uns
if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_GROUP)) {
if (fulldiscovery) {
/* Look for AMD Compute units inside packages */
hwloc_bitmap_copy(remaining_cpuset, complete_cpuset);
hwloc_x86_add_groups(topology, infos, nbprocs, remaining_cpuset,
UNIT, "Compute Unit",
HWLOC_GROUP_KIND_AMD_COMPUTE_UNIT, 0);
/* Look for Intel Modules inside packages */
hwloc_bitmap_copy(remaining_cpuset, complete_cpuset);
hwloc_x86_add_groups(topology, infos, nbprocs, remaining_cpuset,
MODULE, "Module",
HWLOC_GROUP_KIND_INTEL_MODULE, 0);
/* Look for Intel Tiles inside packages */
hwloc_bitmap_copy(remaining_cpuset, complete_cpuset);
hwloc_x86_add_groups(topology, infos, nbprocs, remaining_cpuset,
TILE, "Tile",
HWLOC_GROUP_KIND_INTEL_TILE, 0);
if (data->found_complex_ids) {
/* Look for AMD Complex inside packages */
hwloc_bitmap_copy(remaining_cpuset, complete_cpuset);
hwloc_x86_add_groups(topology, infos, nbprocs, remaining_cpuset,
COMPLEX, "Complex",
HWLOC_GROUP_KIND_AMD_COMPLEX, 0);
}
if (data->found_unit_ids) {
/* Look for AMD Compute units inside packages */
hwloc_bitmap_copy(remaining_cpuset, complete_cpuset);
hwloc_x86_add_groups(topology, infos, nbprocs, remaining_cpuset,
UNIT, "Compute Unit",
HWLOC_GROUP_KIND_AMD_COMPUTE_UNIT, 0);
}
if (data->found_module_ids) {
/* Look for Intel Modules inside packages */
hwloc_bitmap_copy(remaining_cpuset, complete_cpuset);
hwloc_x86_add_groups(topology, infos, nbprocs, remaining_cpuset,
MODULE, "Module",
HWLOC_GROUP_KIND_INTEL_MODULE, 0);
}
if (data->found_tile_ids) {
/* Look for Intel Tiles inside packages */
hwloc_bitmap_copy(remaining_cpuset, complete_cpuset);
hwloc_x86_add_groups(topology, infos, nbprocs, remaining_cpuset,
TILE, "Tile",
HWLOC_GROUP_KIND_INTEL_TILE, 0);
}

/* Look for unknown objects */
if (infos[one].otherids) {

@@ -1094,7 +1165,8 @@ static void summarize(struct hwloc_backend *backend, struct procinfo *infos, uns
}
}

if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_DIE)) {
if (data->found_die_ids
&& hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_DIE)) {
/* Look for Intel Dies inside packages */
if (fulldiscovery) {
hwloc_bitmap_t die_cpuset;

@@ -1349,40 +1421,45 @@ look_procs(struct hwloc_backend *backend, struct procinfo *infos, unsigned long
if (data->apicid_unique) {
summarize(backend, infos, flags);

if (has_hybrid(features) && !(topology->flags & HWLOC_TOPOLOGY_FLAG_NO_CPUKINDS)) {
if (data->is_hybrid
&& !(topology->flags & HWLOC_TOPOLOGY_FLAG_NO_CPUKINDS)) {
/* use hybrid info for cpukinds */
hwloc_bitmap_t atomset = hwloc_bitmap_alloc();
hwloc_bitmap_t coreset = hwloc_bitmap_alloc();
for(i=0; i<nbprocs; i++) {
if (infos[i].hybridcoretype == 0x20)
hwloc_bitmap_set(atomset, i);
else if (infos[i].hybridcoretype == 0x40)
hwloc_bitmap_set(coreset, i);
}
/* register IntelAtom set if any */
if (!hwloc_bitmap_iszero(atomset)) {
struct hwloc_info_s infoattr;
infoattr.name = (char *) "CoreType";
infoattr.value = (char *) "IntelAtom";
hwloc_internal_cpukinds_register(topology, atomset, HWLOC_CPUKIND_EFFICIENCY_UNKNOWN, &infoattr, 1, 0);
/* the cpuset is given to the callee */
} else {
hwloc_bitmap_free(atomset);
}
/* register IntelCore set if any */
if (!hwloc_bitmap_iszero(coreset)) {
struct hwloc_info_s infoattr;
infoattr.name = (char *) "CoreType";
infoattr.value = (char *) "IntelCore";
hwloc_internal_cpukinds_register(topology, coreset, HWLOC_CPUKIND_EFFICIENCY_UNKNOWN, &infoattr, 1, 0);
/* the cpuset is given to the callee */
} else {
hwloc_bitmap_free(coreset);
if (cpuid_type == intel) {
/* Hybrid Intel */
hwloc_bitmap_t atomset = hwloc_bitmap_alloc();
hwloc_bitmap_t coreset = hwloc_bitmap_alloc();
for(i=0; i<nbprocs; i++) {
if (infos[i].hybridcoretype == 0x20)
hwloc_bitmap_set(atomset, i);
else if (infos[i].hybridcoretype == 0x40)
hwloc_bitmap_set(coreset, i);
}
/* register IntelAtom set if any */
if (!hwloc_bitmap_iszero(atomset)) {
struct hwloc_info_s infoattr;
infoattr.name = (char *) "CoreType";
infoattr.value = (char *) "IntelAtom";
hwloc_internal_cpukinds_register(topology, atomset, HWLOC_CPUKIND_EFFICIENCY_UNKNOWN, &infoattr, 1, 0);
/* the cpuset is given to the callee */
} else {
hwloc_bitmap_free(atomset);
}
/* register IntelCore set if any */
if (!hwloc_bitmap_iszero(coreset)) {
struct hwloc_info_s infoattr;
infoattr.name = (char *) "CoreType";
infoattr.value = (char *) "IntelCore";
hwloc_internal_cpukinds_register(topology, coreset, HWLOC_CPUKIND_EFFICIENCY_UNKNOWN, &infoattr, 1, 0);
/* the cpuset is given to the callee */
} else {
hwloc_bitmap_free(coreset);
}
}
}
} else {
hwloc_debug("x86 APIC IDs aren't unique, x86 discovery ignored.\n");
/* do nothing and return success, so that the caller does nothing either */
}
/* if !data->apicid_unique, do nothing and return success, so that the caller does nothing either */

return 0;
}

@@ -1459,7 +1536,15 @@ int hwloc_look_x86(struct hwloc_backend *backend, unsigned long flags)
unsigned i;
unsigned highest_cpuid;
unsigned highest_ext_cpuid;
/* This stores cpuid features with the same indexing as Linux */
/* This stores cpuid features with the same indexing as Linux:
 * [0] = 0x1 edx
 * [1] = 0x80000001 edx
 * [4] = 0x1 ecx
 * [6] = 0x80000001 ecx
 * [9] = 0x7/0 ebx
 * [16] = 0x7/0 ecx
 * [18] = 0x7/0 edx
 */
unsigned features[19] = { 0 };
struct procinfo *infos = NULL;
enum cpuid_type cpuid_type = unknown;
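The expanded comment above pins down which CPUID words land in features[]; given that layout, feature tests reduce to single bit checks. A hedged sketch (the helper names are illustrative, not the macros hwloc actually defines; the bit positions are the architectural ones: CPUID.01H:ECX bit 21 is x2APIC, CPUID.07H/0:EDX bit 15 is the hybrid flag):

    /* features[] uses the Linux-style word layout documented above:
     * features[4]  = CPUID 0x1 ECX   -> bit 21 is x2APIC
     * features[18] = CPUID 0x7/0 EDX -> bit 15 is the hybrid-CPU flag */
    static int example_has_x2apic(const unsigned features[19])
    {
        return (features[4] >> 21) & 1;
    }

    static int example_has_hybrid(const unsigned features[19])
    {
        return (features[18] >> 15) & 1;
    }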
@@ -1579,6 +1664,7 @@ int hwloc_look_x86(struct hwloc_backend *backend, unsigned long flags)
ecx = 0;
cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump);
features[9] = ebx;
features[16] = ecx;
features[18] = edx;
}

@@ -1730,17 +1816,17 @@ hwloc_x86_check_cpuiddump_input(const char *src_cpuiddump_path, hwloc_bitmap_t s
sprintf(path, "%s/hwloc-cpuid-info", src_cpuiddump_path);
file = fopen(path, "r");
if (!file) {
fprintf(stderr, "Couldn't open dumped cpuid summary %s\n", path);
fprintf(stderr, "hwloc/x86: Couldn't open dumped cpuid summary %s\n", path);
goto out_with_path;
}
if (!fgets(line, sizeof(line), file)) {
fprintf(stderr, "Found read dumped cpuid summary in %s\n", path);
fprintf(stderr, "hwloc/x86: Found read dumped cpuid summary in %s\n", path);
fclose(file);
goto out_with_path;
}
fclose(file);
if (strcmp(line, "Architecture: x86\n")) {
fprintf(stderr, "Found non-x86 dumped cpuid summary in %s: %s\n", path, line);
fprintf(stderr, "hwloc/x86: Found non-x86 dumped cpuid summary in %s: %s\n", path, line);
goto out_with_path;
}
free(path);

@@ -1752,19 +1838,19 @@ hwloc_x86_check_cpuiddump_input(const char *src_cpuiddump_path, hwloc_bitmap_t s
if (!*end)
hwloc_bitmap_set(set, idx);
else
fprintf(stderr, "Ignoring invalid dirent `%s' in dumped cpuid directory `%s'\n",
fprintf(stderr, "hwloc/x86: Ignoring invalid dirent `%s' in dumped cpuid directory `%s'\n",
dirent->d_name, src_cpuiddump_path);
}
}
closedir(dir);

if (hwloc_bitmap_iszero(set)) {
fprintf(stderr, "Did not find any valid pu%%u entry in dumped cpuid directory `%s'\n",
fprintf(stderr, "hwloc/x86: Did not find any valid pu%%u entry in dumped cpuid directory `%s'\n",
src_cpuiddump_path);
return -1;
} else if (hwloc_bitmap_last(set) != hwloc_bitmap_weight(set) - 1) {
/* The x86 backends enforces contigous set of PUs starting at 0 so far */
fprintf(stderr, "Found non-contigous pu%%u range in dumped cpuid directory `%s'\n",
fprintf(stderr, "hwloc/x86: Found non-contigous pu%%u range in dumped cpuid directory `%s'\n",
src_cpuiddump_path);
return -1;
}

@@ -1816,9 +1902,15 @@ hwloc_x86_component_instantiate(struct hwloc_topology *topology,
/* default values */
data->is_knl = 0;
data->is_hybrid = 0;
data->apicid_set = hwloc_bitmap_alloc();
data->apicid_unique = 1;
data->src_cpuiddump_path = NULL;
data->found_die_ids = 0;
data->found_complex_ids = 0;
data->found_unit_ids = 0;
data->found_module_ids = 0;
data->found_tile_ids = 0;

src_cpuiddump_path = getenv("HWLOC_CPUID_PATH");
if (src_cpuiddump_path) {

@@ -1829,7 +1921,7 @@ hwloc_x86_component_instantiate(struct hwloc_topology *topology,
assert(!hwloc_bitmap_iszero(set)); /* enforced by hwloc_x86_check_cpuiddump_input() */
data->nbprocs = hwloc_bitmap_weight(set);
} else {
fprintf(stderr, "Ignoring dumped cpuid directory.\n");
fprintf(stderr, "hwloc/x86: Ignoring dumped cpuid directory.\n");
}
hwloc_bitmap_free(set);
}
@@ -411,12 +411,12 @@ hwloc_nolibxml_backend_init(struct hwloc_xml_backend_data_s *bdata,
bdata->data = nbdata;

if (xmlbuffer) {
nbdata->buffer = malloc(xmlbuflen+1);
nbdata->buffer = malloc(xmlbuflen);
if (!nbdata->buffer)
goto out_with_nbdata;
nbdata->buflen = xmlbuflen+1;
nbdata->buflen = xmlbuflen;
memcpy(nbdata->buffer, xmlbuffer, xmlbuflen);
nbdata->buffer[xmlbuflen] = '\0';
nbdata->buffer[xmlbuflen-1] = '\0'; /* make sure it's there as requested in the API */

} else {
int err = hwloc_nolibxml_read_file(xmlpath, &nbdata->buffer, &nbdata->buflen);

@@ -453,8 +453,9 @@ hwloc_nolibxml_import_diff(struct hwloc__xml_import_state_s *state,
buffer = malloc(xmlbuflen);
if (!buffer)
goto out;
memcpy(buffer, xmlbuffer, xmlbuflen);
buflen = xmlbuflen;
memcpy(buffer, xmlbuffer, xmlbuflen);
buffer[xmlbuflen-1] = '\0'; /* make sure it's there as requested in the API */

} else {
ret = hwloc_nolibxml_read_file(xmlpath, &buffer, &buflen);
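The two hunks above stop over-allocating the imported XML buffer and instead rely on the caller-provided length already covering the terminating NUL, which is what the public API asks for. A hedged usage sketch on the caller side (the wrapper function is made up for the example; hwloc_topology_set_xmlbuffer is the real entry point):

    #include <hwloc.h>
    #include <string.h>

    /* When feeding an in-memory XML export back into hwloc, pass a size that
     * includes the trailing '\0'; the importer only forces buffer[size-1] = 0. */
    static int load_from_xml_buffer(hwloc_topology_t topo, const char *xml)
    {
        return hwloc_topology_set_xmlbuffer(topo, xml, (int)strlen(xml) + 1);
    }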
77 src/3rdparty/hwloc/src/topology-xml.c (vendored)

@@ -1,6 +1,6 @@
/*
 * Copyright © 2009 CNRS
 * Copyright © 2009-2022 Inria. All rights reserved.
 * Copyright © 2009-2023 Inria. All rights reserved.
 * Copyright © 2009-2011, 2020 Université Bordeaux
 * Copyright © 2009-2018 Cisco Systems, Inc. All rights reserved.
 * See COPYING in top-level directory.

@@ -562,7 +562,13 @@ hwloc__xml_import_pagetype(hwloc_topology_t topology __hwloc_attribute_unused, s
char *attrname, *attrvalue;
if (state->global->next_attr(state, &attrname, &attrvalue) < 0)
break;
if (!strcmp(attrname, "size"))
if (!strcmp(attrname, "info")) {
char *infoname, *infovalue;
int ret = hwloc___xml_import_info(&infoname, &infovalue, state);
if (ret < 0)
return -1;
/* ignored */
} else if (!strcmp(attrname, "size"))
size = strtoull(attrvalue, NULL, 10);
else if (!strcmp(attrname, "count"))
count = strtoull(attrvalue, NULL, 10);

@@ -1160,6 +1166,48 @@ hwloc__xml_import_object(hwloc_topology_t topology,
data->last_numanode = obj;
}

/* 3.0 forward compatibility */
if (data->version_major >= 3 && obj->type == HWLOC_OBJ_OS_DEVICE) {
/* osdev.type changed into bitmask in 3.0 */
if (obj->attr->osdev.type & 3 /* STORAGE|MEMORY for BLOCK */) {
obj->attr->osdev.type = HWLOC_OBJ_OSDEV_BLOCK;
} else if (obj->attr->osdev.type & 8 /* COPROC for COPROC and rsmi/nvml GPUs */) {
if (obj->subtype && (!strcmp(obj->subtype, "RSMI") || !strcmp(obj->subtype, "NVML")))
obj->attr->osdev.type = HWLOC_OBJ_OSDEV_GPU;
else
obj->attr->osdev.type = HWLOC_OBJ_OSDEV_COPROC;
} else if (obj->attr->osdev.type & 4 /* GPU for non-COPROC GPUs */) {
obj->attr->osdev.type = HWLOC_OBJ_OSDEV_GPU;
} else if (obj->attr->osdev.type & 32 /* OFED */) {
obj->attr->osdev.type = HWLOC_OBJ_OSDEV_OPENFABRICS;
} else if (obj->attr->osdev.type & 16 /* NET for NET and BXI v2-fake-OFED */) {
if (obj->subtype && !strcmp(obj->subtype, "BXI"))
obj->attr->osdev.type = HWLOC_OBJ_OSDEV_OPENFABRICS;
else
obj->attr->osdev.type = HWLOC_OBJ_OSDEV_NETWORK;
} else if (obj->attr->osdev.type & 64 /* DMA */) {
obj->attr->osdev.type = HWLOC_OBJ_OSDEV_DMA;
} else { /* none or unknown */
obj->attr->osdev.type = (hwloc_obj_osdev_type_t) -1;
}
/* Backend info only in root */
if (obj->subtype && !hwloc_obj_get_info_by_name(obj, "Backend")) {
if (!strcmp(obj->subtype, "CUDA")) {
hwloc_obj_add_info(obj, "Backend", "CUDA");
} else if (!strcmp(obj->subtype, "NVML")) {
hwloc_obj_add_info(obj, "Backend", "NVML");
} else if (!strcmp(obj->subtype, "OpenCL")) {
hwloc_obj_add_info(obj, "Backend", "OpenCL");
} else if (!strcmp(obj->subtype, "RSMI")) {
hwloc_obj_add_info(obj, "Backend", "RSMI");
} else if (!strcmp(obj->subtype, "LevelZero")) {
hwloc_obj_add_info(obj, "Backend", "LevelZero");
} else if (!strcmp(obj->subtype, "Display")) {
hwloc_obj_add_info(obj, "Backend", "GL");
}
}
}

if (!hwloc_filter_check_keep_object(topology, obj)) {
/* Ignore this object instead of inserting it.
 *

@@ -1433,7 +1481,14 @@ hwloc__xml_v2import_distances(hwloc_topology_t topology,
if (ret <= 0)
break;

if (!strcmp(tag, "indexes"))
if (!strcmp(tag, "info")) {
char *infoname, *infovalue;
ret = hwloc___xml_import_info(&infoname, &infovalue, state);
if (ret < 0)
goto out_with_arrays;
/* ignored */
continue;
} else if (!strcmp(tag, "indexes"))
is_index = 1;
else if (!strcmp(tag, "u64values"))
is_u64values = 1;

@@ -1766,6 +1821,10 @@ hwloc__xml_import_memattr(hwloc_topology_t topology,
if (!strcmp(tag, "memattr_value")) {
ret = hwloc__xml_import_memattr_value(topology, id, flags, &childstate);
} else if (!strcmp(tag, "info")) {
char *infoname, *infovalue;
ret = hwloc___xml_import_info(&infoname, &infovalue, &childstate);
/* ignored */
} else {
if (hwloc__xml_verbose())
fprintf(stderr, "%s: memattr with unrecognized child %s\n",

@@ -2094,9 +2153,10 @@ hwloc_look_xml(struct hwloc_backend *backend, struct hwloc_disc_status *dstatus)
if (ret < 0)
goto failed;

if (data->version_major > 2) {
if (data->version_major > 3
|| (data->version_major == 3 && data->version_minor > 0)) {
if (hwloc__xml_verbose())
fprintf(stderr, "%s: cannot import XML version %u.%u > 2\n",
fprintf(stderr, "%s: cannot import XML version %u.%u > 3.0\n",
data->msgprefix, data->version_major, data->version_minor);
goto err;
}

@@ -2144,6 +2204,13 @@ hwloc_look_xml(struct hwloc_backend *backend, struct hwloc_disc_status *dstatus)
ret = hwloc__xml_import_cpukind(topology, &childstate);
if (ret < 0)
goto failed;
} else if (!strcmp(tag, "info")) {
char *infoname, *infovalue;
ret = hwloc___xml_import_info(&infoname, &infovalue, &childstate);
if (ret < 0)
goto failed;
/* move 3.x topology info back to the root object */
hwloc_obj_add_info(topology->levels[0][0], infoname, infovalue);
} else {
if (hwloc__xml_verbose())
fprintf(stderr, "%s: ignoring unknown tag `%s' after root object.\n",
207 src/3rdparty/hwloc/src/topology.c (vendored)

@@ -1,6 +1,6 @@
/*
 * Copyright © 2009 CNRS
 * Copyright © 2009-2022 Inria. All rights reserved.
 * Copyright © 2009-2023 Inria. All rights reserved.
 * Copyright © 2009-2012, 2020 Université Bordeaux
 * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved.
 * Copyright © 2022 IBM Corporation. All rights reserved.

@@ -146,21 +146,24 @@ report_insert_error_format_obj(char *buf, size_t buflen, hwloc_obj_t obj)
char typestr[64];
char *cpusetstr;
char *nodesetstr = NULL;
char indexstr[64] = "";
char groupstr[64] = "";

hwloc_obj_type_snprintf(typestr, sizeof(typestr), obj, 0);
hwloc_bitmap_asprintf(&cpusetstr, obj->cpuset);
if (obj->os_index != HWLOC_UNKNOWN_INDEX)
snprintf(indexstr, sizeof(indexstr), "P#%u ", obj->os_index);
if (obj->type == HWLOC_OBJ_GROUP)
snprintf(groupstr, sizeof(groupstr), "groupkind %u-%u ", obj->attr->group.kind, obj->attr->group.subkind);
if (obj->nodeset) /* may be missing during insert */
hwloc_bitmap_asprintf(&nodesetstr, obj->nodeset);
if (obj->os_index != HWLOC_UNKNOWN_INDEX)
snprintf(buf, buflen, "%s (P#%u cpuset %s%s%s)",
typestr, obj->os_index, cpusetstr,
nodesetstr ? " nodeset " : "",
nodesetstr ? nodesetstr : "");
else
snprintf(buf, buflen, "%s (cpuset %s%s%s)",
typestr, cpusetstr,
nodesetstr ? " nodeset " : "",
nodesetstr ? nodesetstr : "");
snprintf(buf, buflen, "%s (%s%s%s%s%scpuset %s%s%s)",
typestr,
indexstr,
obj->subtype ? "subtype " : "", obj->subtype ? obj->subtype : "", obj->subtype ? " " : "",
groupstr,
cpusetstr,
nodesetstr ? " nodeset " : "", nodesetstr ? nodesetstr : "");
free(cpusetstr);
free(nodesetstr);
}

@@ -178,8 +181,9 @@ static void report_insert_error(hwloc_obj_t new, hwloc_obj_t old, const char *ms
fprintf(stderr, "****************************************************************************\n");
fprintf(stderr, "* hwloc %s received invalid information from the operating system.\n", HWLOC_VERSION);
fprintf(stderr, "*\n");
fprintf(stderr, "* Failed with: %s\n", msg);
fprintf(stderr, "* while inserting %s at %s\n", newstr, oldstr);
fprintf(stderr, "* Failed with error: %s\n", msg);
fprintf(stderr, "* while inserting %s\n", newstr);
fprintf(stderr, "* at %s\n", oldstr);
fprintf(stderr, "* coming from: %s\n", reason);
fprintf(stderr, "*\n");
fprintf(stderr, "* The following FAQ entry in the hwloc documentation may help:\n");

@@ -679,7 +683,8 @@ unlink_and_free_object_and_children(hwloc_obj_t *pobj)
void
hwloc_free_object_and_children(hwloc_obj_t obj)
{
unlink_and_free_object_and_children(&obj);
if (obj)
unlink_and_free_object_and_children(&obj);
}

/* Free an object, its next siblings and their children without unlinking from parent.

@@ -1925,6 +1930,22 @@ hwloc_topology_alloc_group_object(struct hwloc_topology *topology)
return hwloc_alloc_setup_object(topology, HWLOC_OBJ_GROUP, HWLOC_UNKNOWN_INDEX);
}

int
hwloc_topology_free_group_object(struct hwloc_topology *topology, hwloc_obj_t obj)
{
if (!topology->is_loaded) {
/* this could actually work, see insert() below */
errno = EINVAL;
return -1;
}
if (topology->adopted_shmem_addr) {
errno = EPERM;
return -1;
}
hwloc_free_unlinked_object(obj);
return 0;
}

static void hwloc_propagate_symmetric_subtree(hwloc_topology_t topology, hwloc_obj_t root);
static void propagate_total_memory(hwloc_obj_t obj);
static void hwloc_set_group_depth(hwloc_topology_t topology);
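The new hwloc_topology_free_group_object() above complements hwloc_topology_alloc_group_object() for the case where a caller builds a group but then decides not to insert it (insertion hands ownership to hwloc). A hedged sketch of that pairing, assuming a loaded topology with at least two cores; the function name and flag are made up for the example, the hwloc calls are real:

    #include <hwloc.h>

    /* Build a group covering the first two cores; either hand it to hwloc
     * via insert (which takes ownership), or discard it with the new free API. */
    static int group_first_two_cores(hwloc_topology_t topo, int really_insert)
    {
        hwloc_obj_t core0 = hwloc_get_obj_by_type(topo, HWLOC_OBJ_CORE, 0);
        hwloc_obj_t core1 = hwloc_get_obj_by_type(topo, HWLOC_OBJ_CORE, 1);
        hwloc_obj_t group;

        if (!core0 || !core1)
            return -1;

        group = hwloc_topology_alloc_group_object(topo);
        if (!group)
            return -1;

        hwloc_obj_add_other_obj_sets(group, core0);
        hwloc_obj_add_other_obj_sets(group, core1);

        if (really_insert)
            return hwloc_topology_insert_group_object(topo, group) ? 0 : -1;

        /* not inserting: release the unlinked group explicitly */
        return hwloc_topology_free_group_object(topo, group);
    }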
@@ -1935,7 +1956,7 @@ static int hwloc_connect_special_levels(hwloc_topology_t topology);
hwloc_obj_t
hwloc_topology_insert_group_object(struct hwloc_topology *topology, hwloc_obj_t obj)
{
hwloc_obj_t res, root;
hwloc_obj_t res, root, child;
int cmp;

if (!topology->is_loaded) {

@@ -1945,6 +1966,7 @@ hwloc_topology_insert_group_object(struct hwloc_topology *topology, hwloc_obj_t
return NULL;
}
if (topology->adopted_shmem_addr) {
hwloc_free_unlinked_object(obj);
errno = EPERM;
return NULL;
}

@@ -1998,6 +2020,7 @@ hwloc_topology_insert_group_object(struct hwloc_topology *topology, hwloc_obj_t
res = hwloc__insert_object_by_cpuset(topology, NULL, obj, NULL /* do not show errors on stdout */);
} else {
/* just merge root */
hwloc_free_unlinked_object(obj);
res = root;
}

@@ -2024,6 +2047,13 @@ hwloc_topology_insert_group_object(struct hwloc_topology *topology, hwloc_obj_t
if (hwloc_topology_reconnect(topology, 0) < 0)
return NULL;

/* Compute group total_memory. */
res->total_memory = 0;
for_each_child(child, res)
res->total_memory += child->total_memory;
for_each_memory_child(child, res)
res->total_memory += child->total_memory;

hwloc_propagate_symmetric_subtree(topology, topology->levels[0][0]);
hwloc_set_group_depth(topology);

@@ -2254,11 +2284,13 @@ fixup_sets(hwloc_obj_t obj)
int
hwloc_obj_add_other_obj_sets(hwloc_obj_t dst, hwloc_obj_t src)
{
#define ADD_OTHER_OBJ_SET(_dst, _src, _set) \
if ((_src)->_set) { \
if (!(_dst)->_set) \
(_dst)->_set = hwloc_bitmap_alloc(); \
hwloc_bitmap_or((_dst)->_set, (_dst)->_set, (_src)->_set); \
#define ADD_OTHER_OBJ_SET(_dst, _src, _set) \
if ((_src)->_set) { \
if (!(_dst)->_set) \
(_dst)->_set = hwloc_bitmap_alloc(); \
if (!(_dst)->_set \
|| hwloc_bitmap_or((_dst)->_set, (_dst)->_set, (_src)->_set) < 0) \
return -1; \
}
ADD_OTHER_OBJ_SET(dst, src, cpuset);
ADD_OTHER_OBJ_SET(dst, src, complete_cpuset);

@@ -3730,6 +3762,7 @@ hwloc__topology_init (struct hwloc_topology **topologyp,
hwloc__topology_filter_init(topology);

/* always initialize since we don't know flags to disable those yet */
hwloc_internal_distances_init(topology);
hwloc_internal_memattrs_init(topology);
hwloc_internal_cpukinds_init(topology);

@@ -3942,8 +3975,12 @@ int
hwloc_topology_set_cache_types_filter(hwloc_topology_t topology, enum hwloc_type_filter_e filter)
{
unsigned i;
for(i=HWLOC_OBJ_L1CACHE; i<HWLOC_OBJ_L3ICACHE; i++)
hwloc_topology_set_type_filter(topology, (hwloc_obj_type_t) i, filter);
if (topology->is_loaded) {
errno = EBUSY;
return -1;
}
for(i=HWLOC_OBJ_L1CACHE; i<=HWLOC_OBJ_L3ICACHE; i++)
hwloc__topology_set_type_filter(topology, (hwloc_obj_type_t) i, filter);
return 0;
}

@@ -3951,17 +3988,25 @@ int
hwloc_topology_set_icache_types_filter(hwloc_topology_t topology, enum hwloc_type_filter_e filter)
{
unsigned i;
for(i=HWLOC_OBJ_L1ICACHE; i<HWLOC_OBJ_L3ICACHE; i++)
hwloc_topology_set_type_filter(topology, (hwloc_obj_type_t) i, filter);
if (topology->is_loaded) {
errno = EBUSY;
return -1;
}
for(i=HWLOC_OBJ_L1ICACHE; i<=HWLOC_OBJ_L3ICACHE; i++)
hwloc__topology_set_type_filter(topology, (hwloc_obj_type_t) i, filter);
return 0;
}

int
hwloc_topology_set_io_types_filter(hwloc_topology_t topology, enum hwloc_type_filter_e filter)
{
hwloc_topology_set_type_filter(topology, HWLOC_OBJ_BRIDGE, filter);
hwloc_topology_set_type_filter(topology, HWLOC_OBJ_PCI_DEVICE, filter);
hwloc_topology_set_type_filter(topology, HWLOC_OBJ_OS_DEVICE, filter);
if (topology->is_loaded) {
errno = EBUSY;
return -1;
}
hwloc__topology_set_type_filter(topology, HWLOC_OBJ_BRIDGE, filter);
hwloc__topology_set_type_filter(topology, HWLOC_OBJ_PCI_DEVICE, filter);
hwloc__topology_set_type_filter(topology, HWLOC_OBJ_OS_DEVICE, filter);
return 0;
}

@@ -3982,9 +4027,12 @@ hwloc_topology_clear (struct hwloc_topology *topology)
{
/* no need to set to NULL after free() since callers will call setup_defaults() or just destroy the rest of the topology */
unsigned l;

/* always destroy cpukinds/distances/memattrs since there are always initialized during init() */
hwloc_internal_cpukinds_destroy(topology);
hwloc_internal_distances_destroy(topology);
hwloc_internal_memattrs_destroy(topology);

hwloc_free_object_and_children(topology->levels[0][0]);
hwloc_bitmap_free(topology->allowed_cpuset);
hwloc_bitmap_free(topology->allowed_nodeset);

@@ -4024,6 +4072,7 @@ hwloc_topology_load (struct hwloc_topology *topology)
{
struct hwloc_disc_status dstatus;
const char *env;
unsigned i;
int err;

if (topology->is_loaded) {

@@ -4032,8 +4081,18 @@ hwloc_topology_load (struct hwloc_topology *topology)
}

/* initialize envvar-related things */
hwloc_internal_distances_prepare(topology);
hwloc_internal_memattrs_prepare(topology);
if (!(topology->flags & HWLOC_TOPOLOGY_FLAG_NO_DISTANCES))
hwloc_internal_distances_prepare(topology);
if (!(topology->flags & HWLOC_TOPOLOGY_FLAG_NO_MEMATTRS))
hwloc_internal_memattrs_prepare(topology);

/* check if any cpu cache filter is not NONE */
topology->want_some_cpu_caches = 0;
for(i=HWLOC_OBJ_L1CACHE; i<=HWLOC_OBJ_L3ICACHE; i++)
if (topology->type_filter[i] != HWLOC_TYPE_FILTER_KEEP_NONE) {
topology->want_some_cpu_caches = 1;
break;
}

if (getenv("HWLOC_XML_USERDATA_NOT_DECODED"))
topology->userdata_not_decoded = 1;

@@ -4110,23 +4169,32 @@ hwloc_topology_load (struct hwloc_topology *topology)
#endif
hwloc_topology_check(topology);

/* Rank cpukinds */
hwloc_internal_cpukinds_rank(topology);
if (!(topology->flags & HWLOC_TOPOLOGY_FLAG_NO_CPUKINDS)) {
/* Rank cpukinds */
hwloc_internal_cpukinds_rank(topology);
}

/* Mark distances objs arrays as invalid since we may have removed objects
 * from the topology after adding the distances (remove_empty, etc).
 * It would be hard to actually verify whether it's needed.
 */
hwloc_internal_distances_invalidate_cached_objs(topology);
/* And refresh distances so that multithreaded concurrent distances_get()
 * don't refresh() concurrently (disallowed).
 */
hwloc_internal_distances_refresh(topology);
if (!(topology->flags & HWLOC_TOPOLOGY_FLAG_NO_DISTANCES)) {
/* Mark distances objs arrays as invalid since we may have removed objects
 * from the topology after adding the distances (remove_empty, etc).
 * It would be hard to actually verify whether it's needed.
 */
hwloc_internal_distances_invalidate_cached_objs(topology);
/* And refresh distances so that multithreaded concurrent distances_get()
 * don't refresh() concurrently (disallowed).
 */
hwloc_internal_distances_refresh(topology);
}

/* Same for memattrs */
hwloc_internal_memattrs_need_refresh(topology);
hwloc_internal_memattrs_refresh(topology);
hwloc_internal_memattrs_guess_memory_tiers(topology);
if (!(topology->flags & HWLOC_TOPOLOGY_FLAG_NO_MEMATTRS)) {
int force_memtiers = (getenv("HWLOC_MEMTIERS_REFRESH") != NULL);
/* Same for memattrs */
hwloc_internal_memattrs_need_refresh(topology);
hwloc_internal_memattrs_refresh(topology);
/* update memtiers unless XML */
if (force_memtiers || strcmp(topology->backends->component->name, "xml"))
hwloc_internal_memattrs_guess_memory_tiers(topology, force_memtiers);
}

topology->is_loaded = 1;
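With the gating added above, the NO_DISTANCES / NO_MEMATTRS / NO_CPUKINDS topology flags consistently skip the corresponding bookkeeping, and CPU cache filters feed want_some_cpu_caches. A hedged sketch of a consumer that opts out of all of it before loading (the wrapper name is illustrative; flags, filters and functions are the real hwloc API):

    #include <hwloc.h>

    /* Build a slimmed-down topology: no distances, memattrs or cpukinds,
     * and no CPU cache objects. Flags and filters must be set before load(). */
    static int load_minimal_topology(hwloc_topology_t *out)
    {
        hwloc_topology_t topo;

        if (hwloc_topology_init(&topo) < 0)
            return -1;

        hwloc_topology_set_flags(topo, HWLOC_TOPOLOGY_FLAG_NO_DISTANCES
                                     | HWLOC_TOPOLOGY_FLAG_NO_MEMATTRS
                                     | HWLOC_TOPOLOGY_FLAG_NO_CPUKINDS);
        hwloc_topology_set_cache_types_filter(topo, HWLOC_TYPE_FILTER_KEEP_NONE);

        if (hwloc_topology_load(topo) < 0) {
            hwloc_topology_destroy(topo);
            return -1;
        }
        *out = topo;
        return 0;
    }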
|
@ -4185,20 +4253,11 @@ restrict_object_by_cpuset(hwloc_topology_t topology, unsigned long flags, hwloc_
|
|||
hwloc_bitmap_andnot(obj->cpuset, obj->cpuset, droppedcpuset);
|
||||
hwloc_bitmap_andnot(obj->complete_cpuset, obj->complete_cpuset, droppedcpuset);
|
||||
modified = 1;
|
||||
} else {
|
||||
if ((flags & HWLOC_RESTRICT_FLAG_REMOVE_CPULESS)
|
||||
&& hwloc_bitmap_iszero(obj->complete_cpuset)) {
|
||||
/* we're empty, there's a NUMAnode below us, it'll be removed this time */
|
||||
modified = 1;
|
||||
}
|
||||
/* nodeset cannot intersect unless cpuset intersects or is empty */
|
||||
if (droppednodeset)
|
||||
assert(!hwloc_bitmap_intersects(obj->complete_nodeset, droppednodeset)
|
||||
|| hwloc_bitmap_iszero(obj->complete_cpuset));
|
||||
}
|
||||
if (droppednodeset) {
|
||||
if (droppednodeset && hwloc_bitmap_intersects(obj->complete_nodeset, droppednodeset)) {
|
||||
hwloc_bitmap_andnot(obj->nodeset, obj->nodeset, droppednodeset);
|
||||
hwloc_bitmap_andnot(obj->complete_nodeset, obj->complete_nodeset, droppednodeset);
|
||||
modified = 1;
|
||||
}
|
||||
|
||||
if (modified) {
|
||||
|
@ -4251,20 +4310,11 @@ restrict_object_by_nodeset(hwloc_topology_t topology, unsigned long flags, hwloc
|
|||
hwloc_bitmap_andnot(obj->nodeset, obj->nodeset, droppednodeset);
|
||||
hwloc_bitmap_andnot(obj->complete_nodeset, obj->complete_nodeset, droppednodeset);
|
||||
modified = 1;
|
||||
} else {
|
||||
if ((flags & HWLOC_RESTRICT_FLAG_REMOVE_MEMLESS)
|
||||
&& hwloc_bitmap_iszero(obj->complete_nodeset)) {
|
||||
/* we're empty, there's a PU below us, it'll be removed this time */
|
||||
modified = 1;
|
||||
}
|
||||
/* cpuset cannot intersect unless nodeset intersects or is empty */
|
||||
if (droppedcpuset)
|
||||
assert(!hwloc_bitmap_intersects(obj->complete_cpuset, droppedcpuset)
|
||||
|| hwloc_bitmap_iszero(obj->complete_nodeset));
|
||||
}
|
||||
if (droppedcpuset) {
|
||||
if (droppedcpuset && hwloc_bitmap_intersects(obj->complete_cpuset, droppedcpuset)) {
|
||||
hwloc_bitmap_andnot(obj->cpuset, obj->cpuset, droppedcpuset);
|
||||
hwloc_bitmap_andnot(obj->complete_cpuset, obj->complete_cpuset, droppedcpuset);
|
||||
modified = 1;
|
||||
}
|
||||
|
||||
if (modified) {
|
||||
|
@ -4433,13 +4483,18 @@ hwloc_topology_restrict(struct hwloc_topology *topology, hwloc_const_bitmap_t se
|
|||
if (hwloc_filter_levels_keep_structure(topology) < 0) /* takes care of reconnecting internally */
|
||||
goto out;
|
||||
|
||||
/* some objects may have disappeared, we need to update distances objs arrays */
|
||||
hwloc_internal_distances_invalidate_cached_objs(topology);
|
||||
hwloc_internal_memattrs_need_refresh(topology);
|
||||
/* some objects may have disappeared and sets were modified,
|
||||
* we need to update distances, etc */
|
||||
if (!(topology->flags & HWLOC_TOPOLOGY_FLAG_NO_DISTANCES))
|
||||
hwloc_internal_distances_invalidate_cached_objs(topology);
|
||||
if (!(topology->flags & HWLOC_TOPOLOGY_FLAG_NO_MEMATTRS))
|
||||
hwloc_internal_memattrs_need_refresh(topology);
|
||||
if (!(topology->flags & HWLOC_TOPOLOGY_FLAG_NO_CPUKINDS))
|
||||
hwloc_internal_cpukinds_restrict(topology);
|
||||
|
||||
|
||||
hwloc_propagate_symmetric_subtree(topology, topology->levels[0][0]);
|
||||
propagate_total_memory(topology->levels[0][0]);
|
||||
hwloc_internal_cpukinds_restrict(topology);
|
||||
|
||||
#ifndef HWLOC_DEBUG
|
||||
if (getenv("HWLOC_DEBUG_CHECK"))
|
||||
|
@ -4527,9 +4582,12 @@ hwloc_topology_allow(struct hwloc_topology *topology,
|
|||
int
|
||||
hwloc_topology_refresh(struct hwloc_topology *topology)
|
||||
{
|
||||
hwloc_internal_cpukinds_rank(topology);
|
||||
hwloc_internal_distances_refresh(topology);
|
||||
hwloc_internal_memattrs_refresh(topology);
|
||||
if (!(topology->flags & HWLOC_TOPOLOGY_FLAG_NO_CPUKINDS))
|
||||
hwloc_internal_cpukinds_rank(topology);
|
||||
if (!(topology->flags & HWLOC_TOPOLOGY_FLAG_NO_DISTANCES))
|
||||
hwloc_internal_distances_refresh(topology);
|
||||
if (!(topology->flags & HWLOC_TOPOLOGY_FLAG_NO_MEMATTRS))
|
||||
hwloc_internal_memattrs_refresh(topology);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -5081,6 +5139,9 @@ hwloc_topology_check(struct hwloc_topology *topology)
|
|||
for(i=HWLOC_OBJ_TYPE_MIN; i<HWLOC_OBJ_TYPE_MAX; i++)
|
||||
assert(obj_type_order[obj_order_type[i]] == i);
|
||||
|
||||
if (!topology->is_loaded)
|
||||
return;
|
||||
|
||||
depth = hwloc_topology_get_depth(topology);
|
||||
|
||||
assert(!topology->modified);
|
||||
|
|
2 src/3rdparty/libethash/CMakeLists.txt (vendored)

@@ -1,4 +1,4 @@
cmake_minimum_required(VERSION 3.1)
cmake_minimum_required(VERSION 3.5)
project (ethash C)

set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -Os")
@@ -1,6 +1,6 @@
/* XMRig
 * Copyright (c) 2018-2020 SChernykh <https://github.com/SChernykh>
 * Copyright (c) 2016-2020 XMRig <https://github.com/xmrig>, <support@xmrig.com>
 * Copyright (c) 2018-2023 SChernykh <https://github.com/SChernykh>
 * Copyright (c) 2016-2023 XMRig <https://github.com/xmrig>, <support@xmrig.com>
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by

@@ -37,7 +37,7 @@ public:
};

} /* namespace xmrig */
} // namespace xmrig

#endif /* XMRIG_CPU_H */
#endif // XMRIG_CPU_H

@@ -1,6 +1,6 @@
/* XMRig
 * Copyright (c) 2018-2021 SChernykh <https://github.com/SChernykh>
 * Copyright (c) 2016-2021 XMRig <support@xmrig.com>
 * Copyright (c) 2018-2023 SChernykh <https://github.com/SChernykh>
 * Copyright (c) 2016-2023 XMRig <support@xmrig.com>
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by

@@ -26,6 +26,12 @@
#include "crypto/common/Assembly.h"

#ifdef XMRIG_FEATURE_HWLOC
using hwloc_const_bitmap_t = const struct hwloc_bitmap_s *;
using hwloc_topology_t = struct hwloc_topology *;
#endif

namespace xmrig {

@@ -116,10 +122,16 @@ public:
virtual size_t threads() const = 0;
virtual Vendor vendor() const = 0;
virtual uint32_t model() const = 0;

# ifdef XMRIG_FEATURE_HWLOC
virtual bool membind(hwloc_const_bitmap_t nodeset) = 0;
virtual const std::vector<uint32_t> &nodeset() const = 0;
virtual hwloc_topology_t topology() const = 0;
# endif
};

} /* namespace xmrig */
} // namespace xmrig

#endif // XMRIG_CPUINFO_H

@@ -296,7 +296,7 @@ xmrig::BasicCpuInfo::BasicCpuInfo() :
// Affected CPU models and stepping numbers are taken from https://www.intel.com/content/dam/support/us/en/documents/processors/mitigations-jump-conditional-code-erratum.pdf
m_jccErratum =
((model == 0x4E) && (stepping == 0x3)) ||
((model == 0x55) && (stepping == 0x4)) ||
((model == 0x55) && ((stepping == 0x4) || (stepping == 0x7))) ||
((model == 0x5E) && (stepping == 0x3)) ||
((model == 0x8E) && (stepping >= 0x9) && (stepping <= 0xC)) ||
((model == 0x9E) && (stepping >= 0x9) && (stepping <= 0xD)) ||
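The widened stepping check above keys on the CPUID family/model/stepping fields. For reference, a hedged sketch of how those numbers are derived from CPUID leaf 1 (a standard decode, not XMRig's actual helper; __get_cpuid comes from the GCC/Clang <cpuid.h> header):

    #include <cpuid.h>

    /* Decode display model and stepping from CPUID leaf 1 EAX:
     * stepping = EAX[3:0], model = EAX[7:4], family = EAX[11:8],
     * with the extended model (EAX[19:16]) folded in for family 6/15. */
    static void cpu_model_stepping(unsigned *model, unsigned *stepping)
    {
        unsigned eax, ebx, ecx, edx;
        unsigned family;

        __get_cpuid(1, &eax, &ebx, &ecx, &edx);

        *stepping = eax & 0xf;
        family = (eax >> 8) & 0xf;
        *model = (eax >> 4) & 0xf;
        if (family == 0x6 || family == 0xf)
            *model |= ((eax >> 16) & 0xf) << 4;
    }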
|
@ -1,7 +1,7 @@
|
|||
/* XMRig
|
||||
* Copyright (c) 2017-2019 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||
* Copyright (c) 2018-2021 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright (c) 2016-2021 XMRig <support@xmrig.com>
|
||||
* Copyright (c) 2018-2023 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright (c) 2016-2023 XMRig <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
|
@ -72,11 +72,10 @@ protected:
|
|||
# endif
|
||||
}
|
||||
|
||||
protected:
|
||||
Arch m_arch = ARCH_UNKNOWN;
|
||||
bool m_jccErratum = false;
|
||||
char m_brand[64 + 6]{};
|
||||
size_t m_threads;
|
||||
size_t m_threads = 0;
|
||||
std::vector<int32_t> m_units;
|
||||
Vendor m_vendor = VENDOR_UNKNOWN;
|
||||
|
||||
|
@ -94,7 +93,7 @@ private:
|
|||
};
|
||||
|
||||
|
||||
} /* namespace xmrig */
|
||||
} // namespace xmrig
|
||||
|
||||
|
||||
#endif /* XMRIG_BASICCPUINFO_H */
|
||||
#endif // XMRIG_BASICCPUINFO_H
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/* XMRig
|
||||
* Copyright (c) 2018-2021 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright (c) 2016-2021 XMRig <support@xmrig.com>
|
||||
* Copyright (c) 2018-2023 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright (c) 2016-2023 XMRig <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
|
@ -36,27 +36,22 @@
|
|||
#include "base/io/log/Log.h"
|
||||
|
||||
|
||||
namespace xmrig {
|
||||
|
||||
|
||||
uint32_t HwlocCpuInfo::m_features = 0;
|
||||
|
||||
|
||||
static inline bool isCacheObject(hwloc_obj_t obj)
|
||||
#if HWLOC_API_VERSION < 0x20000
|
||||
static inline int hwloc_obj_type_is_cache(hwloc_obj_type_t type)
|
||||
{
|
||||
# if HWLOC_API_VERSION >= 0x20000
|
||||
return hwloc_obj_type_is_cache(obj->type);
|
||||
# else
|
||||
return obj->type == HWLOC_OBJ_CACHE;
|
||||
# endif
|
||||
return type == HWLOC_OBJ_CACHE;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
namespace xmrig {
|
||||
|
||||
|
||||
template <typename func>
|
||||
static inline void findCache(hwloc_obj_t obj, unsigned min, unsigned max, func lambda)
|
||||
{
|
||||
for (size_t i = 0; i < obj->arity; i++) {
|
||||
if (isCacheObject(obj->children[i])) {
|
||||
if (hwloc_obj_type_is_cache(obj->children[i]->type)) {
|
||||
const unsigned depth = obj->children[i]->attr->cache.depth;
|
||||
if (depth < min || depth > max) {
|
||||
continue;
|
||||
|
@ -174,10 +169,6 @@ xmrig::HwlocCpuInfo::HwlocCpuInfo()
|
|||
m_packages = countByType(m_topology, HWLOC_OBJ_PACKAGE);
|
||||
|
||||
if (m_nodes > 1) {
|
||||
if (hwloc_topology_get_support(m_topology)->membind->set_thisthread_membind) {
|
||||
m_features |= SET_THISTHREAD_MEMBIND;
|
||||
}
|
||||
|
||||
m_nodeset.reserve(m_nodes);
|
||||
hwloc_obj_t node = nullptr;
|
||||
|
||||
|
@ -298,8 +289,10 @@ void xmrig::HwlocCpuInfo::processTopLevelCache(hwloc_obj_t cache, const Algorith
|
|||
cores.reserve(m_cores);
|
||||
findByType(cache, HWLOC_OBJ_CORE, [&cores](hwloc_obj_t found) { cores.emplace_back(found); });
|
||||
|
||||
const bool L3_exclusive = isCacheExclusive(cache);
|
||||
|
||||
# ifdef XMRIG_ALGO_GHOSTRIDER
|
||||
if ((algorithm == Algorithm::GHOSTRIDER_RTM) && (PUs > cores.size()) && (PUs < cores.size() * 2)) {
|
||||
if ((algorithm == Algorithm::GHOSTRIDER_RTM) && L3_exclusive && (PUs > cores.size()) && (PUs < cores.size() * 2)) {
|
||||
// Don't use E-cores on Alder Lake
|
||||
cores.erase(std::remove_if(cores.begin(), cores.end(), [](hwloc_obj_t c) { return hwloc_bitmap_weight(c->cpuset) == 1; }), cores.end());
|
||||
|
||||
|
@ -311,7 +304,6 @@ void xmrig::HwlocCpuInfo::processTopLevelCache(hwloc_obj_t cache, const Algorith
|
|||
# endif
|
||||
|
||||
size_t L3 = cache->attr->cache.size;
|
||||
const bool L3_exclusive = isCacheExclusive(cache);
|
||||
size_t L2 = 0;
|
||||
int L2_associativity = 0;
|
||||
size_t extra = 0;
|
||||
|
@ -321,7 +313,7 @@ void xmrig::HwlocCpuInfo::processTopLevelCache(hwloc_obj_t cache, const Algorith
|
|||
if (cache->attr->cache.depth == 3) {
|
||||
for (size_t i = 0; i < cache->arity; ++i) {
|
||||
hwloc_obj_t l2 = cache->children[i];
|
||||
if (!isCacheObject(l2) || l2->attr == nullptr) {
|
||||
if (!hwloc_obj_type_is_cache(l2->type) || l2->attr == nullptr) {
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -349,6 +341,10 @@ void xmrig::HwlocCpuInfo::processTopLevelCache(hwloc_obj_t cache, const Algorith
|
|||
}
|
||||
|
||||
# ifdef XMRIG_ALGO_RANDOMX
|
||||
if ((algorithm.family() == Algorithm::RANDOM_X) && L3_exclusive && (PUs > cores.size()) && (PUs < cores.size() * 2)) {
|
||||
// Use all L3+L2 on latest Intel CPUs with P-cores, E-cores and exclusive L3 cache
|
||||
cacheHashes = (L3 + L2) / scratchpad;
|
||||
}
|
||||
if (extra == 0 && algorithm.l2() > 0) {
|
||||
cacheHashes = std::min<size_t>(std::max<size_t>(L2 / algorithm.l2(), cores.size()), cacheHashes);
|
||||
}
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/* XMRig
|
||||
* Copyright (c) 2018-2021 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright (c) 2016-2021 XMRig <support@xmrig.com>
|
||||
* Copyright (c) 2018-2023 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright (c) 2016-2023 XMRig <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
|
@ -21,12 +21,9 @@
|
|||
|
||||
|
||||
#include "backend/cpu/platform/BasicCpuInfo.h"
|
||||
#include "base/tools/Object.h"
|
||||
|
||||
|
||||
using hwloc_const_bitmap_t = const struct hwloc_bitmap_s *;
|
||||
using hwloc_obj_t = struct hwloc_obj *;
|
||||
using hwloc_topology_t = struct hwloc_topology *;
|
||||
using hwloc_obj_t = struct hwloc_obj *;
|
||||
|
||||
|
||||
namespace xmrig {
|
||||
|
@ -37,39 +34,27 @@ class HwlocCpuInfo : public BasicCpuInfo
|
|||
public:
|
||||
XMRIG_DISABLE_COPY_MOVE(HwlocCpuInfo)
|
||||
|
||||
|
||||
enum Feature : uint32_t {
|
||||
SET_THISTHREAD_MEMBIND = 1
|
||||
};
|
||||
|
||||
|
||||
HwlocCpuInfo();
|
||||
~HwlocCpuInfo() override;
|
||||
|
||||
static inline bool hasFeature(Feature feature) { return m_features & feature; }
|
||||
|
||||
inline const std::vector<uint32_t> &nodeset() const { return m_nodeset; }
|
||||
inline hwloc_topology_t topology() const { return m_topology; }
|
||||
|
||||
bool membind(hwloc_const_bitmap_t nodeset);
|
||||
|
||||
protected:
|
||||
bool membind(hwloc_const_bitmap_t nodeset) override;
|
||||
CpuThreads threads(const Algorithm &algorithm, uint32_t limit) const override;
|
||||
|
||||
inline const char *backend() const override { return m_backend; }
|
||||
inline size_t cores() const override { return m_cores; }
|
||||
inline size_t L2() const override { return m_cache[2]; }
|
||||
inline size_t L3() const override { return m_cache[3]; }
|
||||
inline size_t nodes() const override { return m_nodes; }
|
||||
inline size_t packages() const override { return m_packages; }
|
||||
inline const char *backend() const override { return m_backend; }
|
||||
inline const std::vector<uint32_t> &nodeset() const override { return m_nodeset; }
|
||||
inline hwloc_topology_t topology() const override { return m_topology; }
|
||||
inline size_t cores() const override { return m_cores; }
|
||||
inline size_t L2() const override { return m_cache[2]; }
|
||||
inline size_t L3() const override { return m_cache[3]; }
|
||||
inline size_t nodes() const override { return m_nodes; }
|
||||
inline size_t packages() const override { return m_packages; }
|
||||
|
||||
private:
|
||||
CpuThreads allThreads(const Algorithm &algorithm, uint32_t limit) const;
|
||||
void processTopLevelCache(hwloc_obj_t cache, const Algorithm &algorithm, CpuThreads &threads, size_t limit) const;
|
||||
void setThreads(size_t threads);
|
||||
|
||||
static uint32_t m_features;
|
||||
|
||||
char m_backend[20] = { 0 };
|
||||
hwloc_topology_t m_topology = nullptr;
|
||||
size_t m_cache[5] = { 0 };
|
||||
|
@ -80,7 +65,7 @@ private:
|
|||
};
|
||||
|
||||
|
||||
} /* namespace xmrig */
|
||||
} // namespace xmrig
|
||||
|
||||
|
||||
#endif /* XMRIG_HWLOCCPUINFO_H */
|
||||
#endif // XMRIG_HWLOCCPUINFO_H
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
/* XMRig
|
||||
* Copyright (c) 2018 Riku Voipio <riku.voipio@iki.fi>
|
||||
* Copyright (c) 2018-2021 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright (c) 2016-2021 XMRig <support@xmrig.com>
|
||||
* Copyright (c) 2018-2023 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright (c) 2016-2023 XMRig <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
|
@ -17,7 +17,6 @@
|
|||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
|
||||
#include "base/tools/String.h"
|
||||
#include "3rdparty/fmt/core.h"
|
||||
|
||||
|
@ -84,6 +83,7 @@ static const id_part arm_part[] = {
|
|||
{ 0xc27, "Cortex-M7" },
|
||||
{ 0xc60, "Cortex-M0+" },
|
||||
{ 0xd01, "Cortex-A32" },
|
||||
{ 0xd02, "Cortex-A34" },
|
||||
{ 0xd03, "Cortex-A53" },
|
||||
{ 0xd04, "Cortex-A35" },
|
||||
{ 0xd05, "Cortex-A55" },
|
||||
|
@ -97,40 +97,60 @@ static const id_part arm_part[] = {
|
|||
{ 0xd0d, "Cortex-A77" },
|
||||
{ 0xd0e, "Cortex-A76AE" },
|
||||
{ 0xd13, "Cortex-R52" },
|
||||
{ 0xd15, "Cortex-R82" },
|
||||
{ 0xd20, "Cortex-M23" },
|
||||
{ 0xd21, "Cortex-M33" },
|
||||
{ 0xd40, "Neoverse-V1" },
|
||||
{ 0xd41, "Cortex-A78" },
|
||||
{ 0xd42, "Cortex-A78AE" },
|
||||
{ 0xd43, "Cortex-A65AE" },
|
||||
{ 0xd44, "Cortex-X1" },
|
||||
{ 0xd46, "Cortex-A510" },
|
||||
{ 0xd47, "Cortex-A710" },
|
||||
{ 0xd48, "Cortex-X2" },
|
||||
{ 0xd49, "Neoverse-N2" },
|
||||
{ 0xd4a, "Neoverse-E1" },
|
||||
{ 0xd4b, "Cortex-A78C" },
|
||||
{ -1, nullptr },
|
||||
{ 0xd4c, "Cortex-X1C" },
|
||||
{ 0xd4d, "Cortex-A715" },
|
||||
{ 0xd4e, "Cortex-X3" },
|
||||
{ 0xd4f, "Neoverse-V2" },
|
||||
{ -1, nullptr }
|
||||
};
|
||||
|
||||
static const id_part brcm_part[] = {
|
||||
{ 0x0f, "Brahma B15" },
|
||||
{ 0x100, "Brahma B53" },
|
||||
{ 0x0f, "Brahma-B15" },
|
||||
{ 0x100, "Brahma-B53" },
|
||||
{ 0x516, "ThunderX2" },
|
||||
{ -1, nullptr },
|
||||
{ -1, nullptr }
|
||||
};
|
||||
|
||||
static const id_part dec_part[] = {
|
||||
{ 0xa10, "SA110" },
|
||||
{ 0xa11, "SA1100" },
|
||||
{ -1, nullptr },
|
||||
{ -1, nullptr }
|
||||
};
|
||||
|
||||
static const id_part cavium_part[] = {
|
||||
{ 0x0a0, "ThunderX" },
|
||||
{ 0x0a1, "ThunderX 88XX" },
|
||||
{ 0x0a2, "ThunderX 81XX" },
|
||||
{ 0x0a3, "ThunderX 83XX" },
|
||||
{ 0x0af, "ThunderX2 99xx" },
|
||||
{ -1, nullptr },
|
||||
{ 0x0a1, "ThunderX-88XX" },
|
||||
{ 0x0a2, "ThunderX-81XX" },
|
||||
{ 0x0a3, "ThunderX-83XX" },
|
||||
{ 0x0af, "ThunderX2-99xx" },
|
||||
{ 0x0b0, "OcteonTX2" },
|
||||
{ 0x0b1, "OcteonTX2-98XX" },
|
||||
{ 0x0b2, "OcteonTX2-96XX" },
|
||||
{ 0x0b3, "OcteonTX2-95XX" },
|
||||
{ 0x0b4, "OcteonTX2-95XXN" },
|
||||
{ 0x0b5, "OcteonTX2-95XXMM" },
|
||||
{ 0x0b6, "OcteonTX2-95XXO" },
|
||||
{ 0x0b8, "ThunderX3-T110" },
|
||||
{ -1, nullptr }
|
||||
};
|
||||
|
||||
static const id_part apm_part[] = {
|
||||
{ 0x000, "X-Gene" },
|
||||
{ -1, nullptr },
|
||||
{ -1, nullptr }
|
||||
};
|
||||
|
||||
static const id_part qcom_part[] = {
|
||||
|
@ -141,36 +161,43 @@ static const id_part qcom_part[] = {
|
|||
{ 0x201, "Kryo" },
|
||||
{ 0x205, "Kryo" },
|
||||
{ 0x211, "Kryo" },
|
||||
{ 0x800, "Falkor V1/Kryo" },
|
||||
{ 0x801, "Kryo V2" },
|
||||
{ 0x800, "Falkor-V1/Kryo" },
|
||||
{ 0x801, "Kryo-V2" },
|
||||
{ 0x802, "Kryo-3XX-Gold" },
|
||||
{ 0x803, "Kryo-3XX-Silver" },
|
||||
{ 0x804, "Kryo-4XX-Gold" },
|
||||
{ 0x805, "Kryo-4XX-Silver" },
|
||||
{ 0xc00, "Falkor" },
|
||||
{ 0xc01, "Saphira" },
|
||||
{ -1, nullptr },
|
||||
{ -1, nullptr }
|
||||
};
|
||||
|
||||
static const id_part samsung_part[] = {
|
||||
{ 0x001, "exynos-m1" },
|
||||
{ -1, nullptr },
|
||||
{ 0x002, "exynos-m3" },
|
||||
{ 0x003, "exynos-m4" },
|
||||
{ 0x004, "exynos-m5" },
|
||||
{ -1, nullptr }
|
||||
};
|
||||
|
||||
static const id_part nvidia_part[] = {
|
||||
{ 0x000, "Denver" },
|
||||
{ 0x003, "Denver 2" },
|
||||
{ 0x004, "Carmel" },
|
||||
{ -1, nullptr },
|
||||
{ -1, nullptr }
|
||||
};
|
||||
|
||||
static const id_part marvell_part[] = {
|
||||
{ 0x131, "Feroceon 88FR131" },
|
||||
{ 0x131, "Feroceon-88FR131" },
|
||||
{ 0x581, "PJ4/PJ4b" },
|
||||
{ 0x584, "PJ4B-MP" },
|
||||
{ -1, nullptr },
|
||||
{ -1, nullptr }
|
||||
};
|
||||
|
||||
static const id_part faraday_part[] = {
|
||||
{ 0x526, "FA526" },
|
||||
{ 0x626, "FA626" },
|
||||
{ -1, nullptr },
|
||||
{ -1, nullptr }
|
||||
};
|
||||
|
||||
static const id_part intel_part[] = {
|
||||
|
@ -195,23 +222,50 @@ static const id_part intel_part[] = {
|
|||
{ 0x689, "PXA31x" },
|
||||
{ 0xb11, "SA1110" },
|
||||
{ 0xc12, "IPX1200" },
|
||||
{ -1, nullptr },
|
||||
{ -1, nullptr }
|
||||
};
|
||||
|
||||
static const struct id_part fujitsu_part[] = {
|
||||
{ 0x001, "A64FX" },
|
||||
{ -1, "unknown" },
|
||||
{ -1, nullptr }
|
||||
};
|
||||
|
||||
static const id_part hisi_part[] = {
|
||||
{ 0xd01, "Kunpeng-920" }, /* aka tsv110 */
|
||||
{ -1, nullptr },
|
||||
{ 0xd01, "Kunpeng-920" }, /* aka tsv110 */
|
||||
{ 0xd40, "Cortex-A76" }, /* HiSilicon uses this ID though advertises A76 */
|
||||
{ -1, nullptr }
|
||||
};
|
||||
|
||||
static const id_part apple_part[] = {
|
||||
{ 0x022, "M1" },
|
||||
{ 0x023, "M1" },
|
||||
{ -1, nullptr },
|
||||
{ 0x024, "M1-Pro" },
|
||||
{ 0x025, "M1-Pro" },
|
||||
{ 0x028, "M1-Max" },
|
||||
{ 0x029, "M1-Max" },
|
||||
{ 0x032, "M2" },
|
||||
{ 0x033, "M2" },
|
||||
{ 0x034, "M2-Pro" },
|
||||
{ 0x035, "M2-Pro" },
|
||||
{ 0x038, "M2-Max" },
|
||||
{ 0x039, "M2-Max" },
|
||||
{ -1, nullptr }
|
||||
};
|
||||
|
||||
|
||||
static const struct id_part ft_part[] = {
|
||||
{ 0x660, "FTC660" },
|
||||
{ 0x661, "FTC661" },
|
||||
{ 0x662, "FTC662" },
|
||||
{ 0x663, "FTC663" },
|
||||
{ -1, nullptr }
|
||||
};
|
||||
|
||||
|
||||
static const struct id_part ampere_part[] = {
|
||||
{ 0xac3, "Ampere-1" },
|
||||
{ 0xac4, "Ampere-1a" },
|
||||
{ -1, nullptr }
|
||||
};
|
||||
|
||||
|
||||
|
@ -229,7 +283,9 @@ static const hw_impl hw_implementer[] = {
|
|||
{ 0x56, marvell_part, "Marvell" },
|
||||
{ 0x61, apple_part, "Apple" },
|
||||
{ 0x66, faraday_part, "Faraday" },
|
||||
{ 0x69, intel_part, "Intel" }
|
||||
{ 0x69, intel_part, "Intel" },
|
||||
{ 0x70, ft_part, "Phytium" },
|
||||
{ 0xc0, ampere_part, "Ampere" }
|
||||
};
|
||||
|
||||
|
||||
|
|
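Note: the tables above are keyed by the MIDR implementer and part IDs reported by ARM CPUs. Below is a small, self-contained sketch of how such a pair could be resolved to a name using tables of the same shape; the struct layout, table contents and function name here are illustrative, not the project's own.

    #include <cstdio>

    // Illustrative structures shaped like the id_part / hw_impl tables above.
    struct id_part { int id; const char *name; };
    struct hw_impl { int id; const id_part *parts; const char *name; };

    static const id_part demo_arm_part[]    = { { 0xd4d, "Cortex-A715" }, { -1, nullptr } };
    static const hw_impl demo_implementer[] = { { 0x41, demo_arm_part, "ARM" } };

    static const char *lookup_part_name(int implementer, int part)
    {
        for (const hw_impl &impl : demo_implementer) {
            if (impl.id != implementer) {
                continue;
            }
            // Tables end with a { -1, nullptr } sentinel.
            for (const id_part *p = impl.parts; p->id != -1; ++p) {
                if (p->id == part) {
                    return p->name;
                }
            }
        }
        return nullptr;
    }

    int main()
    {
        std::printf("%s\n", lookup_part_name(0x41, 0xd4d)); // Cortex-A715
        return 0;
    }
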
|
@ -353,13 +353,9 @@ bool xmrig::CudaLib::open()
|
|||
# ifdef XMRIG_OS_LINUX
|
||||
if (m_loader == defaultLoader) {
|
||||
m_loader = Process::location(Process::ExeLocation, m_loader);
|
||||
}
|
||||
else {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (uv_dlopen(m_loader, &cudaLib) == 0) {
|
||||
return true;
|
||||
if (uv_dlopen(m_loader, &cudaLib) == 0) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
# endif
|
||||
|
||||
|
|
|
@ -138,6 +138,93 @@ __kernel void blake2b_initial_hash(__global void *out, __global const void* bloc
|
|||
t[7] = hash[7];
|
||||
}
|
||||
|
||||
void blake2b_512_process_double_block_variable(ulong *out, ulong* m, __global const ulong* in, uint in_len, uint out_len)
|
||||
{
|
||||
ulong v[16] =
|
||||
{
|
||||
iv0 ^ (0x01010000u | out_len), iv1, iv2, iv3, iv4 , iv5, iv6, iv7,
|
||||
iv0 , iv1, iv2, iv3, iv4 ^ 128, iv5, iv6, iv7,
|
||||
};
|
||||
|
||||
BLAKE2B_ROUNDS();
|
||||
|
||||
ulong h[8];
|
||||
v[0] = h[0] = v[0] ^ v[8] ^ iv0 ^ (0x01010000u | out_len);
|
||||
v[1] = h[1] = v[1] ^ v[9] ^ iv1;
|
||||
v[2] = h[2] = v[2] ^ v[10] ^ iv2;
|
||||
v[3] = h[3] = v[3] ^ v[11] ^ iv3;
|
||||
v[4] = h[4] = v[4] ^ v[12] ^ iv4;
|
||||
v[5] = h[5] = v[5] ^ v[13] ^ iv5;
|
||||
v[6] = h[6] = v[6] ^ v[14] ^ iv6;
|
||||
v[7] = h[7] = v[7] ^ v[15] ^ iv7;
|
||||
v[8] = iv0;
|
||||
v[9] = iv1;
|
||||
v[10] = iv2;
|
||||
v[11] = iv3;
|
||||
v[12] = iv4 ^ in_len;
|
||||
v[13] = iv5;
|
||||
v[14] = ~iv6;
|
||||
v[15] = iv7;
|
||||
|
||||
m[ 0] = (in_len > 128) ? in[16] : 0;
|
||||
m[ 1] = (in_len > 136) ? in[17] : 0;
|
||||
m[ 2] = (in_len > 144) ? in[18] : 0;
|
||||
m[ 3] = (in_len > 152) ? in[19] : 0;
|
||||
m[ 4] = (in_len > 160) ? in[20] : 0;
|
||||
m[ 5] = (in_len > 168) ? in[21] : 0;
|
||||
m[ 6] = (in_len > 176) ? in[22] : 0;
|
||||
m[ 7] = (in_len > 184) ? in[23] : 0;
|
||||
m[ 8] = (in_len > 192) ? in[24] : 0;
|
||||
m[ 9] = (in_len > 200) ? in[25] : 0;
|
||||
m[10] = (in_len > 208) ? in[26] : 0;
|
||||
m[11] = (in_len > 216) ? in[27] : 0;
|
||||
m[12] = (in_len > 224) ? in[28] : 0;
|
||||
m[13] = (in_len > 232) ? in[29] : 0;
|
||||
m[14] = (in_len > 240) ? in[30] : 0;
|
||||
m[15] = (in_len > 248) ? in[31] : 0;
|
||||
|
||||
if (in_len % sizeof(ulong))
|
||||
m[(in_len - 128) / sizeof(ulong)] &= (ulong)(-1) >> (64 - (in_len % sizeof(ulong)) * 8);
|
||||
|
||||
BLAKE2B_ROUNDS();
|
||||
|
||||
if (out_len > 0) out[0] = h[0] ^ v[0] ^ v[8];
|
||||
if (out_len > 8) out[1] = h[1] ^ v[1] ^ v[9];
|
||||
if (out_len > 16) out[2] = h[2] ^ v[2] ^ v[10];
|
||||
if (out_len > 24) out[3] = h[3] ^ v[3] ^ v[11];
|
||||
if (out_len > 32) out[4] = h[4] ^ v[4] ^ v[12];
|
||||
if (out_len > 40) out[5] = h[5] ^ v[5] ^ v[13];
|
||||
if (out_len > 48) out[6] = h[6] ^ v[6] ^ v[14];
|
||||
if (out_len > 56) out[7] = h[7] ^ v[7] ^ v[15];
|
||||
}
|
||||
|
||||
__attribute__((reqd_work_group_size(64, 1, 1)))
|
||||
__kernel void blake2b_initial_hash_double(__global void *out, __global const void* blockTemplate, uint blockTemplateSize, uint start_nonce)
|
||||
{
|
||||
const uint global_index = get_global_id(0);
|
||||
|
||||
__global const ulong* p = (__global const ulong*) blockTemplate;
|
||||
|
||||
ulong m[16] = { p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7], p[8], p[9], p[10], p[11], p[12], p[13], p[14], p[15] };
|
||||
|
||||
const ulong nonce = start_nonce + global_index;
|
||||
m[4] = (m[4] & ((ulong)(-1) >> 8)) | (nonce << 56);
|
||||
m[5] = (m[5] & ((ulong)(-1) << 24)) | (nonce >> 8);
|
||||
|
||||
ulong hash[8];
|
||||
blake2b_512_process_double_block_variable(hash, m, p, blockTemplateSize, 64);
|
||||
|
||||
__global ulong* t = ((__global ulong*) out) + global_index * 8;
|
||||
t[0] = hash[0];
|
||||
t[1] = hash[1];
|
||||
t[2] = hash[2];
|
||||
t[3] = hash[3];
|
||||
t[4] = hash[4];
|
||||
t[5] = hash[5];
|
||||
t[6] = hash[6];
|
||||
t[7] = hash[7];
|
||||
}
#define in_len 256

#define out_len 32
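The tail-masking line in blake2b_512_process_double_block_variable keeps only the valid bytes of the last, partially filled 8-byte word, and only applies when the input length is not a multiple of 8. A minimal host-side check of the same expression, illustrative only, assuming a 180-byte input:

    #include <cstdint>
    #include <cstdio>

    int main()
    {
        // For a 180-byte input: the second block starts at byte 128, so word
        // (180 - 128) / 8 = 6 holds only 4 valid bytes and the mask keeps the
        // low 32 bits, exactly as in the kernel expression.
        const uint32_t in_len = 180;
        const uint64_t mask   = ~0ULL >> (64 - (in_len % sizeof(uint64_t)) * 8);

        std::printf("word %u, mask 0x%016llx\n",
                    (in_len - 128) / static_cast<uint32_t>(sizeof(uint64_t)),
                    static_cast<unsigned long long>(mask)); // word 6, mask 0x00000000ffffffff
        return 0;
    }
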
File diff suppressed because it is too large
@ -0,0 +1,58 @@
|
|||
/* XMRig
|
||||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
|
||||
#include "backend/opencl/kernels/rx/Blake2bInitialHashDoubleKernel.h"
|
||||
#include "backend/opencl/wrappers/OclLib.h"
|
||||
|
||||
|
||||
void xmrig::Blake2bInitialHashDoubleKernel::enqueue(cl_command_queue queue, size_t threads)
|
||||
{
|
||||
const size_t gthreads = threads;
|
||||
static const size_t lthreads = 64;
|
||||
|
||||
enqueueNDRange(queue, 1, nullptr, &gthreads, &lthreads);
|
||||
}
|
||||
|
||||
|
||||
// __kernel void blake2b_initial_hash_double(__global void *out, __global const void* blockTemplate, uint blockTemplateSize, uint start_nonce)
|
||||
void xmrig::Blake2bInitialHashDoubleKernel::setArgs(cl_mem out, cl_mem blockTemplate)
|
||||
{
|
||||
setArg(0, sizeof(cl_mem), &out);
|
||||
setArg(1, sizeof(cl_mem), &blockTemplate);
|
||||
}
|
||||
|
||||
|
||||
void xmrig::Blake2bInitialHashDoubleKernel::setBlobSize(size_t size)
|
||||
{
|
||||
const uint32_t s = size;
|
||||
|
||||
setArg(2, sizeof(uint32_t), &s);
|
||||
}
|
||||
|
||||
|
||||
void xmrig::Blake2bInitialHashDoubleKernel::setNonce(uint32_t nonce)
|
||||
{
|
||||
setArg(3, sizeof(uint32_t), &nonce);
|
||||
}
|
|
@ -0,0 +1,50 @@
|
|||
/* XMRig
|
||||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef XMRIG_BLAKE2BINITIALHASHDOUBLEKERNEL_H
|
||||
#define XMRIG_BLAKE2BINITIALHASHDOUBLEKERNEL_H
|
||||
|
||||
|
||||
#include "backend/opencl/wrappers/OclKernel.h"
|
||||
|
||||
|
||||
namespace xmrig {
|
||||
|
||||
|
||||
class Blake2bInitialHashDoubleKernel : public OclKernel
|
||||
{
|
||||
public:
|
||||
inline Blake2bInitialHashDoubleKernel(cl_program program) : OclKernel(program, "blake2b_initial_hash_double") {}
|
||||
|
||||
void enqueue(cl_command_queue queue, size_t threads);
|
||||
void setArgs(cl_mem out, cl_mem blockTemplate);
|
||||
void setBlobSize(size_t size);
|
||||
void setNonce(uint32_t nonce);
|
||||
};
|
||||
|
||||
|
||||
} // namespace xmrig
|
||||
|
||||
|
||||
#endif /* XMRIG_BLAKE2BINITIALHASHDOUBLEKERNEL_H */
|
|
@ -80,6 +80,7 @@ if (WITH_OPENCL)
|
|||
if (WITH_RANDOMX)
|
||||
list(APPEND HEADERS_BACKEND_OPENCL
|
||||
src/backend/opencl/kernels/rx/Blake2bHashRegistersKernel.h
|
||||
src/backend/opencl/kernels/rx/Blake2bInitialHashDoubleKernel.h
|
||||
src/backend/opencl/kernels/rx/Blake2bInitialHashKernel.h
|
||||
src/backend/opencl/kernels/rx/ExecuteVmKernel.h
|
||||
src/backend/opencl/kernels/rx/FillAesKernel.h
|
||||
|
@ -96,6 +97,7 @@ if (WITH_OPENCL)
|
|||
list(APPEND SOURCES_BACKEND_OPENCL
|
||||
src/backend/opencl/generators/ocl_generic_rx_generator.cpp
|
||||
src/backend/opencl/kernels/rx/Blake2bHashRegistersKernel.cpp
|
||||
src/backend/opencl/kernels/rx/Blake2bInitialHashDoubleKernel.cpp
|
||||
src/backend/opencl/kernels/rx/Blake2bInitialHashKernel.cpp
|
||||
src/backend/opencl/kernels/rx/ExecuteVmKernel.cpp
|
||||
src/backend/opencl/kernels/rx/FillAesKernel.cpp
|
||||
|
|
|
@ -23,6 +23,9 @@
|
|||
*/
|
||||
|
||||
|
||||
#include <stdexcept>
|
||||
|
||||
|
||||
#include "backend/opencl/runners/OclBaseRunner.h"
|
||||
#include "backend/opencl/cl/OclSource.h"
|
||||
#include "backend/opencl/OclCache.h"
|
||||
|
|
|
@ -16,6 +16,9 @@
|
|||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <stdexcept>
|
||||
|
||||
|
||||
#include "backend/opencl/runners/OclCnRunner.h"
|
||||
#include "backend/opencl/kernels/Cn0Kernel.h"
|
||||
#include "backend/opencl/kernels/Cn1Kernel.h"
|
||||
|
|
|
@ -16,6 +16,9 @@
|
|||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <stdexcept>
|
||||
|
||||
|
||||
#include "backend/opencl/runners/OclKawPowRunner.h"
|
||||
#include "backend/common/Tags.h"
|
||||
#include "3rdparty/libethash/ethash_internal.h"
|
||||
|
|
|
@ -25,6 +25,7 @@
|
|||
#include "backend/opencl/runners/OclRxBaseRunner.h"
|
||||
#include "backend/opencl/kernels/rx/Blake2bHashRegistersKernel.h"
|
||||
#include "backend/opencl/kernels/rx/Blake2bInitialHashKernel.h"
|
||||
#include "backend/opencl/kernels/rx/Blake2bInitialHashDoubleKernel.h"
|
||||
#include "backend/opencl/kernels/rx/FillAesKernel.h"
|
||||
#include "backend/opencl/kernels/rx/FindSharesKernel.h"
|
||||
#include "backend/opencl/kernels/rx/HashAesKernel.h"
|
||||
|
@ -71,6 +72,7 @@ xmrig::OclRxBaseRunner::~OclRxBaseRunner()
|
|||
delete m_fillAes4Rx4_entropy;
|
||||
delete m_hashAes1Rx4;
|
||||
delete m_blake2b_initial_hash;
|
||||
delete m_blake2b_initial_hash_double;
|
||||
delete m_blake2b_hash_registers_32;
|
||||
delete m_blake2b_hash_registers_64;
|
||||
delete m_find_shares;
|
||||
|
@ -87,12 +89,28 @@ void xmrig::OclRxBaseRunner::run(uint32_t nonce, uint32_t *hashOutput)
|
|||
{
|
||||
static const uint32_t zero = 0;
|
||||
|
||||
m_blake2b_initial_hash->setNonce(nonce);
|
||||
if (m_jobSize <= 128) {
|
||||
m_blake2b_initial_hash->setNonce(nonce);
|
||||
}
|
||||
else if (m_jobSize <= 256) {
|
||||
m_blake2b_initial_hash_double->setNonce(nonce);
|
||||
}
|
||||
else {
|
||||
hashOutput[0xFF] = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
m_find_shares->setNonce(nonce);
|
||||
|
||||
enqueueWriteBuffer(m_output, CL_FALSE, sizeof(cl_uint) * 0xFF, sizeof(uint32_t), &zero);
|
||||
|
||||
m_blake2b_initial_hash->enqueue(m_queue, m_intensity);
|
||||
if (m_jobSize <= 128) {
|
||||
m_blake2b_initial_hash->enqueue(m_queue, m_intensity);
|
||||
}
|
||||
else {
|
||||
m_blake2b_initial_hash_double->enqueue(m_queue, m_intensity);
|
||||
}
|
||||
|
||||
m_fillAes1Rx4_scratchpad->enqueue(m_queue, m_intensity);
|
||||
|
||||
const uint32_t programCount = RxAlgo::programCount(m_algorithm);
|
||||
|
@ -134,7 +152,11 @@ void xmrig::OclRxBaseRunner::set(const Job &job, uint8_t *blob)
|
|||
|
||||
enqueueWriteBuffer(m_input, CL_TRUE, 0, Job::kMaxBlobSize, blob);
|
||||
|
||||
m_jobSize = job.size();
|
||||
|
||||
m_blake2b_initial_hash->setBlobSize(job.size());
|
||||
m_blake2b_initial_hash_double->setBlobSize(job.size());
|
||||
|
||||
m_find_shares->setTarget(job.target());
|
||||
}
|
||||
|
||||
|
@ -166,6 +188,9 @@ void xmrig::OclRxBaseRunner::build()
|
|||
m_blake2b_initial_hash = new Blake2bInitialHashKernel(m_program);
|
||||
m_blake2b_initial_hash->setArgs(m_hashes, m_input);
|
||||
|
||||
m_blake2b_initial_hash_double = new Blake2bInitialHashDoubleKernel(m_program);
|
||||
m_blake2b_initial_hash_double->setArgs(m_hashes, m_input);
|
||||
|
||||
m_blake2b_hash_registers_32 = new Blake2bHashRegistersKernel(m_program, "blake2b_hash_registers_32");
|
||||
m_blake2b_hash_registers_64 = new Blake2bHashRegistersKernel(m_program, "blake2b_hash_registers_64");
|
||||
|
||||
|
|
|
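The run() and set() changes above pick the initial-hash kernel from the job blob size: up to 128 bytes fits one 128-byte BLAKE2b block, up to 256 bytes needs the double-block variant, and anything larger is skipped. A standalone sketch of that decision, with a hypothetical enum and function name rather than the real kernel objects:

    #include <cstddef>
    #include <cstdio>

    // Illustrative only: mirrors the size check in OclRxBaseRunner::run().
    enum class InitialHashKernel { Single, Double, Unsupported };

    InitialHashKernel selectInitialHashKernel(size_t jobSize)
    {
        if (jobSize <= 128) {   // fits in one 128-byte BLAKE2b block
            return InitialHashKernel::Single;
        }
        if (jobSize <= 256) {   // needs the two-block variant
            return InitialHashKernel::Double;
        }
        return InitialHashKernel::Unsupported;   // run() bails out in this case
    }

    int main()
    {
        // A typical Monero-style blob is 76 bytes, so the single-block kernel is enough.
        std::printf("%d\n", static_cast<int>(selectInitialHashKernel(76))); // 0 (Single)
        return 0;
    }
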
@ -35,6 +35,7 @@ namespace xmrig {
|
|||
|
||||
class Blake2bHashRegistersKernel;
|
||||
class Blake2bInitialHashKernel;
|
||||
class Blake2bInitialHashDoubleKernel;
|
||||
class FillAesKernel;
|
||||
class FindSharesKernel;
|
||||
class HashAesKernel;
|
||||
|
@ -58,21 +59,24 @@ protected:
|
|||
protected:
|
||||
virtual void execute(uint32_t iteration) = 0;
|
||||
|
||||
Blake2bHashRegistersKernel *m_blake2b_hash_registers_32 = nullptr;
|
||||
Blake2bHashRegistersKernel *m_blake2b_hash_registers_64 = nullptr;
|
||||
Blake2bInitialHashKernel *m_blake2b_initial_hash = nullptr;
|
||||
Blake2bHashRegistersKernel *m_blake2b_hash_registers_32 = nullptr;
|
||||
Blake2bHashRegistersKernel *m_blake2b_hash_registers_64 = nullptr;
|
||||
Blake2bInitialHashKernel *m_blake2b_initial_hash = nullptr;
|
||||
Blake2bInitialHashDoubleKernel *m_blake2b_initial_hash_double = nullptr;
|
||||
Buffer m_seed;
|
||||
cl_mem m_dataset = nullptr;
|
||||
cl_mem m_entropy = nullptr;
|
||||
cl_mem m_hashes = nullptr;
|
||||
cl_mem m_rounding = nullptr;
|
||||
cl_mem m_scratchpads = nullptr;
|
||||
FillAesKernel *m_fillAes1Rx4_scratchpad = nullptr;
|
||||
FillAesKernel *m_fillAes4Rx4_entropy = nullptr;
|
||||
FindSharesKernel *m_find_shares = nullptr;
|
||||
HashAesKernel *m_hashAes1Rx4 = nullptr;
|
||||
uint32_t m_gcn_version = 12;
|
||||
uint32_t m_worksize = 8;
|
||||
cl_mem m_dataset = nullptr;
|
||||
cl_mem m_entropy = nullptr;
|
||||
cl_mem m_hashes = nullptr;
|
||||
cl_mem m_rounding = nullptr;
|
||||
cl_mem m_scratchpads = nullptr;
|
||||
FillAesKernel *m_fillAes1Rx4_scratchpad = nullptr;
|
||||
FillAesKernel *m_fillAes4Rx4_entropy = nullptr;
|
||||
FindSharesKernel *m_find_shares = nullptr;
|
||||
HashAesKernel *m_hashAes1Rx4 = nullptr;
|
||||
uint32_t m_gcn_version = 12;
|
||||
uint32_t m_worksize = 8;
|
||||
|
||||
size_t m_jobSize = 0;
|
||||
};
|
||||
|
||||
|
||||
|
|
|
@ -16,6 +16,9 @@
|
|||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <stdexcept>
|
||||
|
||||
|
||||
#include "backend/opencl/runners/OclRxJitRunner.h"
|
||||
#include "backend/opencl/cl/rx/randomx_run_gfx803.h"
|
||||
#include "backend/opencl/cl/rx/randomx_run_gfx900.h"
|
||||
|
|
|
@ -399,6 +399,9 @@ private:
|
|||
uv_loop_t* m_loop = nullptr;
|
||||
uv_thread_t m_loopThread = {};
|
||||
uv_async_t m_shutdownAsync = {};
|
||||
uv_async_t m_batonAsync = {};
|
||||
|
||||
std::vector<KawPowBaton> m_batons;
|
||||
|
||||
static void loop(void* data)
|
||||
{
|
||||
|
@ -419,19 +422,37 @@ void KawPowBuilder::build_async(const IOclRunner& runner, uint64_t period, uint3
|
|||
if (!m_loop) {
|
||||
m_loop = new uv_loop_t{};
|
||||
uv_loop_init(m_loop);
|
||||
uv_async_init(m_loop, &m_shutdownAsync, [](uv_async_t* handle) { uv_close(reinterpret_cast<uv_handle_t*>(handle), nullptr); });
|
||||
|
||||
uv_async_init(m_loop, &m_shutdownAsync, [](uv_async_t* handle)
|
||||
{
|
||||
KawPowBuilder* builder = reinterpret_cast<KawPowBuilder*>(handle->data);
|
||||
uv_close(reinterpret_cast<uv_handle_t*>(&builder->m_shutdownAsync), nullptr);
|
||||
uv_close(reinterpret_cast<uv_handle_t*>(&builder->m_batonAsync), nullptr);
|
||||
});
|
||||
|
||||
uv_async_init(m_loop, &m_batonAsync, [](uv_async_t* handle)
|
||||
{
|
||||
std::vector<KawPowBaton> batons;
|
||||
{
|
||||
KawPowBuilder* b = reinterpret_cast<KawPowBuilder*>(handle->data);
|
||||
|
||||
std::lock_guard<std::mutex> lock(b->m_mutex);
|
||||
batons = std::move(b->m_batons);
|
||||
}
|
||||
|
||||
for (const KawPowBaton& baton : batons) {
|
||||
builder.build(baton.runner, baton.period, baton.worksize);
|
||||
}
|
||||
});
|
||||
|
||||
m_shutdownAsync.data = this;
|
||||
m_batonAsync.data = this;
|
||||
|
||||
uv_thread_create(&m_loopThread, loop, this);
|
||||
}
|
||||
|
||||
KawPowBaton* baton = new KawPowBaton(runner, period, worksize);
|
||||
|
||||
uv_queue_work(m_loop, &baton->req,
|
||||
[](uv_work_t* req) {
|
||||
KawPowBaton* baton = static_cast<KawPowBaton*>(req->data);
|
||||
builder.build(baton->runner, baton->period, baton->worksize);
|
||||
},
|
||||
[](uv_work_t* req, int) { delete static_cast<KawPowBaton*>(req->data); }
|
||||
);
|
||||
m_batons.emplace_back(runner, period, worksize);
|
||||
uv_async_send(&m_batonAsync);
|
||||
}
|
||||
|
||||
|
||||
|
|
|
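The build_async() change replaces per-request uv_queue_work() with a baton queue that is drained on the builder's own loop thread: requests are appended under a mutex and the loop is woken with uv_async_send(). A minimal, self-contained sketch of that producer/consumer pattern with libuv; the struct, std::mutex and int work items are illustrative, not the project's members:

    #include <mutex>
    #include <vector>
    #include <cstdio>
    #include <uv.h>

    struct WorkQueue {
        uv_async_t async{};
        std::mutex lock;
        std::vector<int> items;
    };

    int main()
    {
        WorkQueue queue;

        // The async callback runs on the loop thread and drains the queue.
        uv_async_init(uv_default_loop(), &queue.async, [](uv_async_t *handle) {
            auto *q = static_cast<WorkQueue *>(handle->data);
            std::vector<int> batch;
            {
                std::lock_guard<std::mutex> guard(q->lock);
                batch = std::move(q->items);
            }
            for (int item : batch) {
                std::printf("processing %d\n", item);
            }
            uv_close(reinterpret_cast<uv_handle_t *>(handle), nullptr);
        });
        queue.async.data = &queue;

        // Producer side: enqueue under the lock, then wake the loop.
        {
            std::lock_guard<std::mutex> guard(queue.lock);
            queue.items.push_back(42);
        }
        uv_async_send(&queue.async);

        return uv_run(uv_default_loop(), UV_RUN_DEFAULT);
    }
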
@ -1,6 +1,6 @@
|
|||
/* XMRig
|
||||
* Copyright (c) 2018-2021 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright (c) 2016-2021 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
* Copyright (c) 2018-2024 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright (c) 2016-2024 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
|
@ -25,6 +25,8 @@
|
|||
#include "base/crypto/keccak.h"
|
||||
#include "base/io/Env.h"
|
||||
#include "base/io/json/Json.h"
|
||||
#include "base/io/log/Log.h"
|
||||
#include "base/io/log/Tags.h"
|
||||
#include "base/kernel/Base.h"
|
||||
#include "base/tools/Chrono.h"
|
||||
#include "base/tools/Cvt.h"
|
||||
|
@ -91,7 +93,11 @@ xmrig::Api::Api(Base *base) :
|
|||
xmrig::Api::~Api()
|
||||
{
|
||||
# ifdef XMRIG_FEATURE_HTTP
|
||||
delete m_httpd;
|
||||
if (m_httpd) {
|
||||
m_httpd->stop();
|
||||
delete m_httpd;
|
||||
m_httpd = nullptr; // Ensure the pointer is set to nullptr after deletion
|
||||
}
|
||||
# endif
|
||||
}
|
||||
|
||||
|
@ -109,8 +115,15 @@ void xmrig::Api::start()
|
|||
genWorkerId(m_base->config()->apiWorkerId());
|
||||
|
||||
# ifdef XMRIG_FEATURE_HTTP
|
||||
m_httpd = new Httpd(m_base);
|
||||
m_httpd->start();
|
||||
if (!m_httpd) {
|
||||
m_httpd = new Httpd(m_base);
|
||||
if (!m_httpd->start()) {
|
||||
LOG_ERR("%s " RED_BOLD("HTTP API server failed to start."), Tags::network());
|
||||
|
||||
delete m_httpd; // Properly handle failure to start
|
||||
m_httpd = nullptr;
|
||||
}
|
||||
}
|
||||
# endif
|
||||
}
|
||||
|
||||
|
@ -118,7 +131,26 @@ void xmrig::Api::start()
|
|||
void xmrig::Api::stop()
|
||||
{
|
||||
# ifdef XMRIG_FEATURE_HTTP
|
||||
m_httpd->stop();
|
||||
if (m_httpd) {
|
||||
m_httpd->stop();
|
||||
}
|
||||
# endif
|
||||
}
|
||||
|
||||
|
||||
void xmrig::Api::tick()
|
||||
{
|
||||
# ifdef XMRIG_FEATURE_HTTP
|
||||
if (!m_httpd || !m_base->config()->http().isEnabled() || m_httpd->isBound()) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (++m_ticks % 10 == 0) {
|
||||
m_ticks = 0;
|
||||
if (m_httpd) {
|
||||
m_httpd->start();
|
||||
}
|
||||
}
|
||||
# endif
|
||||
}
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/* XMRig
|
||||
* Copyright (c) 2018-2021 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright (c) 2016-2021 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
* Copyright (c) 2018-2024 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright (c) 2016-2024 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
|
@ -21,7 +21,6 @@
|
|||
|
||||
|
||||
#include <vector>
|
||||
#include <cstdint>
|
||||
|
||||
|
||||
#include "base/kernel/interfaces/IBaseListener.h"
|
||||
|
@ -44,7 +43,7 @@ class Api : public IBaseListener
|
|||
public:
|
||||
XMRIG_DISABLE_COPY_MOVE_DEFAULT(Api)
|
||||
|
||||
Api(Base *base);
|
||||
explicit Api(Base *base);
|
||||
~Api() override;
|
||||
|
||||
inline const char *id() const { return m_id; }
|
||||
|
@ -54,6 +53,7 @@ public:
|
|||
void request(const HttpData &req);
|
||||
void start();
|
||||
void stop();
|
||||
void tick();
|
||||
|
||||
protected:
|
||||
void onConfigChanged(Config *config, Config *previousConfig) override;
|
||||
|
@ -65,14 +65,15 @@ private:
|
|||
|
||||
Base *m_base;
|
||||
char m_id[32]{};
|
||||
String m_workerId;
|
||||
const uint64_t m_timestamp;
|
||||
Httpd *m_httpd = nullptr;
|
||||
Httpd *m_httpd = nullptr;
|
||||
std::vector<IApiListener *> m_listeners;
|
||||
String m_workerId;
|
||||
uint8_t m_ticks = 0;
|
||||
};
|
||||
|
||||
|
||||
} // namespace xmrig
|
||||
|
||||
|
||||
#endif /* XMRIG_API_H */
|
||||
#endif // XMRIG_API_H
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/* XMRig
|
||||
* Copyright (c) 2018-2021 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright (c) 2016-2021 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
* Copyright (c) 2018-2023 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright (c) 2016-2023 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
|
@ -22,11 +22,6 @@
|
|||
|
||||
#include "base/kernel/interfaces/IBaseListener.h"
|
||||
#include "base/net/http/HttpListener.h"
|
||||
#include "base/tools/Object.h"
|
||||
|
||||
|
||||
#include <cstdint>
|
||||
#include <memory>
|
||||
|
||||
|
||||
namespace xmrig {
|
||||
|
@ -43,9 +38,11 @@ class Httpd : public IBaseListener, public IHttpListener
|
|||
public:
|
||||
XMRIG_DISABLE_COPY_MOVE_DEFAULT(Httpd)
|
||||
|
||||
Httpd(Base *base);
|
||||
explicit Httpd(Base *base);
|
||||
~Httpd() override;
|
||||
|
||||
inline bool isBound() const { return m_server != nullptr; }
|
||||
|
||||
bool start();
|
||||
void stop();
|
||||
|
||||
|
@ -69,7 +66,7 @@ private:
|
|||
};
|
||||
|
||||
|
||||
} /* namespace xmrig */
|
||||
} // namespace xmrig
|
||||
|
||||
|
||||
#endif /* XMRIG_HTTPD_H */
|
||||
#endif // XMRIG_HTTPD_H
|
||||
|
|
|
@ -53,6 +53,8 @@ static const CoinInfo coinInfo[] = {
|
|||
{ Algorithm::RX_KEVA, "KVA", "Kevacoin", 0, 0, MAGENTA_BG_BOLD(WHITE_BOLD_S " keva ") },
|
||||
{ Algorithm::KAWPOW_RVN, "RVN", "Ravencoin", 0, 0, BLUE_BG_BOLD( WHITE_BOLD_S " raven ") },
|
||||
{ Algorithm::RX_WOW, "WOW", "Wownero", 300, 100000000000, MAGENTA_BG_BOLD(WHITE_BOLD_S " wownero ") },
|
||||
{ Algorithm::RX_0, "ZEPH", "Zephyr", 120, 1000000000000, BLUE_BG_BOLD( WHITE_BOLD_S " zephyr ") },
|
||||
{ Algorithm::RX_0, "Townforge","Townforge", 30, 100000000, MAGENTA_BG_BOLD(WHITE_BOLD_S " townforge ") },
|
||||
};
|
||||
|
||||
|
||||
|
|
|
@ -39,6 +39,8 @@ public:
|
|||
KEVA,
|
||||
RAVEN,
|
||||
WOWNERO,
|
||||
ZEPHYR,
|
||||
TOWNFORGE,
|
||||
MAX
|
||||
};
|
||||
|
||||
|
|
|
@ -22,7 +22,6 @@
|
|||
|
||||
|
||||
#include <cassert>
|
||||
#include <uv.h>
|
||||
|
||||
|
||||
namespace xmrig {
|
||||
|
@ -40,6 +39,32 @@ static void fsWriteCallback(uv_fs_t *req)
|
|||
} // namespace xmrig
|
||||
|
||||
|
||||
xmrig::FileLogWriter::FileLogWriter()
|
||||
{
|
||||
init();
|
||||
}
|
||||
|
||||
xmrig::FileLogWriter::FileLogWriter(const char* fileName)
|
||||
{
|
||||
init();
|
||||
open(fileName);
|
||||
}
|
||||
|
||||
xmrig::FileLogWriter::~FileLogWriter()
|
||||
{
|
||||
uv_close(reinterpret_cast<uv_handle_t*>(&m_flushAsync), nullptr);
|
||||
|
||||
uv_mutex_destroy(&m_buffersLock);
|
||||
}
|
||||
|
||||
void xmrig::FileLogWriter::init()
|
||||
{
|
||||
uv_mutex_init(&m_buffersLock);
|
||||
|
||||
uv_async_init(uv_default_loop(), &m_flushAsync, on_flush);
|
||||
m_flushAsync.data = this;
|
||||
}
|
||||
|
||||
bool xmrig::FileLogWriter::open(const char *fileName)
|
||||
{
|
||||
assert(fileName != nullptr);
|
||||
|
@ -77,11 +102,12 @@ bool xmrig::FileLogWriter::write(const char *data, size_t size)
|
|||
uv_buf_t buf = uv_buf_init(new char[size], size);
|
||||
memcpy(buf.base, data, size);
|
||||
|
||||
auto req = new uv_fs_t;
|
||||
req->data = buf.base;
|
||||
uv_mutex_lock(&m_buffersLock);
|
||||
|
||||
uv_fs_write(uv_default_loop(), req, m_file, &buf, 1, m_pos, fsWriteCallback);
|
||||
m_pos += size;
|
||||
m_buffers.emplace_back(buf);
|
||||
uv_async_send(&m_flushAsync);
|
||||
|
||||
uv_mutex_unlock(&m_buffersLock);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
@ -89,18 +115,38 @@ bool xmrig::FileLogWriter::write(const char *data, size_t size)
|
|||
|
||||
bool xmrig::FileLogWriter::writeLine(const char *data, size_t size)
|
||||
{
|
||||
const uv_buf_t buf[2] = {
|
||||
uv_buf_init(new char[size], size),
|
||||
uv_buf_init(const_cast<char *>(m_endl), sizeof(m_endl) - 1)
|
||||
};
|
||||
if (!isOpen()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
memcpy(buf[0].base, data, size);
|
||||
constexpr size_t N = sizeof(m_endl) - 1;
|
||||
|
||||
auto req = new uv_fs_t;
|
||||
req->data = buf[0].base;
|
||||
uv_buf_t buf = uv_buf_init(new char[size + N], size + N);
|
||||
memcpy(buf.base, data, size);
|
||||
memcpy(buf.base + size, m_endl, N);
|
||||
|
||||
uv_fs_write(uv_default_loop(), req, m_file, buf, 2, m_pos, fsWriteCallback);
|
||||
m_pos += (buf[0].len + buf[1].len);
|
||||
uv_mutex_lock(&m_buffersLock);
|
||||
|
||||
m_buffers.emplace_back(buf);
|
||||
uv_async_send(&m_flushAsync);
|
||||
|
||||
uv_mutex_unlock(&m_buffersLock);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void xmrig::FileLogWriter::flush()
|
||||
{
|
||||
uv_mutex_lock(&m_buffersLock);
|
||||
|
||||
for (uv_buf_t buf : m_buffers) {
|
||||
uv_fs_t* req = new uv_fs_t;
|
||||
req->data = buf.base;
|
||||
|
||||
uv_fs_write(uv_default_loop(), req, m_file, &buf, 1, m_pos, fsWriteCallback);
|
||||
m_pos += buf.len;
|
||||
}
|
||||
m_buffers.clear();
|
||||
|
||||
uv_mutex_unlock(&m_buffersLock);
|
||||
}
|
||||
|
|
|
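The writer above hands each buffer to uv_fs_write() and frees it in the completion callback (fsWriteCallback). A minimal standalone sketch of that "buffer owned by the request" pattern; the file name, flags, synchronous open and <fcntl.h> usage are assumptions for the sake of a short POSIX example:

    #include <cstring>
    #include <fcntl.h>
    #include <uv.h>

    int main()
    {
        uv_fs_t open_req;
        const uv_file fd = uv_fs_open(uv_default_loop(), &open_req, "demo.log",
                                      O_WRONLY | O_CREAT | O_APPEND, 0644, nullptr);
        uv_fs_req_cleanup(&open_req);

        const char line[] = "hello\n";
        uv_buf_t buf = uv_buf_init(new char[sizeof(line) - 1], sizeof(line) - 1);
        memcpy(buf.base, line, sizeof(line) - 1);

        auto *req  = new uv_fs_t;
        req->data  = buf.base;

        // Offset -1 appends; the buffer is released in the completion callback.
        uv_fs_write(uv_default_loop(), req, fd, &buf, 1, -1, [](uv_fs_t *r) {
            delete[] static_cast<char *>(r->data);
            uv_fs_req_cleanup(r);
            delete r;
        });

        const int rc = uv_run(uv_default_loop(), UV_RUN_DEFAULT);

        uv_fs_t close_req;
        uv_fs_close(uv_default_loop(), &close_req, fd, nullptr);
        uv_fs_req_cleanup(&close_req);
        return rc;
    }
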
@ -22,6 +22,8 @@
|
|||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <vector>
|
||||
#include <uv.h>
|
||||
|
||||
|
||||
namespace xmrig {
|
||||
|
@ -30,8 +32,10 @@ namespace xmrig {
|
|||
class FileLogWriter
|
||||
{
|
||||
public:
|
||||
FileLogWriter() = default;
|
||||
FileLogWriter(const char *fileName) { open(fileName); }
|
||||
FileLogWriter();
|
||||
FileLogWriter(const char* fileName);
|
||||
|
||||
~FileLogWriter();
|
||||
|
||||
inline bool isOpen() const { return m_file >= 0; }
|
||||
inline int64_t pos() const { return m_pos; }
|
||||
|
@ -49,6 +53,16 @@ private:
|
|||
|
||||
int m_file = -1;
|
||||
int64_t m_pos = 0;
|
||||
|
||||
uv_mutex_t m_buffersLock;
|
||||
std::vector<uv_buf_t> m_buffers;
|
||||
|
||||
uv_async_t m_flushAsync;
|
||||
|
||||
void init();
|
||||
|
||||
static void on_flush(uv_async_t* async) { reinterpret_cast<FileLogWriter*>(async->data)->flush(); }
|
||||
void flush();
|
||||
};
|
||||
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/* XMRig
|
||||
* Copyright (c) 2018-2021 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright (c) 2016-2021 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
* Copyright (c) 2018-2023 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright (c) 2016-2023 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
|
@ -16,7 +16,6 @@
|
|||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
|
||||
#include "base/kernel/Platform.h"
|
||||
|
||||
|
||||
|
@ -42,9 +41,12 @@ void xmrig::Platform::init(const char *userAgent)
|
|||
# ifdef XMRIG_FEATURE_TLS
|
||||
SSL_library_init();
|
||||
SSL_load_error_strings();
|
||||
|
||||
# if OPENSSL_VERSION_NUMBER < 0x30000000L || defined(LIBRESSL_VERSION_NUMBER)
|
||||
ERR_load_BIO_strings();
|
||||
ERR_load_crypto_strings();
|
||||
SSL_load_error_strings();
|
||||
# endif
|
||||
|
||||
OpenSSL_add_all_digests();
|
||||
# endif
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/* XMRig
|
||||
* Copyright (c) 2018-2021 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright (c) 2016-2021 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
* Copyright (c) 2018-2023 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright (c) 2016-2023 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
|
@ -49,6 +49,12 @@ public:
|
|||
static inline bool isUserActive(uint64_t ms) { return idleTime() < ms; }
|
||||
static inline const String &userAgent() { return m_userAgent; }
|
||||
|
||||
# ifdef XMRIG_OS_WIN
|
||||
static bool hasKeepalive();
|
||||
# else
|
||||
static constexpr bool hasKeepalive() { return true; }
|
||||
# endif
|
||||
|
||||
static bool isOnBatteryPower();
|
||||
static uint64_t idleTime();
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/* XMRig
|
||||
* Copyright (c) 2018-2020 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright (c) 2016-2020 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
* Copyright (c) 2018-2023 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright (c) 2016-2023 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
|
@ -16,9 +16,7 @@
|
|||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
|
||||
#include "base/kernel/Platform.h"
|
||||
#include "backend/cpu/platform/HwlocCpuInfo.h"
|
||||
#include "backend/cpu/Cpu.h"
|
||||
|
||||
|
||||
|
@ -29,20 +27,21 @@
|
|||
#ifndef XMRIG_OS_APPLE
|
||||
bool xmrig::Platform::setThreadAffinity(uint64_t cpu_id)
|
||||
{
|
||||
auto cpu = static_cast<HwlocCpuInfo *>(Cpu::info());
|
||||
hwloc_obj_t pu = hwloc_get_pu_obj_by_os_index(cpu->topology(), static_cast<unsigned>(cpu_id));
|
||||
auto topology = Cpu::info()->topology();
|
||||
auto pu = hwloc_get_pu_obj_by_os_index(topology, static_cast<unsigned>(cpu_id));
|
||||
|
||||
if (pu == nullptr) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (hwloc_set_cpubind(cpu->topology(), pu->cpuset, HWLOC_CPUBIND_THREAD | HWLOC_CPUBIND_STRICT) >= 0) {
|
||||
if (hwloc_set_cpubind(topology, pu->cpuset, HWLOC_CPUBIND_THREAD | HWLOC_CPUBIND_STRICT) >= 0) {
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(1));
|
||||
return true;
|
||||
}
|
||||
|
||||
const bool result = (hwloc_set_cpubind(cpu->topology(), pu->cpuset, HWLOC_CPUBIND_THREAD) >= 0);
|
||||
const bool result = (hwloc_set_cpubind(topology, pu->cpuset, HWLOC_CPUBIND_THREAD) >= 0);
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(1));
|
||||
|
||||
return result;
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/* XMRig
|
||||
* Copyright (c) 2018-2021 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright (c) 2016-2021 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
* Copyright (c) 2018-2023 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright (c) 2016-2023 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
|
@ -16,7 +16,6 @@
|
|||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
|
||||
#include <algorithm>
|
||||
#include <winsock2.h>
|
||||
#include <windows.h>
|
||||
|
@ -70,6 +69,12 @@ char *xmrig::Platform::createUserAgent()
|
|||
}
|
||||
|
||||
|
||||
bool xmrig::Platform::hasKeepalive()
|
||||
{
|
||||
return winOsVersion().dwMajorVersion >= 6;
|
||||
}
|
||||
|
||||
|
||||
#ifndef XMRIG_FEATURE_HWLOC
|
||||
bool xmrig::Platform::setThreadAffinity(uint64_t cpu_id)
|
||||
{
|
||||
|
|
|
@ -142,7 +142,7 @@ void xmrig::BaseConfig::printVersions()
|
|||
snprintf(buf, sizeof buf, "MSVC/%d", MSVC_VERSION);
|
||||
# endif
|
||||
|
||||
Log::print(GREEN_BOLD(" * ") WHITE_BOLD("%-13s") CYAN_BOLD("%s/%s") WHITE_BOLD(" %s"), "ABOUT", APP_NAME, APP_VERSION, buf);
|
||||
Log::print(GREEN_BOLD(" * ") WHITE_BOLD("%-13s") CYAN_BOLD("%s/%s") WHITE_BOLD(" %s") WHITE_BOLD(" (built for %s") WHITE_BOLD(" %s,") WHITE_BOLD(" %s)"), "ABOUT", APP_NAME, APP_VERSION, buf, APP_OS, APP_ARCH, APP_BITS);
|
||||
|
||||
std::string libs;
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/* XMRig
|
||||
* Copyright (c) 2018-2021 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright (c) 2016-2021 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
* Copyright (c) 2018-2023 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright (c) 2016-2023 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
|
@ -46,11 +46,11 @@ xmrig::String xmrig::DnsRecord::ip() const
|
|||
|
||||
if (m_type == AAAA) {
|
||||
buf = new char[45]();
|
||||
uv_ip6_name(reinterpret_cast<sockaddr_in6*>(m_data), buf, 45);
|
||||
uv_ip6_name(reinterpret_cast<const sockaddr_in6*>(m_data), buf, 45);
|
||||
}
|
||||
else {
|
||||
buf = new char[16]();
|
||||
uv_ip4_name(reinterpret_cast<sockaddr_in*>(m_data), buf, 16);
|
||||
uv_ip4_name(reinterpret_cast<const sockaddr_in*>(m_data), buf, 16);
|
||||
}
|
||||
|
||||
return buf;
|
||||
|
|
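The change above only adds const to the casts passed to uv_ip6_name()/uv_ip4_name(), matching the const source pointer these libuv helpers take. A tiny standalone example of the same uv_ip4_name() call on a throwaway address, illustrative only:

    #include <cstdio>
    #include <uv.h>

    int main()
    {
        sockaddr_in addr{};
        uv_ip4_addr("127.0.0.1", 3333, &addr);

        char buf[16] = {};
        uv_ip4_name(&addr, buf, sizeof(buf));   // takes a const sockaddr_in *
        std::printf("%s\n", buf);               // 127.0.0.1
        return 0;
    }
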
|
@ -1,6 +1,6 @@
|
|||
/* XMRig
|
||||
* Copyright (c) 2018-2021 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright (c) 2016-2021 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
* Copyright (c) 2018-2023 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright (c) 2016-2023 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
|
@ -28,18 +28,19 @@
|
|||
|
||||
namespace xmrig {
|
||||
|
||||
static Storage<DnsUvBackend>* storage = nullptr;
|
||||
|
||||
Storage<DnsUvBackend>& DnsUvBackend::getStorage()
|
||||
static Storage<DnsUvBackend> *storage = nullptr;
|
||||
|
||||
|
||||
Storage<DnsUvBackend> &DnsUvBackend::getStorage()
|
||||
{
|
||||
if (storage == nullptr) storage = new Storage<DnsUvBackend>();
|
||||
if (storage == nullptr) {
|
||||
storage = new Storage<DnsUvBackend>();
|
||||
}
|
||||
|
||||
return *storage;
|
||||
}
|
||||
|
||||
void DnsUvBackend::releaseStorage()
|
||||
{
|
||||
delete storage;
|
||||
}
|
||||
|
||||
static addrinfo hints{};
|
||||
|
||||
|
@ -61,8 +62,14 @@ xmrig::DnsUvBackend::DnsUvBackend()
|
|||
|
||||
xmrig::DnsUvBackend::~DnsUvBackend()
|
||||
{
|
||||
getStorage().release(m_key);
|
||||
releaseStorage();
|
||||
assert(storage);
|
||||
|
||||
storage->release(m_key);
|
||||
|
||||
if (storage->isEmpty()) {
|
||||
delete storage;
|
||||
storage = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
/* XMRig
|
||||
* Copyright (c) 2014-2019 heapwolf <https://github.com/heapwolf>
|
||||
* Copyright (c) 2018-2021 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright (c) 2016-2021 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
* Copyright (c) 2018-2024 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright (c) 2016-2024 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
|
@ -17,7 +17,6 @@
|
|||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
|
||||
#include "base/net/http/HttpApiResponse.h"
|
||||
#include "3rdparty/rapidjson/prettywriter.h"
|
||||
#include "3rdparty/rapidjson/stringbuffer.h"
|
||||
|
@ -65,7 +64,7 @@ void xmrig::HttpApiResponse::end()
|
|||
}
|
||||
}
|
||||
|
||||
if (!m_doc.MemberCount()) {
|
||||
if (m_doc.IsObject() && m_doc.ObjectEmpty()) {
|
||||
return HttpResponse::end();
|
||||
}
|
||||
|
||||
|
|
|
@ -21,6 +21,7 @@
|
|||
#define XMRIG_HTTPRESPONSE_H
|
||||
|
||||
|
||||
#include <cstdint>
|
||||
#include <map>
|
||||
#include <string>
|
||||
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
/* XMRig
|
||||
* Copyright (c) 2019 jtgrassie <https://github.com/jtgrassie>
|
||||
* Copyright (c) 2018-2021 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright (c) 2016-2021 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
* Copyright (c) 2018-2024 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright (c) 2016-2024 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
|
@ -42,13 +42,14 @@
|
|||
#include "base/io/json/JsonRequest.h"
|
||||
#include "base/io/log/Log.h"
|
||||
#include "base/kernel/interfaces/IClientListener.h"
|
||||
#include "base/kernel/Platform.h"
|
||||
#include "base/net/dns/Dns.h"
|
||||
#include "base/net/dns/DnsRecords.h"
|
||||
#include "base/net/stratum/Socks5.h"
|
||||
#include "base/net/tools/NetBuffer.h"
|
||||
#include "base/tools/Chrono.h"
|
||||
#include "base/tools/Cvt.h"
|
||||
#include "base/tools/cryptonote/BlobReader.h"
|
||||
#include "base/tools/Cvt.h"
|
||||
#include "net/JobResult.h"
|
||||
|
||||
|
||||
|
@ -343,6 +344,9 @@ bool xmrig::Client::close()
|
|||
setState(ClosingState);
|
||||
|
||||
if (uv_is_closing(reinterpret_cast<uv_handle_t*>(m_socket)) == 0) {
|
||||
if (Platform::hasKeepalive()) {
|
||||
uv_tcp_keepalive(m_socket, 0, 60);
|
||||
}
|
||||
uv_close(reinterpret_cast<uv_handle_t*>(m_socket), Client::onClose);
|
||||
}
|
||||
|
||||
|
@ -567,9 +571,9 @@ void xmrig::Client::connect(const sockaddr *addr)
|
|||
uv_tcp_init(uv_default_loop(), m_socket);
|
||||
uv_tcp_nodelay(m_socket, 1);
|
||||
|
||||
# ifndef WIN32
|
||||
uv_tcp_keepalive(m_socket, 1, 60);
|
||||
# endif
|
||||
if (Platform::hasKeepalive()) {
|
||||
uv_tcp_keepalive(m_socket, 1, 60);
|
||||
}
|
||||
|
||||
uv_tcp_connect(req, m_socket, addr, onConnect);
|
||||
}
|
||||
|
@ -585,7 +589,7 @@ void xmrig::Client::handshake()
|
|||
if (isTLS()) {
|
||||
m_expire = Chrono::steadyMSecs() + kResponseTimeout;
|
||||
|
||||
m_tls->handshake();
|
||||
m_tls->handshake(m_pool.isSNI() ? m_pool.host().data() : nullptr);
|
||||
}
|
||||
else
|
||||
# endif
|
||||
|
@ -605,7 +609,7 @@ bool xmrig::Client::parseLogin(const rapidjson::Value &result, int *code)
|
|||
|
||||
parseExtensions(result);
|
||||
|
||||
const bool rc = parseJob(result["job"], code);
|
||||
const bool rc = parseJob(Json::getObject(result, "job"), code);
|
||||
m_jobs = 0;
|
||||
|
||||
return rc;
|
||||
|
@ -840,7 +844,7 @@ void xmrig::Client::parseResponse(int64_t id, const rapidjson::Value &result, co
|
|||
m_listener->onLoginSuccess(this);
|
||||
|
||||
if (m_job.isValid()) {
|
||||
m_listener->onJobReceived(this, m_job, result["job"]);
|
||||
m_listener->onJobReceived(this, m_job, Json::getObject(result, "job"));
|
||||
}
|
||||
|
||||
return;
|
||||
|
|
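parseLogin() and parseResponse() now go through Json::getObject() instead of indexing result["job"] directly, which avoids rapidjson's assertion when the member is missing. A small standalone illustration of why the unchecked operator[] is unsafe and what a defensive lookup looks like with plain rapidjson (the include path depends on the build setup):

    #include <cstdio>
    #include "rapidjson/document.h"

    int main()
    {
        rapidjson::Document doc;
        doc.Parse(R"({"id":1,"result":{}})");

        // operator[] on a missing member asserts / is undefined behaviour,
        // so membership and type are checked first here.
        const auto &result = doc["result"];
        if (result.HasMember("job") && result["job"].IsObject()) {
            std::puts("job present");
        }
        else {
            std::puts("no job in login result"); // this branch runs
        }
        return 0;
    }
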
|
@ -1,13 +1,13 @@
|
|||
/* XMRig
|
||||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||
* Copyright 2019 Howard Chu <https://github.com/hyc>
|
||||
* Copyright 2018-2020 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright 2016-2020 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
* Copyright (c) 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright (c) 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright (c) 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright (c) 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright (c) 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright (c) 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||
* Copyright (c) 2019 Howard Chu <https://github.com/hyc>
|
||||
* Copyright (c) 2018-2023 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright (c) 2016-2023 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
|
@ -23,7 +23,6 @@
|
|||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
|
||||
#include <uv.h>
|
||||
|
||||
|
||||
|
@ -34,6 +33,7 @@
|
|||
#include "base/io/json/JsonRequest.h"
|
||||
#include "base/io/log/Log.h"
|
||||
#include "base/kernel/interfaces/IClientListener.h"
|
||||
#include "base/kernel/Platform.h"
|
||||
#include "base/net/dns/Dns.h"
|
||||
#include "base/net/dns/DnsRecords.h"
|
||||
#include "base/net/http/Fetch.h"
|
||||
|
@ -42,9 +42,9 @@
|
|||
#include "base/net/stratum/SubmitResult.h"
|
||||
#include "base/net/tools/NetBuffer.h"
|
||||
#include "base/tools/bswap_64.h"
|
||||
#include "base/tools/cryptonote/Signatures.h"
|
||||
#include "base/tools/Cvt.h"
|
||||
#include "base/tools/Timer.h"
|
||||
#include "base/tools/cryptonote/Signatures.h"
|
||||
#include "net/JobResult.h"
|
||||
|
||||
|
||||
|
@ -148,6 +148,11 @@ int64_t xmrig::DaemonClient::submit(const JobResult &result)
|
|||
memcpy(data + sig_offset * 2, result.sig, 64 * 2);
|
||||
memcpy(data + m_blocktemplate.offset(BlockTemplate::TX_PUBKEY_OFFSET) * 2, result.sig_data, 32 * 2);
|
||||
memcpy(data + m_blocktemplate.offset(BlockTemplate::EPH_PUBLIC_KEY_OFFSET) * 2, result.sig_data + 32 * 2, 32 * 2);
|
||||
|
||||
// Handle view tag for txout_to_tagged_key outputs
|
||||
if (m_blocktemplate.outputType() == 3) {
|
||||
Cvt::toHex(data + m_blocktemplate.offset(BlockTemplate::EPH_PUBLIC_KEY_OFFSET) * 2 + 32 * 2, 2, &result.view_tag, 1);
|
||||
}
|
||||
}
|
||||
|
||||
if (result.extra_nonce >= 0) {
|
||||
|
@ -178,7 +183,10 @@ int64_t xmrig::DaemonClient::submit(const JobResult &result)
|
|||
m_results[m_sequence] = SubmitResult(m_sequence, result.diff, result.actualDiff(), 0, result.backend);
|
||||
# endif
|
||||
|
||||
return rpcSend(doc);
|
||||
std::map<std::string, std::string> headers;
|
||||
headers.insert({"X-Hash-Difficulty", std::to_string(result.actualDiff())});
|
||||
|
||||
return rpcSend(doc, headers);
|
||||
}
|
||||
|
||||
|
||||
|
@ -350,9 +358,9 @@ void xmrig::DaemonClient::onResolved(const DnsRecords &records, int status, cons
|
|||
uv_tcp_init(uv_default_loop(), s);
|
||||
uv_tcp_nodelay(s, 1);
|
||||
|
||||
# ifndef WIN32
|
||||
uv_tcp_keepalive(s, 1, 60);
|
||||
# endif
|
||||
if (Platform::hasKeepalive()) {
|
||||
uv_tcp_keepalive(s, 1, 60);
|
||||
}
|
||||
|
||||
if (m_pool.zmq_port() > 0) {
|
||||
delete m_ZMQSocket;
|
||||
|
@ -401,7 +409,8 @@ bool xmrig::DaemonClient::parseJob(const rapidjson::Value ¶ms, int *code)
|
|||
m_blocktemplate.offset(BlockTemplate::TX_PUBKEY_OFFSET) - k,
|
||||
m_blocktemplate.offset(BlockTemplate::TX_EXTRA_NONCE_OFFSET) - k,
|
||||
m_blocktemplate.txExtraNonce().size(),
|
||||
m_blocktemplate.minerTxMerkleTreeBranch()
|
||||
m_blocktemplate.minerTxMerkleTreeBranch(),
|
||||
m_blocktemplate.outputType() == 3
|
||||
);
|
||||
# endif
|
||||
|
||||
|
@ -438,7 +447,7 @@ bool xmrig::DaemonClient::parseJob(const rapidjson::Value ¶ms, int *code)
|
|||
}
|
||||
|
||||
uint8_t derivation[32];
|
||||
if (!generate_key_derivation(m_blocktemplate.blob(BlockTemplate::TX_PUBKEY_OFFSET), secret_viewkey, derivation)) {
|
||||
if (!generate_key_derivation(m_blocktemplate.blob(BlockTemplate::TX_PUBKEY_OFFSET), secret_viewkey, derivation, nullptr)) {
|
||||
return jobError("Failed to generate key derivation for miner signature.");
|
||||
}
|
||||
|
||||
|
@ -553,9 +562,13 @@ int64_t xmrig::DaemonClient::getBlockTemplate()
|
|||
}
|
||||
|
||||
|
||||
int64_t xmrig::DaemonClient::rpcSend(const rapidjson::Document &doc)
|
||||
int64_t xmrig::DaemonClient::rpcSend(const rapidjson::Document &doc, const std::map<std::string, std::string> &headers)
|
||||
{
|
||||
FetchRequest req(HTTP_POST, m_pool.host(), m_pool.port(), kJsonRPC, doc, m_pool.isTLS(), isQuiet());
|
||||
for (const auto &header : headers) {
|
||||
req.headers.insert(header);
|
||||
}
|
||||
|
||||
fetch(tag(), std::move(req), m_httpListener);
|
||||
|
||||
return m_sequence++;
|
||||
|
@ -576,6 +589,9 @@ void xmrig::DaemonClient::retry()
|
|||
}
|
||||
|
||||
if ((m_ZMQConnectionState != ZMQ_NOT_CONNECTED) && (m_ZMQConnectionState != ZMQ_DISCONNECTING)) {
|
||||
if (Platform::hasKeepalive()) {
|
||||
uv_tcp_keepalive(m_ZMQSocket, 0, 60);
|
||||
}
|
||||
uv_close(reinterpret_cast<uv_handle_t*>(m_ZMQSocket), onZMQClose);
|
||||
}
|
||||
|
||||
|
@ -903,6 +919,9 @@ bool xmrig::DaemonClient::ZMQClose(bool shutdown)
|
|||
m_ZMQConnectionState = ZMQ_DISCONNECTING;
|
||||
|
||||
if (uv_is_closing(reinterpret_cast<uv_handle_t*>(m_ZMQSocket)) == 0) {
|
||||
if (Platform::hasKeepalive()) {
|
||||
uv_tcp_keepalive(m_ZMQSocket, 0, 60);
|
||||
}
|
||||
uv_close(reinterpret_cast<uv_handle_t*>(m_ZMQSocket), shutdown ? onZMQShutdown : onZMQClose);
|
||||
if (!shutdown) {
|
||||
retry();
|
||||
|
|
|
@ -86,7 +86,7 @@ private:
bool parseJob(const rapidjson::Value &params, int *code);
bool parseResponse(int64_t id, const rapidjson::Value &result, const rapidjson::Value &error);
int64_t getBlockTemplate();
int64_t rpcSend(const rapidjson::Document &doc);
int64_t rpcSend(const rapidjson::Document &doc, const std::map<std::string, std::string> &headers = {});
void retry();
void send(const char *path);
void setState(SocketState state);
Some files were not shown because too many files have changed in this diff.