From f9c4c572164eb50129b0b541143ae5e5a409c3c1 Mon Sep 17 00:00:00 2001 From: XMRig Date: Sun, 25 Feb 2024 23:00:45 +0700 Subject: [PATCH 01/13] v6.21.2-dev --- src/version.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/version.h b/src/version.h index 1578882e..f22dcd5b 100644 --- a/src/version.h +++ b/src/version.h @@ -22,7 +22,7 @@ #define APP_ID "xmrig" #define APP_NAME "XMRig" #define APP_DESC "XMRig miner" -#define APP_VERSION "6.21.1" +#define APP_VERSION "6.21.2-dev" #define APP_DOMAIN "xmrig.com" #define APP_SITE "www.xmrig.com" #define APP_COPYRIGHT "Copyright (C) 2016-2024 xmrig.com" @@ -30,7 +30,7 @@ #define APP_VER_MAJOR 6 #define APP_VER_MINOR 21 -#define APP_VER_PATCH 1 +#define APP_VER_PATCH 2 #ifdef _MSC_VER # if (_MSC_VER >= 1930) From b49197f8088fa594e4e9a1794f3f189b1d782b47 Mon Sep 17 00:00:00 2001 From: SChernykh <15806605+SChernykh@users.noreply.github.com> Date: Tue, 27 Feb 2024 23:39:23 +0100 Subject: [PATCH 02/13] Stratum: better check of the login response --- src/base/net/stratum/Client.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/base/net/stratum/Client.cpp b/src/base/net/stratum/Client.cpp index 50e35bcc..68122f93 100644 --- a/src/base/net/stratum/Client.cpp +++ b/src/base/net/stratum/Client.cpp @@ -609,6 +609,11 @@ bool xmrig::Client::parseLogin(const rapidjson::Value &result, int *code) parseExtensions(result); + if (!result.HasMember("job")) { + *code = 2; + return false; + } + const bool rc = parseJob(result["job"], code); m_jobs = 0; From c9b9ef51ee6b376d629264ac4056fc111c7b8b40 Mon Sep 17 00:00:00 2001 From: XMRig Date: Thu, 29 Feb 2024 09:38:47 +0700 Subject: [PATCH 03/13] #2800 Fixed donation with ghostrider algorithm for builds without KawPow algorithm. 
--- src/base/net/stratum/Client.cpp | 13 ++++--------- src/net/strategies/DonateStrategy.cpp | 2 +- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/src/base/net/stratum/Client.cpp b/src/base/net/stratum/Client.cpp index 68122f93..54246e72 100644 --- a/src/base/net/stratum/Client.cpp +++ b/src/base/net/stratum/Client.cpp @@ -1,7 +1,7 @@ /* XMRig * Copyright (c) 2019 jtgrassie - * Copyright (c) 2018-2023 SChernykh - * Copyright (c) 2016-2023 XMRig , + * Copyright (c) 2018-2024 SChernykh + * Copyright (c) 2016-2024 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -609,12 +609,7 @@ bool xmrig::Client::parseLogin(const rapidjson::Value &result, int *code) parseExtensions(result); - if (!result.HasMember("job")) { - *code = 2; - return false; - } - - const bool rc = parseJob(result["job"], code); + const bool rc = parseJob(Json::getObject(result, "job"), code); m_jobs = 0; return rc; @@ -849,7 +844,7 @@ void xmrig::Client::parseResponse(int64_t id, const rapidjson::Value &result, co m_listener->onLoginSuccess(this); if (m_job.isValid()) { - m_listener->onJobReceived(this, m_job, result["job"]); + m_listener->onJobReceived(this, m_job, Json::getObject(result, "job")); } return; diff --git a/src/net/strategies/DonateStrategy.cpp b/src/net/strategies/DonateStrategy.cpp index 03447a01..1f647ae4 100644 --- a/src/net/strategies/DonateStrategy.cpp +++ b/src/net/strategies/DonateStrategy.cpp @@ -63,7 +63,7 @@ xmrig::DonateStrategy::DonateStrategy(Controller *controller, IStrategyListener keccak(reinterpret_cast(user.data()), user.size(), hash); Cvt::toHex(m_userId, sizeof(m_userId), hash, 32); -# ifdef XMRIG_ALGO_KAWPOW +# if defined XMRIG_ALGO_KAWPOW || defined XMRIG_ALGO_GHOSTRIDER constexpr Pool::Mode mode = Pool::MODE_AUTO_ETH; # else constexpr Pool::Mode mode = Pool::MODE_POOL; From 48fa095e3e42f41a48367f87fcb0824b8fc36e1e Mon Sep 17 00:00:00 2001 From: 
SChernykh <15806605+SChernykh@users.noreply.github.com> Date: Thu, 29 Feb 2024 08:31:16 +0100 Subject: [PATCH 04/13] Update bug_report.md --- .github/ISSUE_TEMPLATE/bug_report.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index 6404cef2..6061aeec 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -17,6 +17,9 @@ Steps to reproduce the behavior. A clear and concise description of what you expected to happen. **Required data** + - XMRig version + - Either the exact link to a release you downloaded from https://github.com/xmrig/xmrig/releases + - Or the exact command lines that you used to build XMRig - Miner log as text or screenshot - Config file or command line (without wallets) - OS: [e.g. Windows] From 688d4f5ee16fab4d7153c0c92005925b4dd0f008 Mon Sep 17 00:00:00 2001 From: SChernykh <15806605+SChernykh@users.noreply.github.com> Date: Mon, 4 Mar 2024 08:45:22 +0100 Subject: [PATCH 05/13] Thread-safe FileLogWriter --- src/base/io/log/FileLogWriter.cpp | 74 +++++++++++++++++++++++++------ src/base/io/log/FileLogWriter.h | 18 +++++++- 2 files changed, 76 insertions(+), 16 deletions(-) diff --git a/src/base/io/log/FileLogWriter.cpp b/src/base/io/log/FileLogWriter.cpp index b41f7f39..a449dc9a 100644 --- a/src/base/io/log/FileLogWriter.cpp +++ b/src/base/io/log/FileLogWriter.cpp @@ -22,7 +22,6 @@ #include -#include namespace xmrig { @@ -40,6 +39,32 @@ static void fsWriteCallback(uv_fs_t *req) } // namespace xmrig +xmrig::FileLogWriter::FileLogWriter() +{ + init(); +} + +xmrig::FileLogWriter::FileLogWriter(const char* fileName) +{ + init(); + open(fileName); +} + +xmrig::FileLogWriter::~FileLogWriter() +{ + uv_close(reinterpret_cast(&m_flushAsync), nullptr); + + uv_mutex_destroy(&m_buffersLock); +} + +void xmrig::FileLogWriter::init() +{ + uv_mutex_init(&m_buffersLock); + + uv_async_init(uv_default_loop(), &m_flushAsync, on_flush); + 
m_flushAsync.data = this; +} + bool xmrig::FileLogWriter::open(const char *fileName) { assert(fileName != nullptr); @@ -77,11 +102,12 @@ bool xmrig::FileLogWriter::write(const char *data, size_t size) uv_buf_t buf = uv_buf_init(new char[size], size); memcpy(buf.base, data, size); - auto req = new uv_fs_t; - req->data = buf.base; + uv_mutex_lock(&m_buffersLock); - uv_fs_write(uv_default_loop(), req, m_file, &buf, 1, m_pos, fsWriteCallback); - m_pos += size; + m_buffers.emplace_back(buf); + uv_async_send(&m_flushAsync); + + uv_mutex_unlock(&m_buffersLock); return true; } @@ -89,18 +115,38 @@ bool xmrig::FileLogWriter::write(const char *data, size_t size) bool xmrig::FileLogWriter::writeLine(const char *data, size_t size) { - const uv_buf_t buf[2] = { - uv_buf_init(new char[size], size), - uv_buf_init(const_cast(m_endl), sizeof(m_endl) - 1) - }; + if (!isOpen()) { + return false; + } - memcpy(buf[0].base, data, size); + constexpr size_t N = sizeof(m_endl) - 1; - auto req = new uv_fs_t; - req->data = buf[0].base; + uv_buf_t buf = uv_buf_init(new char[size + N], size + N); + memcpy(buf.base, data, size); + memcpy(buf.base + size, m_endl, N); - uv_fs_write(uv_default_loop(), req, m_file, buf, 2, m_pos, fsWriteCallback); - m_pos += (buf[0].len + buf[1].len); + uv_mutex_lock(&m_buffersLock); + + m_buffers.emplace_back(buf); + uv_async_send(&m_flushAsync); + + uv_mutex_unlock(&m_buffersLock); return true; } + +void xmrig::FileLogWriter::flush() +{ + uv_mutex_lock(&m_buffersLock); + + for (uv_buf_t buf : m_buffers) { + uv_fs_t* req = new uv_fs_t; + req->data = buf.base; + + uv_fs_write(uv_default_loop(), req, m_file, &buf, 1, m_pos, fsWriteCallback); + m_pos += buf.len; + } + m_buffers.clear(); + + uv_mutex_unlock(&m_buffersLock); +} diff --git a/src/base/io/log/FileLogWriter.h b/src/base/io/log/FileLogWriter.h index f3606aa3..66b8a13c 100644 --- a/src/base/io/log/FileLogWriter.h +++ b/src/base/io/log/FileLogWriter.h @@ -22,6 +22,8 @@ #include #include +#include +#include 
namespace xmrig { @@ -30,8 +32,10 @@ namespace xmrig { class FileLogWriter { public: - FileLogWriter() = default; - FileLogWriter(const char *fileName) { open(fileName); } + FileLogWriter(); + FileLogWriter(const char* fileName); + + ~FileLogWriter(); inline bool isOpen() const { return m_file >= 0; } inline int64_t pos() const { return m_pos; } @@ -49,6 +53,16 @@ private: int m_file = -1; int64_t m_pos = 0; + + uv_mutex_t m_buffersLock; + std::vector m_buffers; + + uv_async_t m_flushAsync; + + void init(); + + static void on_flush(uv_async_t* async) { reinterpret_cast(async->data)->flush(); } + void flush(); }; From 08c43b7e586299bf083d3b78c2132e26fd4a69b7 Mon Sep 17 00:00:00 2001 From: goodmost Date: Tue, 19 Mar 2024 23:19:36 +0800 Subject: [PATCH 06/13] chore: remove repetitive words Signed-off-by: goodmost --- doc/CHANGELOG_OLD.md | 2 +- src/3rdparty/epee/README.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/CHANGELOG_OLD.md b/doc/CHANGELOG_OLD.md index 70059190..a799cb38 100644 --- a/doc/CHANGELOG_OLD.md +++ b/doc/CHANGELOG_OLD.md @@ -256,7 +256,7 @@ # v2.8.0 - **[#753](https://github.com/xmrig/xmrig/issues/753) Added new algorithm [CryptoNight variant 2](https://github.com/xmrig/xmrig/issues/753) for Monero fork, thanks [@SChernykh](https://github.com/SChernykh).** - - Added global and per thread option `"asm"` and and command line equivalent. + - Added global and per thread option `"asm"` and command line equivalent. - **[#758](https://github.com/xmrig/xmrig/issues/758) Added SSL/TLS support for secure connections to pools.** - Added per pool options `"tls"` and `"tls-fingerprint"` and command line equivalents. - [#767](https://github.com/xmrig/xmrig/issues/767) Added config autosave feature, same with GPU miners. 
diff --git a/src/3rdparty/epee/README.md b/src/3rdparty/epee/README.md index 57e0efb1..2f06fe06 100644 --- a/src/3rdparty/epee/README.md +++ b/src/3rdparty/epee/README.md @@ -1 +1 @@ -epee - is a small library of helpers, wrappers, tools and and so on, used to make my life easier. +epee - is a small library of helpers, wrappers, tools and so on, used to make my life easier. From 1fb5be6c1d292c14f0d2562f6cb034087b68e9ce Mon Sep 17 00:00:00 2001 From: XMRig Date: Wed, 20 Mar 2024 00:24:46 +0700 Subject: [PATCH 07/13] Update deps. --- scripts/build.hwloc.sh | 2 +- scripts/build.openssl3.sh | 2 +- scripts/build.uv.sh | 6 +++--- scripts/build_deps.sh | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/scripts/build.hwloc.sh b/scripts/build.hwloc.sh index 223c5fb0..077d87b8 100755 --- a/scripts/build.hwloc.sh +++ b/scripts/build.hwloc.sh @@ -1,7 +1,7 @@ #!/bin/bash -e HWLOC_VERSION_MAJOR="2" -HWLOC_VERSION_MINOR="9" +HWLOC_VERSION_MINOR="10" HWLOC_VERSION_PATCH="0" HWLOC_VERSION="${HWLOC_VERSION_MAJOR}.${HWLOC_VERSION_MINOR}.${HWLOC_VERSION_PATCH}" diff --git a/scripts/build.openssl3.sh b/scripts/build.openssl3.sh index ffd1b953..70e989f7 100755 --- a/scripts/build.openssl3.sh +++ b/scripts/build.openssl3.sh @@ -1,6 +1,6 @@ #!/bin/bash -e -OPENSSL_VERSION="3.0.7" +OPENSSL_VERSION="3.0.13" mkdir -p deps mkdir -p deps/include diff --git a/scripts/build.uv.sh b/scripts/build.uv.sh index 3ee766a7..3a016a2a 100755 --- a/scripts/build.uv.sh +++ b/scripts/build.uv.sh @@ -1,6 +1,6 @@ #!/bin/bash -e -UV_VERSION="1.44.2" +UV_VERSION="1.48.0" mkdir -p deps mkdir -p deps/include @@ -8,10 +8,10 @@ mkdir -p deps/lib mkdir -p build && cd build -wget https://github.com/libuv/libuv/archive/v${UV_VERSION}.tar.gz -O v${UV_VERSION}.tar.gz +wget https://dist.libuv.org/dist/v${UV_VERSION}/libuv-v${UV_VERSION}.tar.gz -O v${UV_VERSION}.tar.gz tar -xzf v${UV_VERSION}.tar.gz -cd libuv-${UV_VERSION} +cd libuv-v${UV_VERSION} sh autogen.sh ./configure --disable-shared make -j$(nproc 
|| sysctl -n hw.ncpu || sysctl -n hw.logicalcpu) diff --git a/scripts/build_deps.sh b/scripts/build_deps.sh index e3efbf23..c294f969 100755 --- a/scripts/build_deps.sh +++ b/scripts/build_deps.sh @@ -2,4 +2,4 @@ ./build.uv.sh ./build.hwloc.sh -./build.openssl.sh \ No newline at end of file +./build.openssl3.sh \ No newline at end of file From 5552e1f864b290d9b9674dbdf5f5bb8c21744dc7 Mon Sep 17 00:00:00 2001 From: XMRig Date: Thu, 21 Mar 2024 02:13:01 +0700 Subject: [PATCH 08/13] Fix scripts for systems without bash. --- scripts/build.hwloc.sh | 2 +- scripts/build.hwloc1.sh | 2 +- scripts/build.libressl.sh | 2 +- scripts/build.openssl.sh | 2 +- scripts/build.openssl3.sh | 2 +- scripts/build.uv.sh | 2 +- scripts/build_deps.sh | 2 +- scripts/enable_1gb_pages.sh | 2 +- 8 files changed, 8 insertions(+), 8 deletions(-) diff --git a/scripts/build.hwloc.sh b/scripts/build.hwloc.sh index 077d87b8..db85fb6f 100755 --- a/scripts/build.hwloc.sh +++ b/scripts/build.hwloc.sh @@ -1,4 +1,4 @@ -#!/bin/bash -e +#!/bin/sh -e HWLOC_VERSION_MAJOR="2" HWLOC_VERSION_MINOR="10" diff --git a/scripts/build.hwloc1.sh b/scripts/build.hwloc1.sh index f1afc285..82572743 100755 --- a/scripts/build.hwloc1.sh +++ b/scripts/build.hwloc1.sh @@ -1,4 +1,4 @@ -#!/bin/bash -e +#!/bin/sh -e HWLOC_VERSION="1.11.13" diff --git a/scripts/build.libressl.sh b/scripts/build.libressl.sh index d32c04fa..6dde9faa 100755 --- a/scripts/build.libressl.sh +++ b/scripts/build.libressl.sh @@ -1,4 +1,4 @@ -#!/bin/bash -e +#!/bin/sh -e LIBRESSL_VERSION="3.5.2" diff --git a/scripts/build.openssl.sh b/scripts/build.openssl.sh index a89b281f..722e11b5 100755 --- a/scripts/build.openssl.sh +++ b/scripts/build.openssl.sh @@ -1,4 +1,4 @@ -#!/bin/bash -e +#!/bin/sh -e OPENSSL_VERSION="1.1.1s" diff --git a/scripts/build.openssl3.sh b/scripts/build.openssl3.sh index 70e989f7..e42fcac0 100755 --- a/scripts/build.openssl3.sh +++ b/scripts/build.openssl3.sh @@ -1,4 +1,4 @@ -#!/bin/bash -e +#!/bin/sh -e OPENSSL_VERSION="3.0.13" diff 
--git a/scripts/build.uv.sh b/scripts/build.uv.sh index 3a016a2a..ca052f7a 100755 --- a/scripts/build.uv.sh +++ b/scripts/build.uv.sh @@ -1,4 +1,4 @@ -#!/bin/bash -e +#!/bin/sh -e UV_VERSION="1.48.0" diff --git a/scripts/build_deps.sh b/scripts/build_deps.sh index c294f969..d244665f 100755 --- a/scripts/build_deps.sh +++ b/scripts/build_deps.sh @@ -1,4 +1,4 @@ -#!/bin/bash -e +#!/bin/sh -e ./build.uv.sh ./build.hwloc.sh diff --git a/scripts/enable_1gb_pages.sh b/scripts/enable_1gb_pages.sh index 16d889f1..a1fb4c61 100755 --- a/scripts/enable_1gb_pages.sh +++ b/scripts/enable_1gb_pages.sh @@ -1,4 +1,4 @@ -#!/bin/bash -e +#!/bin/sh -e # https://xmrig.com/docs/miner/hugepages#onegb-huge-pages From f6c50b5393ce1ba8596336dc867ec8eefcd9d7a2 Mon Sep 17 00:00:00 2001 From: SChernykh <15806605+SChernykh@users.noreply.github.com> Date: Wed, 20 Mar 2024 21:24:02 +0100 Subject: [PATCH 09/13] Fix RandomX crash when compiled with fortify_source --- src/crypto/randomx/jit_compiler_a64.cpp | 2 +- src/crypto/randomx/jit_compiler_a64.hpp | 2 +- src/crypto/randomx/jit_compiler_x86.cpp | 2 +- src/crypto/randomx/jit_compiler_x86.hpp | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/crypto/randomx/jit_compiler_a64.cpp b/src/crypto/randomx/jit_compiler_a64.cpp index 05dac9f7..4bfe157e 100644 --- a/src/crypto/randomx/jit_compiler_a64.cpp +++ b/src/crypto/randomx/jit_compiler_a64.cpp @@ -1078,6 +1078,6 @@ void JitCompilerA64::h_NOP(Instruction& instr, uint32_t& codePos) { } -InstructionGeneratorA64 JitCompilerA64::engine[256] = {}; +InstructionGeneratorA64 JitCompilerA64::engine[257] = {}; } diff --git a/src/crypto/randomx/jit_compiler_a64.hpp b/src/crypto/randomx/jit_compiler_a64.hpp index 32ff5166..15c90af8 100644 --- a/src/crypto/randomx/jit_compiler_a64.hpp +++ b/src/crypto/randomx/jit_compiler_a64.hpp @@ -74,7 +74,7 @@ namespace randomx { void enableWriting() const; void enableExecution() const; - static InstructionGeneratorA64 engine[256]; + static 
InstructionGeneratorA64 engine[257]; private: const bool hugePages; diff --git a/src/crypto/randomx/jit_compiler_x86.cpp b/src/crypto/randomx/jit_compiler_x86.cpp index 7f9fb3b6..78ab8b58 100644 --- a/src/crypto/randomx/jit_compiler_x86.cpp +++ b/src/crypto/randomx/jit_compiler_x86.cpp @@ -1443,6 +1443,6 @@ namespace randomx { emitByte(0x90, code, codePos); } - alignas(64) InstructionGeneratorX86 JitCompilerX86::engine[256] = {}; + alignas(64) InstructionGeneratorX86 JitCompilerX86::engine[257] = {}; } diff --git a/src/crypto/randomx/jit_compiler_x86.hpp b/src/crypto/randomx/jit_compiler_x86.hpp index 15261922..11106b2f 100644 --- a/src/crypto/randomx/jit_compiler_x86.hpp +++ b/src/crypto/randomx/jit_compiler_x86.hpp @@ -81,7 +81,7 @@ namespace randomx { void enableWriting() const; void enableExecution() const; - alignas(64) static InstructionGeneratorX86 engine[256]; + alignas(64) static InstructionGeneratorX86 engine[257]; private: int registerUsage[RegistersCount] = {}; From b8e4eaac87e8325647258dec58cd9279284532cc Mon Sep 17 00:00:00 2001 From: XMRig Date: Thu, 21 Mar 2024 21:03:35 +0700 Subject: [PATCH 10/13] Fix rapidjson assert. --- src/base/net/http/HttpApiResponse.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/base/net/http/HttpApiResponse.cpp b/src/base/net/http/HttpApiResponse.cpp index e6758df9..3ee38a09 100644 --- a/src/base/net/http/HttpApiResponse.cpp +++ b/src/base/net/http/HttpApiResponse.cpp @@ -1,7 +1,7 @@ /* XMRig * Copyright (c) 2014-2019 heapwolf - * Copyright (c) 2018-2021 SChernykh - * Copyright (c) 2016-2021 XMRig , + * Copyright (c) 2018-2024 SChernykh + * Copyright (c) 2016-2024 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -17,7 +17,6 @@ * along with this program. If not, see . 
*/ - #include "base/net/http/HttpApiResponse.h" #include "3rdparty/rapidjson/prettywriter.h" #include "3rdparty/rapidjson/stringbuffer.h" @@ -65,7 +64,7 @@ void xmrig::HttpApiResponse::end() } } - if (!m_doc.MemberCount()) { + if (m_doc.IsObject() && m_doc.ObjectEmpty()) { return HttpResponse::end(); } From 850b43c0796e262809cbb7aaaf740331179f2a74 Mon Sep 17 00:00:00 2001 From: XMRig Date: Fri, 22 Mar 2024 01:22:54 +0700 Subject: [PATCH 11/13] Fix build with recent libuv. --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index d37734ac..acf66cc8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -162,7 +162,7 @@ if (XMRIG_OS_WIN) src/crypto/common/VirtualMemory_win.cpp ) - set(EXTRA_LIBS ws2_32 psapi iphlpapi userenv) + set(EXTRA_LIBS ws2_32 psapi iphlpapi userenv dbghelp) elseif (XMRIG_OS_APPLE) list(APPEND SOURCES_OS src/App_unix.cpp From 7a85257ad4dd91565d53be4cfbe76ccf0045af38 Mon Sep 17 00:00:00 2001 From: XMRig Date: Fri, 22 Mar 2024 18:14:39 +0700 Subject: [PATCH 12/13] Update hwloc for MSVC builds. 
--- CMakeLists.txt | 2 +- src/3rdparty/argon2/CMakeLists.txt | 2 +- src/3rdparty/hwloc/CMakeLists.txt | 2 +- src/3rdparty/hwloc/NEWS | 107 +- src/3rdparty/hwloc/README | 496 +++++++- src/3rdparty/hwloc/VERSION | 7 +- .../hwloc/include/hwloc/autogen/config.h | 6 +- src/3rdparty/hwloc/include/hwloc/bitmap.h | 51 +- src/3rdparty/hwloc/include/hwloc/cuda.h | 8 +- src/3rdparty/hwloc/include/hwloc/cudart.h | 8 +- src/3rdparty/hwloc/include/hwloc/diff.h | 17 +- src/3rdparty/hwloc/include/hwloc/distances.h | 37 +- src/3rdparty/hwloc/include/hwloc/export.h | 16 +- src/3rdparty/hwloc/include/hwloc/gl.h | 5 +- .../hwloc/include/hwloc/glibc-sched.h | 13 +- src/3rdparty/hwloc/include/hwloc/helper.h | 1039 +++++++++-------- src/3rdparty/hwloc/include/hwloc/levelzero.h | 10 +- .../hwloc/include/hwloc/linux-libnuma.h | 34 +- src/3rdparty/hwloc/include/hwloc/linux.h | 10 +- src/3rdparty/hwloc/include/hwloc/memattrs.h | 44 +- src/3rdparty/hwloc/include/hwloc/nvml.h | 5 +- src/3rdparty/hwloc/include/hwloc/opencl.h | 8 +- .../hwloc/include/hwloc/openfabrics-verbs.h | 5 +- src/3rdparty/hwloc/include/hwloc/plugins.h | 2 +- src/3rdparty/hwloc/include/hwloc/rename.h | 1 + src/3rdparty/hwloc/include/hwloc/rsmi.h | 5 +- src/3rdparty/hwloc/include/hwloc/shmem.h | 17 +- src/3rdparty/hwloc/include/private/netloc.h | 578 --------- src/3rdparty/hwloc/include/private/private.h | 11 +- src/3rdparty/hwloc/include/private/xml.h | 8 +- src/3rdparty/hwloc/src/components.c | 3 +- src/3rdparty/hwloc/src/diff.c | 26 +- src/3rdparty/hwloc/src/memattrs.c | 758 +++++++++--- src/3rdparty/hwloc/src/shmem.c | 11 +- src/3rdparty/hwloc/src/topology-synthetic.c | 154 ++- src/3rdparty/hwloc/src/topology-windows.c | 26 +- src/3rdparty/hwloc/src/topology-x86.c | 308 +++-- .../hwloc/src/topology-xml-nolibxml.c | 9 +- src/3rdparty/hwloc/src/topology-xml.c | 77 +- src/3rdparty/hwloc/src/topology.c | 207 ++-- src/3rdparty/libethash/CMakeLists.txt | 2 +- src/crypto/ghostrider/CMakeLists.txt | 2 +- 42 files changed, 2554 
insertions(+), 1583 deletions(-) delete mode 100644 src/3rdparty/hwloc/include/private/netloc.h diff --git a/CMakeLists.txt b/CMakeLists.txt index acf66cc8..70d57d10 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.1) +cmake_minimum_required(VERSION 3.5) project(xmrig) option(WITH_HWLOC "Enable hwloc support" ON) diff --git a/src/3rdparty/argon2/CMakeLists.txt b/src/3rdparty/argon2/CMakeLists.txt index 7bbe716b..1ad977f0 100644 --- a/src/3rdparty/argon2/CMakeLists.txt +++ b/src/3rdparty/argon2/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.1) +cmake_minimum_required(VERSION 3.5) project(argon2 C) set(CMAKE_C_STANDARD 99) diff --git a/src/3rdparty/hwloc/CMakeLists.txt b/src/3rdparty/hwloc/CMakeLists.txt index ef2ba72d..37b88cbb 100644 --- a/src/3rdparty/hwloc/CMakeLists.txt +++ b/src/3rdparty/hwloc/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.1) +cmake_minimum_required(VERSION 3.5) project (hwloc C) include_directories(include) diff --git a/src/3rdparty/hwloc/NEWS b/src/3rdparty/hwloc/NEWS index 4ddcbf44..62cc687e 100644 --- a/src/3rdparty/hwloc/NEWS +++ b/src/3rdparty/hwloc/NEWS @@ -1,5 +1,5 @@ Copyright © 2009 CNRS -Copyright © 2009-2022 Inria. All rights reserved. +Copyright © 2009-2023 Inria. All rights reserved. Copyright © 2009-2013 Université Bordeaux Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. Copyright © 2020 Hewlett Packard Enterprise. All rights reserved. @@ -17,6 +17,103 @@ bug fixes (and other actions) for each version of hwloc since version 0.9. +Version 2.10.0 +-------------- +* Heterogeneous Memory core improvements + + Better heuristics to identify the subtype of memory such as HBM, + DRAM, NVM, CXL-DRAM, etc. + + Build memory tiers, i.e. sets of NUMA nodes with the same subtype + and similar performance. + - NUMA node tier ranks are exposed in the new MemoryTier info + attribute (starts from 0 for highest bandwidth tier).. 
+ + See the new Heterogeneous Memory section in the documentation. +* API + + Add hwloc_topology_free_group_object() to discard a Group created + by hwloc_topology_alloc_group_object(). +* Linux backend + + Fix cpukinds on NVIDIA Grace to report identical cores even if they + actually have very small frequency differences. + Thanks to John C. Linford for the report. + + Add CXLDevice attributes to CXL DAX objects and NUMA nodes to show + which PCI device implements which window. + + Ignore buggy memory-side caches and memory attributes when fake NUMA + emulation is enabled on the Linux kernel command-line. + + Add more info attributes in MemoryModule Misc objects, + thanks to Zubiao Xiong for the patch. + + Get CPUModel and CPUFamily info attributes on LoongArch platforms. +* x86 backend + + Add support for new AMD CPUID leaf 0x80000026 for better detection + of Core Complex and Die on Zen4 processors. + + Improve Zhaoxin CPU topology detection. +* Tools + + Input locations and many command-line options (e.g. hwloc-calc -I -N -H, + lstopo --only) now accept filters such as "NUMA[HBM]" so that only + objects are that type and subtype are considered. + - NUMA[tier=1] is also accepted for selecting NUMA nodes depending + on their MemoryTier info attribute. + + Add --object-output to hwloc-calc to report the type as a prefix to + object indexes, e.g. Core:2 instead of 2 in the output of -I. + + hwloc-info --ancestor and --descendants now accepts kinds of objects + instead of single types. + - The new --first option only shows the first matching object. + + Add --children-of-pid to hwloc-ps to show a hierarchy of processes. + Thanks to Antoine Morvan for the suggestion. + + Add --misc-from to lstopo to add Misc objects described in a file. + - To be combined with the new hwloc-ps --lstopo-misc for a customizable + lstopo --top replacement. 
+* Misc + + lstopo may now configure the layout of memory object placed above, + for instance with --children-order memory:above:vert. + + Fix XML import from memory or stdin when using libxml2 2.12. + + Fix installation failures when configuring with --target, + thanks to Clement Foyer for the patch. + + Fix support for 128bit pointer architectures. + + Remove Netloc. + + +Version 2.9.3 +------------- +* Handle Linux glibc allocation errors in binding routines (CVE-2022-47022). +* Fix hwloc-calc when searching objects on heterogeneous memory platforms, + thanks to Antoine Morvan for the report. +* Fix hwloc_get_next_child() when there are some memory-side caches. +* Don't crash if the topology is empty because Linux cgroups are wrong. +* Improve some hwloc-bind warnings in case of command-line parsing errors. +* Many documentation improvements all over the place, including: + + hwloc_topology_restrict() and hwloc_topology_insert_group() may reorder + children, causing the logical indexes of objects to change. + + +Version 2.9.2 +------------- +* Don't forget L3i when defining filters for multiple levels of caches + with hwloc_topology_set_cache/icache_types_filter(). +* Fix object total_memory after hwloc_topology_insert_group_object(). +* Fix the (non-yet) exporting in synthetic description for complex memory + hierarchies with memory-side caches, etc. +* Fix some default size attributes when building synthetic topologies. +* Fix size units in hwloc-annotate. +* Improve bitmap reallocation error management in many functions. +* Documentation improvements: + + Better document return values of functions. + + Add "Error reporting" section (in hwloc.h and in the doxygen doc). + + Add FAQ entry "What may I disable to make hwloc faster?" + + Improve FAQ entries "Why is lstopo slow?" and + "I only need ..., why should I use hwloc?" + + Clarify how to deal with cpukinds in hwloc-calc and hwloc-bind + manpages. 
+ + +Version 2.9.1 +------------- +* Don't forget to apply object type filters to "perflevel" caches detected + on recent Mac OS X releases, thanks to Michel Lesoinne for the report. +* Fix a failed assertion in hwloc_topology_restrict() when some NUMA nodes + are removed because of HWLOC_RESTRICT_FLAG_REMOVE_CPULESS but no PUs are. + Thanks to Mark Grondona for reporting the issue. +* Mark HPE Cray Slingshot NICs with subtype "Slingshot". + + Version 2.9.0 ------------- * Backends @@ -61,6 +158,14 @@ Version 2.8.0 file from the documentation. +Version 2.7.2 +------------- +* Fix a crash when LevelZero devices have multiple subdevices, + e.g. on PonteVecchio GPUs, thanks to Jonathan Peyton. +* Fix a leak when importing cpukinds from XML, + thanks to Hui Zhou. + + Version 2.7.1 ------------- * Workaround crashes when virtual machines report incoherent x86 CPUID diff --git a/src/3rdparty/hwloc/README b/src/3rdparty/hwloc/README index 43210e63..f2971d07 100644 --- a/src/3rdparty/hwloc/README +++ b/src/3rdparty/hwloc/README @@ -1,4 +1,8 @@ -Introduction +This is a truncated and poorly-formatted version of the documentation main page. +See https://www.open-mpi.org/projects/hwloc/doc/ for more. + + +hwloc Overview The Hardware Locality (hwloc) software project aims at easing the process of discovering hardware resources in parallel architectures. It offers @@ -8,66 +12,456 @@ high-performance computing (HPC) applications, but is also applicable to any project seeking to exploit code and/or data locality on modern computing platforms. -hwloc is actually made of two subprojects distributed together: +hwloc provides command line tools and a C API to obtain the hierarchical map of +key computing elements within a node, such as: NUMA memory nodes, shared +caches, processor packages, dies and cores, processing units (logical +processors or "threads") and even I/O devices. 
hwloc also gathers various +attributes such as cache and memory information, and is portable across a +variety of different operating systems and platforms. - * The original hwloc project for describing the internals of computing nodes. - It is described in details starting at section Hardware Locality (hwloc) - Introduction. - * The network-oriented companion called netloc (Network Locality), described - in details starting with section Network Locality (netloc). +hwloc primarily aims at helping high-performance computing (HPC) applications, +but is also applicable to any project seeking to exploit code and/or data +locality on modern computing platforms. -See also the Related pages tab above for links to other sections. +hwloc supports the following operating systems: -Netloc may be disabled, but the original hwloc cannot. Both hwloc and netloc -APIs are documented after these sections. + * Linux (with knowledge of cgroups and cpusets, memory targets/initiators, + etc.) on all supported hardware, including Intel Xeon Phi, ScaleMP vSMP, + and NumaScale NumaConnect. + * Solaris (with support for processor sets and logical domains) + * AIX + * Darwin / OS X + * FreeBSD and its variants (such as kFreeBSD/GNU) + * NetBSD + * HP-UX + * Microsoft Windows + * IBM BlueGene/Q Compute Node Kernel (CNK) -Installation +Since it uses standard Operating System information, hwloc's support is mostly +independant from the processor type (x86, powerpc, ...) and just relies on the +Operating System support. The main exception is BSD operating systems (NetBSD, +FreeBSD, etc.) because they do not provide support topology information, hence +hwloc uses an x86-only CPUID-based backend (which can be used for other OSes +too, see the Components and plugins section). -hwloc (https://www.open-mpi.org/projects/hwloc/) is available under the BSD -license. It is hosted as a sub-project of the overall Open MPI project (https:/ -/www.open-mpi.org/). 
Note that hwloc does not require any functionality from -Open MPI -- it is a wholly separate (and much smaller!) project and code base. -It just happens to be hosted as part of the overall Open MPI project. +To check whether hwloc works on a particular machine, just try to build it and +run lstopo or lstopo-no-graphics. If some things do not look right (e.g. bogus +or missing cache information), see Questions and Bugs. -Basic Installation +hwloc only reports the number of processors on unsupported operating systems; +no topology information is available. -Installation is the fairly common GNU-based process: +For development and debugging purposes, hwloc also offers the ability to work +on "fake" topologies: -shell$ ./configure --prefix=... -shell$ make -shell$ make install + * Symmetrical tree of resources generated from a list of level arities, see + Synthetic topologies. + * Remote machine simulation through the gathering of topology as XML files, + see Importing and exporting topologies from/to XML files. -hwloc- and netloc-specific configure options and requirements are documented in -sections hwloc Installation and Netloc Installation respectively. +hwloc can display the topology in a human-readable format, either in graphical +mode (X11), or by exporting in one of several different formats, including: +plain text, LaTeX tikzpicture, PDF, PNG, and FIG (see Command-line Examples +below). Note that some of the export formats require additional support +libraries. -Also note that if you install supplemental libraries in non-standard locations, -hwloc's configure script may not be able to find them without some help. You -may need to specify additional CPPFLAGS, LDFLAGS, or PKG_CONFIG_PATH values on -the configure command line. +hwloc offers a programming interface for manipulating topologies and objects. +It also brings a powerful CPU bitmap API that is used to describe topology +objects location on physical/logical processors. 
See the Programming Interface +below. It may also be used to bind applications onto certain cores or memory +nodes. Several utility programs are also provided to ease command-line +manipulation of topology objects, binding of processes, and so on. -For example, if libpciaccess was installed into /opt/pciaccess, hwloc's -configure script may not find it be default. Try adding PKG_CONFIG_PATH to the -./configure command line, like this: +Bindings for several other languages are available from the project website. -./configure PKG_CONFIG_PATH=/opt/pciaccess/lib/pkgconfig ... +Command-line Examples -Running the "lstopo" tool is a good way to check as a graphical output whether -hwloc properly detected the architecture of your node. Netloc command-line -tools can be used to display the network topology interconnecting your nodes. +On a 4-package 2-core machine with hyper-threading, the lstopo tool may show +the following graphical output: -Installing from a Git clone +[dudley] -Additionally, the code can be directly cloned from Git: +Here's the equivalent output in textual form: -shell$ git clone https://github.com/open-mpi/hwloc.git -shell$ cd hwloc -shell$ ./autogen.sh +Machine + NUMANode L#0 (P#0) + Package L#0 + L3 L#0 (4096KB) + L2 L#0 (1024KB) + L1 L#0 (16KB) + Core L#0 + PU L#0 (P#0) + PU L#1 (P#8) + L2 L#1 (1024KB) + L1 L#1 (16KB) + Core L#1 + PU L#2 (P#4) + PU L#3 (P#12) + Package L#1 + L3 L#1 (4096KB) + L2 L#2 (1024KB) + L1 L#2 (16KB) + Core L#2 + PU L#4 (P#1) + PU L#5 (P#9) + L2 L#3 (1024KB) + L1 L#3 (16KB) + Core L#3 + PU L#6 (P#5) + PU L#7 (P#13) + Package L#2 + L3 L#2 (4096KB) + L2 L#4 (1024KB) + L1 L#4 (16KB) + Core L#4 + PU L#8 (P#2) + PU L#9 (P#10) + L2 L#5 (1024KB) + L1 L#5 (16KB) + Core L#5 + PU L#10 (P#6) + PU L#11 (P#14) + Package L#3 + L3 L#3 (4096KB) + L2 L#6 (1024KB) + L1 L#6 (16KB) + Core L#6 + PU L#12 (P#3) + PU L#13 (P#11) + L2 L#7 (1024KB) + L1 L#7 (16KB) + Core L#7 + PU L#14 (P#7) + PU L#15 (P#15) -Note that GNU Autoconf >=2.63, Automake
>=1.11 and Libtool >=2.2.6 are required -when building from a Git clone. +Note that there is also an equivalent output in XML that is meant for exporting +/importing topologies but it is hardly readable to human-beings (see Importing +and exporting topologies from/to XML files for details). -Nightly development snapshots are available on the web site, they can be -configured and built without any need for Git or GNU Autotools. +On a 4-package 2-core Opteron NUMA machine (with two cores disallowed by +the administrator), the lstopo tool may show the following graphical output +(with --disallowed for displaying disallowed objects): + +[hagrid] + +Here's the equivalent output in textual form: + +Machine (32GB total) + Package L#0 + NUMANode L#0 (P#0 8190MB) + L2 L#0 (1024KB) + L1 L#0 (64KB) + Core L#0 + PU L#0 (P#0) + L2 L#1 (1024KB) + L1 L#1 (64KB) + Core L#1 + PU L#1 (P#1) + Package L#1 + NUMANode L#1 (P#1 8192MB) + L2 L#2 (1024KB) + L1 L#2 (64KB) + Core L#2 + PU L#2 (P#2) + L2 L#3 (1024KB) + L1 L#3 (64KB) + Core L#3 + PU L#3 (P#3) + Package L#2 + NUMANode L#2 (P#2 8192MB) + L2 L#4 (1024KB) + L1 L#4 (64KB) + Core L#4 + PU L#4 (P#4) + L2 L#5 (1024KB) + L1 L#5 (64KB) + Core L#5 + PU L#5 (P#5) + Package L#3 + NUMANode L#3 (P#3 8192MB) + L2 L#6 (1024KB) + L1 L#6 (64KB) + Core L#6 + PU L#6 (P#6) + L2 L#7 (1024KB) + L1 L#7 (64KB) + Core L#7 + PU L#7 (P#7) + +On a 2-package quad-core Xeon (pre-Nehalem, with 2 dual-core dies in each +package): + +[emmett] + +Here's the same output in textual form: + +Machine (total 16GB) + NUMANode L#0 (P#0 16GB) + Package L#0 + L2 L#0 (4096KB) + L1 L#0 (32KB) + Core L#0 + PU L#0 (P#0) + L1 L#1 (32KB) + Core L#1 + PU L#1 (P#4) + L2 L#1 (4096KB) + L1 L#2 (32KB) + Core L#2 + PU L#2 (P#2) + L1 L#3 (32KB) + Core L#3 + PU L#3 (P#6) + Package L#1 + L2 L#2 (4096KB) + L1 L#4 (32KB) + Core L#4 + PU L#4 (P#1) + L1 L#5 (32KB) + Core L#5 + PU L#5 (P#5) + L2 L#3 (4096KB) + L1 L#6 (32KB) + Core L#6 + PU L#6 (P#3) + L1 L#7 (32KB) + Core L#7 + PU
L#7 (P#7) + +Programming Interface + +The basic interface is available in hwloc.h. Some higher-level functions are +available in hwloc/helper.h to reduce the need to manually manipulate objects +and follow links between them. Documentation for all these is provided later in +this document. Developers may also want to look at hwloc/inlines.h which +contains the actual inline code of some hwloc.h routines, and at this document, +which provides good higher-level topology traversal examples. + +To precisely define the vocabulary used by hwloc, a Terms and Definitions +section is available and should probably be read first. + +Each hwloc object contains a cpuset describing the list of processing units +that it contains. These bitmaps may be used for CPU binding and Memory binding. +hwloc offers an extensive bitmap manipulation interface in hwloc/bitmap.h. + +Moreover, hwloc also comes with additional helpers for interoperability with +several commonly used environments. See the Interoperability With Other +Software section for details. + +The complete API documentation is available in a full set of HTML pages, man +pages, and self-contained PDF files (formatted for both US letter and A4 +formats) in the source tarball in doc/doxygen-doc/. + +NOTE: If you are building the documentation from a Git clone, you will need to +have Doxygen and pdflatex installed -- the documentation will be built during +the normal "make" process. The documentation is installed during "make install" +to $prefix/share/doc/hwloc/ and your system's default man page tree (under +$prefix, of course). + +Portability + +Operating Systems have varying support for CPU and memory binding, e.g. while +some Operating Systems provide interfaces for all kinds of CPU and memory +bindings, some others provide only interfaces for a limited number of kinds of +CPU and memory binding, and some do not provide any binding interface at all.
+Hwloc's binding functions would then simply return the ENOSYS error (Function +not implemented), meaning that the underlying Operating System does not provide +any interface for them. CPU binding and Memory binding provide more information +on which hwloc binding functions should be preferred because interfaces for +them are usually available on the supported Operating Systems. + +Similarly, the ability of reporting topology information varies from one +platform to another. As shown in Command-line Examples, hwloc can obtain +information on a wide variety of hardware topologies. However, some platforms +and/or operating system versions will only report a subset of this information. +For example, on a PPC64-based system with 8 cores (each with 2 hardware +threads) running a default 2.6.18-based kernel from RHEL 5.4, hwloc is only +able to glean information about NUMA nodes and processor units (PUs). No +information about caches, packages, or cores is available. + +Here's the graphical output from lstopo on this platform when Simultaneous +Multi-Threading (SMT) is enabled: + +[ppc64-with] + +And here's the graphical output from lstopo on this platform when SMT is +disabled: + +[ppc64-with] + +Notice that hwloc only sees half the PUs when SMT is disabled. PU L#6, for +example, seems to change location from NUMA node #0 to #1. In reality, no PUs +"moved" -- they were simply re-numbered when hwloc only saw half as many (see +also Logical index in Indexes and Sets). Hence, PU L#6 in the SMT-disabled +picture probably corresponds to PU L#12 in the SMT-enabled picture. + +This same "PUs have disappeared" effect can be seen on other platforms -- even +platforms / OSs that provide much more information than the above PPC64 system. +This is an unfortunate side-effect of how operating systems report information +to hwloc. + +Note that after upgrading the Linux kernel on the same PPC64 system mentioned above +to 2.6.34, hwloc is able to discover all the topology information.
The +following picture shows the entire topology layout when SMT is enabled: + +[ppc64-full] + +Developers using the hwloc API or XML output for portable applications should +therefore be extremely careful to not make any assumptions about the structure +of data that is returned. For example, per the above reported PPC topology, it +is not safe to assume that PUs will always be descendants of cores. + +Additionally, future hardware may insert new topology elements that are not +available in this version of hwloc. Long-lived applications that are meant to +span multiple different hardware platforms should also be careful about making +structure assumptions. For example, a new element may someday exist between a +core and a PU. + +API Example + +The following small C example (available in the source tree as ``doc/examples/ +hwloc-hello.c'') prints the topology of the machine and performs some thread +and memory binding. More examples are available in the doc/examples/ directory +of the source tree. + +/* Example hwloc API program. +* +* See other examples under doc/examples/ in the source tree +* for more details. +* +* Copyright (c) 2009-2016 Inria. All rights reserved. +* Copyright (c) 2009-2011 Université Bordeaux +* Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved. +* See COPYING in top-level directory.
+* +* hwloc-hello.c +*/ +#include "hwloc.h" +#include +#include +#include +static void print_children(hwloc_topology_t topology, hwloc_obj_t obj, +int depth) +{ +char type[32], attr[1024]; +unsigned i; +hwloc_obj_type_snprintf(type, sizeof(type), obj, 0); +printf("%*s%s", 2*depth, "", type); +if (obj->os_index != (unsigned) -1) +printf("#%u", obj->os_index); +hwloc_obj_attr_snprintf(attr, sizeof(attr), obj, " ", 0); +if (*attr) +printf("(%s)", attr); +printf("\n"); +for (i = 0; i < obj->arity; i++) { +print_children(topology, obj->children[i], depth + 1); +} +} +int main(void) +{ +int depth; +unsigned i, n; +unsigned long size; +int levels; +char string[128]; +int topodepth; +void *m; +hwloc_topology_t topology; +hwloc_cpuset_t cpuset; +hwloc_obj_t obj; +/* Allocate and initialize topology object. */ +hwloc_topology_init(&topology); +/* ... Optionally, put detection configuration here to ignore +some objects types, define a synthetic topology, etc.... +The default is to detect all the objects of the machine that +the caller is allowed to access. See Configure Topology +Detection. */ +/* Perform the topology detection. */ +hwloc_topology_load(topology); +/* Optionally, get some additional topology information +in case we need the topology depth later. */ +topodepth = hwloc_topology_get_depth(topology); +/***************************************************************** +* First example: +* Walk the topology with an array style, from level 0 (always +* the system level) to the lowest level (always the proc level). 
+*****************************************************************/ +for (depth = 0; depth < topodepth; depth++) { +printf("*** Objects at level %d\n", depth); +for (i = 0; i < hwloc_get_nbobjs_by_depth(topology, depth); +i++) { +hwloc_obj_type_snprintf(string, sizeof(string), +hwloc_get_obj_by_depth(topology, depth, i), 0); +printf("Index %u: %s\n", i, string); +} +} +/***************************************************************** +* Second example: +* Walk the topology with a tree style. +*****************************************************************/ +printf("*** Printing overall tree\n"); +print_children(topology, hwloc_get_root_obj(topology), 0); +/***************************************************************** +* Third example: +* Print the number of packages. +*****************************************************************/ +depth = hwloc_get_type_depth(topology, HWLOC_OBJ_PACKAGE); +if (depth == HWLOC_TYPE_DEPTH_UNKNOWN) { +printf("*** The number of packages is unknown\n"); +} else { +printf("*** %u package(s)\n", +hwloc_get_nbobjs_by_depth(topology, depth)); +} +/***************************************************************** +* Fourth example: +* Compute the amount of cache that the first logical processor +* has above it. +*****************************************************************/ +levels = 0; +size = 0; +for (obj = hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 0); +obj; +obj = obj->parent) +if (hwloc_obj_type_is_cache(obj->type)) { +levels++; +size += obj->attr->cache.size; +} +printf("*** Logical processor 0 has %d caches totaling %luKB\n", +levels, size / 1024); +/***************************************************************** +* Fifth example: +* Bind to only one thread of the last core of the machine. +* +* First find out where cores are, or else smaller sets of CPUs if +* the OS doesn't have the notion of a "core". 
+*****************************************************************/ +depth = hwloc_get_type_or_below_depth(topology, HWLOC_OBJ_CORE); +/* Get last core. */ +obj = hwloc_get_obj_by_depth(topology, depth, +hwloc_get_nbobjs_by_depth(topology, depth) - 1); +if (obj) { +/* Get a copy of its cpuset that we may modify. */ +cpuset = hwloc_bitmap_dup(obj->cpuset); +/* Get only one logical processor (in case the core is +SMT/hyper-threaded). */ +hwloc_bitmap_singlify(cpuset); +/* And try to bind ourself there. */ +if (hwloc_set_cpubind(topology, cpuset, 0)) { +char *str; +int error = errno; +hwloc_bitmap_asprintf(&str, obj->cpuset); +printf("Couldn't bind to cpuset %s: %s\n", str, strerror(error)); +free(str); +} +/* Free our cpuset copy */ +hwloc_bitmap_free(cpuset); +} +/***************************************************************** +* Sixth example: +* Allocate some memory on the last NUMA node, bind some existing +* memory to the last NUMA node. +*****************************************************************/ +/* Get last node. There's always at least one. */ +n = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_NUMANODE); +obj = hwloc_get_obj_by_type(topology, HWLOC_OBJ_NUMANODE, n - 1); +size = 1024*1024; +m = hwloc_alloc_membind(topology, size, obj->nodeset, +HWLOC_MEMBIND_BIND, HWLOC_MEMBIND_BYNODESET); +hwloc_free(topology, m, size); +m = malloc(size); +hwloc_set_area_membind(topology, m, size, obj->nodeset, +HWLOC_MEMBIND_BIND, HWLOC_MEMBIND_BYNODESET); +free(m); +/* Destroy topology object. */ +hwloc_topology_destroy(topology); +return 0; +} + +hwloc provides a pkg-config executable to obtain relevant compiler and linker +flags. 
For example, it can be used thusly to compile applications that utilize +the hwloc library (assuming GNU Make): + +CFLAGS += $(shell pkg-config --cflags hwloc) +LDLIBS += $(shell pkg-config --libs hwloc) + +hwloc-hello: hwloc-hello.c + $(CC) hwloc-hello.c $(CFLAGS) -o hwloc-hello $(LDLIBS) + +On a machine with 2 processor packages -- each package of which has two processing +cores -- the output from running hwloc-hello could be something like the +following: + +shell$ ./hwloc-hello +*** Objects at level 0 +Index 0: Machine +*** Objects at level 1 +Index 0: Package#0 +Index 1: Package#1 +*** Objects at level 2 +Index 0: Core#0 +Index 1: Core#1 +Index 2: Core#3 +Index 3: Core#2 +*** Objects at level 3 +Index 0: PU#0 +Index 1: PU#1 +Index 2: PU#2 +Index 3: PU#3 +*** Printing overall tree +Machine + Package#0 + Core#0 + PU#0 + Core#1 + PU#1 + Package#1 + Core#3 + PU#2 + Core#2 + PU#3 +*** 2 package(s) +*** Logical processor 0 has 0 caches totaling 0KB +shell$ Questions and Bugs @@ -80,6 +474,20 @@ www.open-mpi.org/community/lists/hwloc.php). There is also a #hwloc IRC channel on Libera Chat (irc.libera.chat). +History / Credits + +hwloc is the evolution and merger of the libtopology project and the Portable +Linux Processor Affinity (PLPA) (https://www.open-mpi.org/projects/plpa/) +project. Because of functional and ideological overlap, these two code bases +and ideas were merged and released under the name "hwloc" as an Open MPI +sub-project. + +libtopology was initially developed by the Inria Runtime Team-Project. PLPA was +initially developed by the Open MPI development team as a sub-project. Both are +now deprecated in favor of hwloc, which is distributed as an Open MPI +sub-project. -See https://www.open-mpi.org/projects/hwloc/doc/ for more hwloc documentation. + +See https://www.open-mpi.org/projects/hwloc/doc/ for more hwloc documentation, +actual links to related pages, images, etc.
diff --git a/src/3rdparty/hwloc/VERSION b/src/3rdparty/hwloc/VERSION index af3c4889..cd608187 100644 --- a/src/3rdparty/hwloc/VERSION +++ b/src/3rdparty/hwloc/VERSION @@ -8,7 +8,7 @@ # Please update HWLOC_VERSION* in contrib/windows/hwloc_config.h too. major=2 -minor=9 +minor=10 release=0 # greek is used for alpha or beta release tags. If it is non-empty, @@ -22,7 +22,7 @@ greek= # The date when this release was created -date="Dec 14, 2022" +date="Dec 04, 2023" # If snapshot=1, then use the value from snapshot_version as the # entire hwloc version (i.e., ignore major, minor, release, and @@ -41,7 +41,6 @@ snapshot_version=${major}.${minor}.${release}${greek}-git # 2. Version numbers are described in the Libtool current:revision:age # format. -libhwloc_so_version=21:1:6 -libnetloc_so_version=0:0:0 +libhwloc_so_version=22:0:7 # Please also update the lines in contrib/windows/libhwloc.vcxproj diff --git a/src/3rdparty/hwloc/include/hwloc/autogen/config.h b/src/3rdparty/hwloc/include/hwloc/autogen/config.h index fcaf70ca..6f45f734 100644 --- a/src/3rdparty/hwloc/include/hwloc/autogen/config.h +++ b/src/3rdparty/hwloc/include/hwloc/autogen/config.h @@ -1,6 +1,6 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2022 Inria. All rights reserved. + * Copyright © 2009-2023 Inria. All rights reserved. * Copyright © 2009-2012 Université Bordeaux * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. 
@@ -11,9 +11,9 @@ #ifndef HWLOC_CONFIG_H #define HWLOC_CONFIG_H -#define HWLOC_VERSION "2.9.0" +#define HWLOC_VERSION "2.10.0" #define HWLOC_VERSION_MAJOR 2 -#define HWLOC_VERSION_MINOR 9 +#define HWLOC_VERSION_MINOR 10 #define HWLOC_VERSION_RELEASE 0 #define HWLOC_VERSION_GREEK "" diff --git a/src/3rdparty/hwloc/include/hwloc/bitmap.h b/src/3rdparty/hwloc/include/hwloc/bitmap.h index cd118b38..6b56bcb9 100644 --- a/src/3rdparty/hwloc/include/hwloc/bitmap.h +++ b/src/3rdparty/hwloc/include/hwloc/bitmap.h @@ -1,6 +1,6 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2022 Inria. All rights reserved. + * Copyright © 2009-2023 Inria. All rights reserved. * Copyright © 2009-2012 Université Bordeaux * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. @@ -50,9 +50,10 @@ extern "C" { * hwloc_bitmap_free(set); * \endcode * - * \note Most functions below return an int that may be negative in case of - * error. The usual error case would be an internal failure to realloc/extend + * \note Most functions below return 0 on success and -1 on error. + * The usual error case would be an internal failure to realloc/extend * the storage of the bitmap (\p errno would be set to \c ENOMEM). + * See also \ref hwlocality_api_error_reporting. * * \note Several examples of using the bitmap API are available under the * doc/examples/ directory in the source tree. @@ -83,7 +84,13 @@ typedef const struct hwloc_bitmap_s * hwloc_const_bitmap_t; */ HWLOC_DECLSPEC hwloc_bitmap_t hwloc_bitmap_alloc(void) __hwloc_attribute_malloc; -/** \brief Allocate a new full bitmap. */ +/** \brief Allocate a new full bitmap. + * + * \returns A valid bitmap or \c NULL. + * + * The bitmap should be freed by a corresponding call to + * hwloc_bitmap_free(). + */ HWLOC_DECLSPEC hwloc_bitmap_t hwloc_bitmap_alloc_full(void) __hwloc_attribute_malloc; /** \brief Free bitmap \p bitmap. 
@@ -119,11 +126,13 @@ HWLOC_DECLSPEC int hwloc_bitmap_snprintf(char * __hwloc_restrict buf, size_t buf /** \brief Stringify a bitmap into a newly allocated string. * - * \return -1 on error. + * \return 0 on success, -1 on error. */ HWLOC_DECLSPEC int hwloc_bitmap_asprintf(char ** strp, hwloc_const_bitmap_t bitmap); /** \brief Parse a bitmap string and stores it in bitmap \p bitmap. + * + * \return 0 on success, -1 on error. */ HWLOC_DECLSPEC int hwloc_bitmap_sscanf(hwloc_bitmap_t bitmap, const char * __hwloc_restrict string); @@ -144,11 +153,13 @@ HWLOC_DECLSPEC int hwloc_bitmap_list_snprintf(char * __hwloc_restrict buf, size_ /** \brief Stringify a bitmap into a newly allocated list string. * - * \return -1 on error. + * \return 0 on success, -1 on error. */ HWLOC_DECLSPEC int hwloc_bitmap_list_asprintf(char ** strp, hwloc_const_bitmap_t bitmap); /** \brief Parse a list string and stores it in bitmap \p bitmap. + * + * \return 0 on success, -1 on error. */ HWLOC_DECLSPEC int hwloc_bitmap_list_sscanf(hwloc_bitmap_t bitmap, const char * __hwloc_restrict string); @@ -168,11 +179,13 @@ HWLOC_DECLSPEC int hwloc_bitmap_taskset_snprintf(char * __hwloc_restrict buf, si /** \brief Stringify a bitmap into a newly allocated taskset-specific string. * - * \return -1 on error. + * \return 0 on success, -1 on error. */ HWLOC_DECLSPEC int hwloc_bitmap_taskset_asprintf(char ** strp, hwloc_const_bitmap_t bitmap); /** \brief Parse a taskset-specific bitmap string and stores it in bitmap \p bitmap. + * + * \return 0 on success, -1 on error. */ HWLOC_DECLSPEC int hwloc_bitmap_taskset_sscanf(hwloc_bitmap_t bitmap, const char * __hwloc_restrict string); @@ -279,6 +292,7 @@ HWLOC_DECLSPEC int hwloc_bitmap_to_ulongs(hwloc_const_bitmap_t bitmap, unsigned * When called on the output of hwloc_topology_get_topology_cpuset(), * the returned number is large enough for all cpusets of the topology. * + * \return the number of unsigned longs required. * \return -1 if \p bitmap is infinite. 
*/ HWLOC_DECLSPEC int hwloc_bitmap_nr_ulongs(hwloc_const_bitmap_t bitmap) __hwloc_attribute_pure; @@ -305,21 +319,23 @@ HWLOC_DECLSPEC int hwloc_bitmap_isfull(hwloc_const_bitmap_t bitmap) __hwloc_attr /** \brief Compute the first index (least significant bit) in bitmap \p bitmap * - * \return -1 if no index is set in \p bitmap. + * \return the first index set in \p bitmap. + * \return -1 if \p bitmap is empty. */ HWLOC_DECLSPEC int hwloc_bitmap_first(hwloc_const_bitmap_t bitmap) __hwloc_attribute_pure; /** \brief Compute the next index in bitmap \p bitmap which is after index \p prev * - * If \p prev is -1, the first index is returned. - * + * \return the first index set in \p bitmap if \p prev is \c -1. + * \return the next index set in \p bitmap if \p prev is not \c -1. * \return -1 if no index with higher index is set in \p bitmap. */ HWLOC_DECLSPEC int hwloc_bitmap_next(hwloc_const_bitmap_t bitmap, int prev) __hwloc_attribute_pure; /** \brief Compute the last index (most significant bit) in bitmap \p bitmap * - * \return -1 if no index is set in \p bitmap, or if \p bitmap is infinitely set. + * \return the last index set in \p bitmap. + * \return -1 if \p bitmap is empty, or if \p bitmap is infinitely set. */ HWLOC_DECLSPEC int hwloc_bitmap_last(hwloc_const_bitmap_t bitmap) __hwloc_attribute_pure; @@ -327,28 +343,29 @@ HWLOC_DECLSPEC int hwloc_bitmap_last(hwloc_const_bitmap_t bitmap) __hwloc_attrib * indexes that are in the bitmap). * * \return the number of indexes that are in the bitmap. - * * \return -1 if \p bitmap is infinitely set. */ HWLOC_DECLSPEC int hwloc_bitmap_weight(hwloc_const_bitmap_t bitmap) __hwloc_attribute_pure; /** \brief Compute the first unset index (least significant bit) in bitmap \p bitmap * - * \return -1 if no index is unset in \p bitmap. + * \return the first unset index in \p bitmap. + * \return -1 if \p bitmap is full. 
*/ HWLOC_DECLSPEC int hwloc_bitmap_first_unset(hwloc_const_bitmap_t bitmap) __hwloc_attribute_pure; /** \brief Compute the next unset index in bitmap \p bitmap which is after index \p prev * - * If \p prev is -1, the first unset index is returned. - * + * \return the first index unset in \p bitmap if \p prev is \c -1. + * \return the next index unset in \p bitmap if \p prev is not \c -1. * \return -1 if no index with higher index is unset in \p bitmap. */ HWLOC_DECLSPEC int hwloc_bitmap_next_unset(hwloc_const_bitmap_t bitmap, int prev) __hwloc_attribute_pure; /** \brief Compute the last unset index (most significant bit) in bitmap \p bitmap * - * \return -1 if no index is unset in \p bitmap, or if \p bitmap is infinitely set. + * \return the last index unset in \p bitmap. + * \return -1 if \p bitmap is full, or if \p bitmap is not infinitely set. */ HWLOC_DECLSPEC int hwloc_bitmap_last_unset(hwloc_const_bitmap_t bitmap) __hwloc_attribute_pure; @@ -428,6 +445,8 @@ HWLOC_DECLSPEC int hwloc_bitmap_not (hwloc_bitmap_t res, hwloc_const_bitmap_t bi /** \brief Test whether bitmaps \p bitmap1 and \p bitmap2 intersects. * * \return 1 if bitmaps intersect, 0 otherwise. + * + * \note The empty bitmap does not intersect any other bitmap. */ HWLOC_DECLSPEC int hwloc_bitmap_intersects (hwloc_const_bitmap_t bitmap1, hwloc_const_bitmap_t bitmap2) __hwloc_attribute_pure; diff --git a/src/3rdparty/hwloc/include/hwloc/cuda.h b/src/3rdparty/hwloc/include/hwloc/cuda.h index 72fb8ccb..e72f9728 100644 --- a/src/3rdparty/hwloc/include/hwloc/cuda.h +++ b/src/3rdparty/hwloc/include/hwloc/cuda.h @@ -1,5 +1,5 @@ /* - * Copyright © 2010-2021 Inria. All rights reserved. + * Copyright © 2010-2023 Inria. All rights reserved. * Copyright © 2010-2011 Université Bordeaux * Copyright © 2011 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. @@ -42,6 +42,9 @@ extern "C" { /** \brief Return the domain, bus and device IDs of the CUDA device \p cudevice. 
* * Device \p cudevice must match the local machine. + * + * \return 0 on success. + * \return -1 on error, for instance if device information could not be found. */ static __hwloc_inline int hwloc_cuda_get_device_pci_ids(hwloc_topology_t topology __hwloc_attribute_unused, @@ -87,6 +90,9 @@ hwloc_cuda_get_device_pci_ids(hwloc_topology_t topology __hwloc_attribute_unused * * This function is currently only implemented in a meaningful way for * Linux; other systems will simply get a full cpuset. + * + * \return 0 on success. + * \return -1 on error, for instance if device information could not be found. */ static __hwloc_inline int hwloc_cuda_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused, diff --git a/src/3rdparty/hwloc/include/hwloc/cudart.h b/src/3rdparty/hwloc/include/hwloc/cudart.h index 676cffec..ad7f49f1 100644 --- a/src/3rdparty/hwloc/include/hwloc/cudart.h +++ b/src/3rdparty/hwloc/include/hwloc/cudart.h @@ -1,5 +1,5 @@ /* - * Copyright © 2010-2021 Inria. All rights reserved. + * Copyright © 2010-2023 Inria. All rights reserved. * Copyright © 2010-2011 Université Bordeaux * Copyright © 2011 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. @@ -43,6 +43,9 @@ extern "C" { /** \brief Return the domain, bus and device IDs of the CUDA device whose index is \p idx. * * Device index \p idx must match the local machine. + * + * \return 0 on success. + * \return -1 on error, for instance if device information could not be found. */ static __hwloc_inline int hwloc_cudart_get_device_pci_ids(hwloc_topology_t topology __hwloc_attribute_unused, @@ -84,6 +87,9 @@ hwloc_cudart_get_device_pci_ids(hwloc_topology_t topology __hwloc_attribute_unus * * This function is currently only implemented in a meaningful way for * Linux; other systems will simply get a full cpuset. + * + * \return 0 on success. + * \return -1 on error, for instance if device information could not be found. 
*/ static __hwloc_inline int hwloc_cudart_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused, diff --git a/src/3rdparty/hwloc/include/hwloc/diff.h b/src/3rdparty/hwloc/include/hwloc/diff.h index 0ad0486b..4d822434 100644 --- a/src/3rdparty/hwloc/include/hwloc/diff.h +++ b/src/3rdparty/hwloc/include/hwloc/diff.h @@ -1,5 +1,5 @@ /* - * Copyright © 2013-2020 Inria. All rights reserved. + * Copyright © 2013-2023 Inria. All rights reserved. * See COPYING in top-level directory. */ @@ -222,6 +222,8 @@ enum hwloc_topology_diff_apply_flags_e { HWLOC_DECLSPEC int hwloc_topology_diff_apply(hwloc_topology_t topology, hwloc_topology_diff_t diff, unsigned long flags); /** \brief Destroy a list of topology differences. + * + * \return 0. */ HWLOC_DECLSPEC int hwloc_topology_diff_destroy(hwloc_topology_diff_t diff); @@ -233,6 +235,8 @@ HWLOC_DECLSPEC int hwloc_topology_diff_destroy(hwloc_topology_diff_t diff); * This identifier is usually the name of the other XML file * that contains the reference topology. * + * \return 0 on success, -1 on error. + * * \note the pointer returned in refname should later be freed * by the caller. */ @@ -246,10 +250,17 @@ HWLOC_DECLSPEC int hwloc_topology_diff_load_xml(const char *xmlpath, hwloc_topol * This identifier is usually the name of the other XML file * that contains the reference topology. * This attribute is given back when reading the diff from XML. + * + * \return 0 on success, -1 on error. */ HWLOC_DECLSPEC int hwloc_topology_diff_export_xml(hwloc_topology_diff_t diff, const char *refname, const char *xmlpath); /** \brief Load a list of topology differences from a XML buffer. + * + * Build a list of differences from the XML memory buffer given + * at \p xmlbuffer and of length \p buflen (including an ending \0). + * This buffer may have been filled earlier with + * hwloc_topology_diff_export_xmlbuffer(). 
* * If not \c NULL, \p refname will be filled with the identifier * string of the reference topology for the difference file, @@ -257,6 +268,8 @@ HWLOC_DECLSPEC int hwloc_topology_diff_export_xml(hwloc_topology_diff_t diff, co * This identifier is usually the name of the other XML file * that contains the reference topology. * + * \return 0 on success, -1 on error. + * * \note the pointer returned in refname should later be freed * by the caller. */ @@ -274,6 +287,8 @@ HWLOC_DECLSPEC int hwloc_topology_diff_load_xmlbuffer(const char *xmlbuffer, int * The returned buffer ends with a \0 that is included in the returned * length. * + * \return 0 on success, -1 on error. + * * \note The XML buffer should later be freed with hwloc_free_xmlbuffer(). */ HWLOC_DECLSPEC int hwloc_topology_diff_export_xmlbuffer(hwloc_topology_diff_t diff, const char *refname, char **xmlbuffer, int *buflen); diff --git a/src/3rdparty/hwloc/include/hwloc/distances.h b/src/3rdparty/hwloc/include/hwloc/distances.h index effa8663..71cca4b5 100644 --- a/src/3rdparty/hwloc/include/hwloc/distances.h +++ b/src/3rdparty/hwloc/include/hwloc/distances.h @@ -1,5 +1,5 @@ /* - * Copyright © 2010-2022 Inria. All rights reserved. + * Copyright © 2010-2023 Inria. All rights reserved. * See COPYING in top-level directory. */ @@ -131,6 +131,8 @@ enum hwloc_distances_kind_e { * * Each distance matrix returned in the \p distances array should be released * by the caller using hwloc_distances_release(). + * + * \return 0 on success, -1 on error. */ HWLOC_DECLSPEC int hwloc_distances_get(hwloc_topology_t topology, @@ -140,6 +142,8 @@ hwloc_distances_get(hwloc_topology_t topology, /** \brief Retrieve distance matrices for object at a specific depth in the topology. * * Identical to hwloc_distances_get() with the additional \p depth filter. + * + * \return 0 on success, -1 on error. 
*/ HWLOC_DECLSPEC int hwloc_distances_get_by_depth(hwloc_topology_t topology, int depth, @@ -149,6 +153,8 @@ hwloc_distances_get_by_depth(hwloc_topology_t topology, int depth, /** \brief Retrieve distance matrices for object of a specific type. * * Identical to hwloc_distances_get() with the additional \p type filter. + * + * \return 0 on success, -1 on error. */ HWLOC_DECLSPEC int hwloc_distances_get_by_type(hwloc_topology_t topology, hwloc_obj_type_t type, @@ -162,6 +168,8 @@ hwloc_distances_get_by_type(hwloc_topology_t topology, hwloc_obj_type_t type, * The name of the most common structure is "NUMALatency". * Others include "XGMIBandwidth", "XGMIHops", "XeLinkBandwidth", * and "NVLinkBandwidth". + * + * \return 0 on success, -1 on error. */ HWLOC_DECLSPEC int hwloc_distances_get_by_name(hwloc_topology_t topology, const char *name, @@ -171,7 +179,12 @@ hwloc_distances_get_by_name(hwloc_topology_t topology, const char *name, /** \brief Get a description of what a distances structure contains. * * For instance "NUMALatency" for hardware-provided NUMA distances (ACPI SLIT), - * or NULL if unknown. + * or \c NULL if unknown. + * + * \return the constant string with the name of the distance structure. + * + * \note The returned name should not be freed by the caller, + * it belongs to the hwloc library. */ HWLOC_DECLSPEC const char * hwloc_distances_get_name(hwloc_topology_t topology, struct hwloc_distances_s *distances); @@ -252,6 +265,8 @@ enum hwloc_distances_transform_e { * * \p flags must be \c 0 for now. * + * \return 0 on success, -1 on error for instance if flags are invalid. + * * \note Objects in distances array \p objs may be directly modified * in place without using hwloc_distances_transform(). * One may use hwloc_get_obj_with_same_locality() to easily convert @@ -272,6 +287,7 @@ HWLOC_DECLSPEC int hwloc_distances_transform(hwloc_topology_t topology, struct h /** \brief Find the index of an object in a distances structure. 
* + * \return the index of the object in the distances structure if any. * \return -1 if object \p obj is not involved in structure \p distances. */ static __hwloc_inline int @@ -289,6 +305,7 @@ hwloc_distances_obj_index(struct hwloc_distances_s *distances, hwloc_obj_t obj) * The distance from \p obj1 to \p obj2 is stored in the value pointed by * \p value1to2 and reciprocally. * + * \return 0 on success. * \return -1 if object \p obj1 or \p obj2 is not involved in structure \p distances. */ static __hwloc_inline int @@ -374,8 +391,8 @@ hwloc_distances_add_create(hwloc_topology_t topology, * * \p flags must be \c 0 for now. * - * \return \c 0 on success. - * \return \c -1 on error. + * \return 0 on success. + * \return -1 on error. */ HWLOC_DECLSPEC int hwloc_distances_add_values(hwloc_topology_t topology, hwloc_distances_add_handle_t handle, @@ -411,8 +428,8 @@ enum hwloc_distances_add_flag_e { * * On error, the temporary distances structure and its content are destroyed. * - * \return \c 0 on success. - * \return \c -1 on error. + * \return 0 on success. + * \return -1 on error. */ HWLOC_DECLSPEC int hwloc_distances_add_commit(hwloc_topology_t topology, hwloc_distances_add_handle_t handle, @@ -433,18 +450,24 @@ HWLOC_DECLSPEC int hwloc_distances_add_commit(hwloc_topology_t topology, * * If these distances were used to group objects, these additional * Group objects are not removed from the topology. + * + * \return 0 on success, -1 on error. */ HWLOC_DECLSPEC int hwloc_distances_remove(hwloc_topology_t topology); /** \brief Remove distance matrices for objects at a specific depth in the topology. * * Identical to hwloc_distances_remove() but only applies to one level of the topology. + * + * \return 0 on success, -1 on error. */ HWLOC_DECLSPEC int hwloc_distances_remove_by_depth(hwloc_topology_t topology, int depth); /** \brief Remove distance matrices for objects of a specific type in the topology. 
* * Identical to hwloc_distances_remove() but only applies to one level of the topology. + * + * \return 0 on success, -1 on error. */ static __hwloc_inline int hwloc_distances_remove_by_type(hwloc_topology_t topology, hwloc_obj_type_t type) @@ -458,6 +481,8 @@ hwloc_distances_remove_by_type(hwloc_topology_t topology, hwloc_obj_type_t type) /** \brief Release and remove the given distance matrice from the topology. * * This function includes a call to hwloc_distances_release(). + * + * \return 0 on success, -1 on error. */ HWLOC_DECLSPEC int hwloc_distances_release_remove(hwloc_topology_t topology, struct hwloc_distances_s *distances); diff --git a/src/3rdparty/hwloc/include/hwloc/export.h b/src/3rdparty/hwloc/include/hwloc/export.h index b178b77e..2ce5ab18 100644 --- a/src/3rdparty/hwloc/include/hwloc/export.h +++ b/src/3rdparty/hwloc/include/hwloc/export.h @@ -55,7 +55,7 @@ enum hwloc_topology_export_xml_flags_e { * * \p flags is a OR'ed set of ::hwloc_topology_export_xml_flags_e. * - * \return -1 if a failure occured. + * \return 0 on success, or -1 on error. * * \note See also hwloc_topology_set_userdata_export_callback() * for exporting application-specific object userdata. @@ -91,7 +91,7 @@ HWLOC_DECLSPEC int hwloc_topology_export_xml(hwloc_topology_t topology, const ch * * \p flags is a OR'ed set of ::hwloc_topology_export_xml_flags_e. * - * \return -1 if a failure occured. + * \return 0 on success, or -1 on error. * * \note See also hwloc_topology_set_userdata_export_callback() * for exporting application-specific object userdata. @@ -145,13 +145,15 @@ HWLOC_DECLSPEC void hwloc_topology_set_userdata_export_callback(hwloc_topology_t * that were given to the export callback. * * Only printable characters may be exported to XML string attributes. - * If a non-printable character is passed in \p name or \p buffer, - * the function returns -1 with errno set to EINVAL. 
* * If exporting binary data, the application should first encode into * printable characters only (or use hwloc_export_obj_userdata_base64()). * It should also take care of portability issues if the export may * be reimported on a different architecture. + * + * \return 0 on success. + * \return -1 with errno set to \c EINVAL if a non-printable character is + * passed in \p name or \b buffer. */ HWLOC_DECLSPEC int hwloc_export_obj_userdata(void *reserved, hwloc_topology_t topology, hwloc_obj_t obj, const char *name, const void *buffer, size_t length); @@ -165,8 +167,14 @@ HWLOC_DECLSPEC int hwloc_export_obj_userdata(void *reserved, hwloc_topology_t to * This function may only be called from within the export() callback passed * to hwloc_topology_set_userdata_export_callback(). * + * The name must be made of printable characters for export to XML string attributes. + * * The function does not take care of portability issues if the export * may be reimported on a different architecture. + * + * \return 0 on success. + * \return -1 with errno set to \c EINVAL if a non-printable character is + * passed in \p name. */ HWLOC_DECLSPEC int hwloc_export_obj_userdata_base64(void *reserved, hwloc_topology_t topology, hwloc_obj_t obj, const char *name, const void *buffer, size_t length); diff --git a/src/3rdparty/hwloc/include/hwloc/gl.h b/src/3rdparty/hwloc/include/hwloc/gl.h index 56a402a8..2ad9284f 100644 --- a/src/3rdparty/hwloc/include/hwloc/gl.h +++ b/src/3rdparty/hwloc/include/hwloc/gl.h @@ -1,6 +1,6 @@ /* * Copyright © 2012 Blue Brain Project, EPFL. All rights reserved. - * Copyright © 2012-2021 Inria. All rights reserved. + * Copyright © 2012-2023 Inria. All rights reserved. * See COPYING in top-level directory. */ @@ -102,7 +102,8 @@ hwloc_gl_get_display_osdev_by_name(hwloc_topology_t topology, * Retrieves the OpenGL display port (server) in \p port and device (screen) * in \p screen that correspond to the given hwloc OS device object. 
* - * \return \c -1 if none could be found. + * \return 0 on success. + * \return -1 if none could be found. * * The topology \p topology does not necessarily have to match the current * machine. For instance the topology may be an XML import of a remote host. diff --git a/src/3rdparty/hwloc/include/hwloc/glibc-sched.h b/src/3rdparty/hwloc/include/hwloc/glibc-sched.h index 3c5368be..d3f16f45 100644 --- a/src/3rdparty/hwloc/include/hwloc/glibc-sched.h +++ b/src/3rdparty/hwloc/include/hwloc/glibc-sched.h @@ -1,6 +1,6 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2020 Inria. All rights reserved. + * Copyright © 2009-2023 Inria. All rights reserved. * Copyright © 2009-2011 Université Bordeaux * Copyright © 2011 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. @@ -52,6 +52,8 @@ extern "C" { * that takes a cpu_set_t as input parameter. * * \p schedsetsize should be sizeof(cpu_set_t) unless \p schedset was dynamically allocated with CPU_ALLOC + * + * \return 0. */ static __hwloc_inline int hwloc_cpuset_to_glibc_sched_affinity(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_const_cpuset_t hwlocset, @@ -80,6 +82,9 @@ hwloc_cpuset_to_glibc_sched_affinity(hwloc_topology_t topology __hwloc_attribute * that takes a cpu_set_t as input parameter. * * \p schedsetsize should be sizeof(cpu_set_t) unless \p schedset was dynamically allocated with CPU_ALLOC + * + * \return 0 on success. + * \return -1 with errno set to \c ENOMEM if some internal reallocation failed. 
*/ static __hwloc_inline int hwloc_cpuset_from_glibc_sched_affinity(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_cpuset_t hwlocset, @@ -95,7 +100,8 @@ hwloc_cpuset_from_glibc_sched_affinity(hwloc_topology_t topology __hwloc_attribu cpu = 0; while (count) { if (CPU_ISSET_S(cpu, schedsetsize, schedset)) { - hwloc_bitmap_set(hwlocset, cpu); + if (hwloc_bitmap_set(hwlocset, cpu) < 0) + return -1; count--; } cpu++; @@ -107,7 +113,8 @@ hwloc_cpuset_from_glibc_sched_affinity(hwloc_topology_t topology __hwloc_attribu assert(schedsetsize == sizeof(cpu_set_t)); for(cpu=0; cpucpuset, set)) - return NULL; - while (!hwloc_bitmap_isincluded(obj->cpuset, set)) { - /* while the object intersects without being included, look at its children */ - hwloc_obj_t child = obj->first_child; - while (child) { - if (hwloc_bitmap_intersects(child->cpuset, set)) - break; - child = child->next_sibling; - } - if (!child) - /* no child intersects, return their father */ - return obj; - /* found one intersecting child, look at its children */ - obj = child; - } - /* obj is included, return it */ - return obj; -} - -/** \brief Get the set of largest objects covering exactly a given cpuset \p set - * - * \return the number of objects returned in \p objs. - */ -HWLOC_DECLSPEC int hwloc_get_largest_objs_inside_cpuset (hwloc_topology_t topology, hwloc_const_cpuset_t set, - hwloc_obj_t * __hwloc_restrict objs, int max); - -/** \brief Return the next object at depth \p depth included in CPU set \p set. - * - * If \p prev is \c NULL, return the first object at depth \p depth - * included in \p set. The next invokation should pass the previous - * return value in \p prev so as to obtain the next object in \p set. - * - * \note Objects with empty CPU sets are ignored - * (otherwise they would be considered included in any given set). - * - * \note This function cannot work if objects at the given depth do - * not have CPU sets (I/O or Misc objects). 
- */ -static __hwloc_inline hwloc_obj_t -hwloc_get_next_obj_inside_cpuset_by_depth (hwloc_topology_t topology, hwloc_const_cpuset_t set, - int depth, hwloc_obj_t prev) -{ - hwloc_obj_t next = hwloc_get_next_obj_by_depth(topology, depth, prev); - if (!next) - return NULL; - while (next && (hwloc_bitmap_iszero(next->cpuset) || !hwloc_bitmap_isincluded(next->cpuset, set))) - next = next->next_cousin; - return next; -} - -/** \brief Return the next object of type \p type included in CPU set \p set. - * - * If there are multiple or no depth for given type, return \c NULL - * and let the caller fallback to - * hwloc_get_next_obj_inside_cpuset_by_depth(). - * - * \note Objects with empty CPU sets are ignored - * (otherwise they would be considered included in any given set). - * - * \note This function cannot work if objects of the given type do - * not have CPU sets (I/O or Misc objects). - */ -static __hwloc_inline hwloc_obj_t -hwloc_get_next_obj_inside_cpuset_by_type (hwloc_topology_t topology, hwloc_const_cpuset_t set, - hwloc_obj_type_t type, hwloc_obj_t prev) -{ - int depth = hwloc_get_type_depth(topology, type); - if (depth == HWLOC_TYPE_DEPTH_UNKNOWN || depth == HWLOC_TYPE_DEPTH_MULTIPLE) - return NULL; - return hwloc_get_next_obj_inside_cpuset_by_depth(topology, set, depth, prev); -} - -/** \brief Return the (logically) \p idx -th object at depth \p depth included in CPU set \p set. - * - * \note Objects with empty CPU sets are ignored - * (otherwise they would be considered included in any given set). - * - * \note This function cannot work if objects at the given depth do - * not have CPU sets (I/O or Misc objects). 
- */ -static __hwloc_inline hwloc_obj_t -hwloc_get_obj_inside_cpuset_by_depth (hwloc_topology_t topology, hwloc_const_cpuset_t set, - int depth, unsigned idx) __hwloc_attribute_pure; -static __hwloc_inline hwloc_obj_t -hwloc_get_obj_inside_cpuset_by_depth (hwloc_topology_t topology, hwloc_const_cpuset_t set, - int depth, unsigned idx) -{ - hwloc_obj_t obj = hwloc_get_obj_by_depth (topology, depth, 0); - unsigned count = 0; - if (!obj) - return NULL; - while (obj) { - if (!hwloc_bitmap_iszero(obj->cpuset) && hwloc_bitmap_isincluded(obj->cpuset, set)) { - if (count == idx) - return obj; - count++; - } - obj = obj->next_cousin; - } - return NULL; -} - -/** \brief Return the \p idx -th object of type \p type included in CPU set \p set. - * - * If there are multiple or no depth for given type, return \c NULL - * and let the caller fallback to - * hwloc_get_obj_inside_cpuset_by_depth(). - * - * \note Objects with empty CPU sets are ignored - * (otherwise they would be considered included in any given set). - * - * \note This function cannot work if objects of the given type do - * not have CPU sets (I/O or Misc objects). - */ -static __hwloc_inline hwloc_obj_t -hwloc_get_obj_inside_cpuset_by_type (hwloc_topology_t topology, hwloc_const_cpuset_t set, - hwloc_obj_type_t type, unsigned idx) __hwloc_attribute_pure; -static __hwloc_inline hwloc_obj_t -hwloc_get_obj_inside_cpuset_by_type (hwloc_topology_t topology, hwloc_const_cpuset_t set, - hwloc_obj_type_t type, unsigned idx) -{ - int depth = hwloc_get_type_depth(topology, type); - if (depth == HWLOC_TYPE_DEPTH_UNKNOWN || depth == HWLOC_TYPE_DEPTH_MULTIPLE) - return NULL; - return hwloc_get_obj_inside_cpuset_by_depth(topology, set, depth, idx); -} - -/** \brief Return the number of objects at depth \p depth included in CPU set \p set. - * - * \note Objects with empty CPU sets are ignored - * (otherwise they would be considered included in any given set). 
- * - * \note This function cannot work if objects at the given depth do - * not have CPU sets (I/O or Misc objects). - */ -static __hwloc_inline unsigned -hwloc_get_nbobjs_inside_cpuset_by_depth (hwloc_topology_t topology, hwloc_const_cpuset_t set, - int depth) __hwloc_attribute_pure; -static __hwloc_inline unsigned -hwloc_get_nbobjs_inside_cpuset_by_depth (hwloc_topology_t topology, hwloc_const_cpuset_t set, - int depth) -{ - hwloc_obj_t obj = hwloc_get_obj_by_depth (topology, depth, 0); - unsigned count = 0; - if (!obj) - return 0; - while (obj) { - if (!hwloc_bitmap_iszero(obj->cpuset) && hwloc_bitmap_isincluded(obj->cpuset, set)) - count++; - obj = obj->next_cousin; - } - return count; -} - -/** \brief Return the number of objects of type \p type included in CPU set \p set. - * - * If no object for that type exists inside CPU set \p set, 0 is - * returned. If there are several levels with objects of that type - * inside CPU set \p set, -1 is returned. - * - * \note Objects with empty CPU sets are ignored - * (otherwise they would be considered included in any given set). - * - * \note This function cannot work if objects of the given type do - * not have CPU sets (I/O objects). - */ -static __hwloc_inline int -hwloc_get_nbobjs_inside_cpuset_by_type (hwloc_topology_t topology, hwloc_const_cpuset_t set, - hwloc_obj_type_t type) __hwloc_attribute_pure; -static __hwloc_inline int -hwloc_get_nbobjs_inside_cpuset_by_type (hwloc_topology_t topology, hwloc_const_cpuset_t set, - hwloc_obj_type_t type) -{ - int depth = hwloc_get_type_depth(topology, type); - if (depth == HWLOC_TYPE_DEPTH_UNKNOWN) - return 0; - if (depth == HWLOC_TYPE_DEPTH_MULTIPLE) - return -1; /* FIXME: agregate nbobjs from different levels? */ - return (int) hwloc_get_nbobjs_inside_cpuset_by_depth(topology, set, depth); -} - -/** \brief Return the logical index among the objects included in CPU set \p set. 
- * - * Consult all objects in the same level as \p obj and inside CPU set \p set - * in the logical order, and return the index of \p obj within them. - * If \p set covers the entire topology, this is the logical index of \p obj. - * Otherwise, this is similar to a logical index within the part of the topology - * defined by CPU set \p set. - * - * \note Objects with empty CPU sets are ignored - * (otherwise they would be considered included in any given set). - * - * \note This function cannot work if obj does not have CPU sets (I/O objects). - */ -static __hwloc_inline int -hwloc_get_obj_index_inside_cpuset (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_const_cpuset_t set, - hwloc_obj_t obj) __hwloc_attribute_pure; -static __hwloc_inline int -hwloc_get_obj_index_inside_cpuset (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_const_cpuset_t set, - hwloc_obj_t obj) -{ - int idx = 0; - if (!hwloc_bitmap_isincluded(obj->cpuset, set)) - return -1; - /* count how many objects are inside the cpuset on the way from us to the beginning of the level */ - while ((obj = obj->prev_cousin) != NULL) - if (!hwloc_bitmap_iszero(obj->cpuset) && hwloc_bitmap_isincluded(obj->cpuset, set)) - idx++; - return idx; -} - -/** @} */ - - - -/** \defgroup hwlocality_helper_find_covering Finding Objects covering at least CPU set - * @{ - */ - -/** \brief Get the child covering at least CPU set \p set. - * - * \return \c NULL if no child matches or if \p set is empty. - * - * \note This function cannot work if parent does not have a CPU set (I/O or Misc objects). 
- */ -static __hwloc_inline hwloc_obj_t -hwloc_get_child_covering_cpuset (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_const_cpuset_t set, - hwloc_obj_t parent) __hwloc_attribute_pure; -static __hwloc_inline hwloc_obj_t -hwloc_get_child_covering_cpuset (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_const_cpuset_t set, - hwloc_obj_t parent) -{ - hwloc_obj_t child; - if (hwloc_bitmap_iszero(set)) - return NULL; - child = parent->first_child; - while (child) { - if (child->cpuset && hwloc_bitmap_isincluded(set, child->cpuset)) - return child; - child = child->next_sibling; - } - return NULL; -} - -/** \brief Get the lowest object covering at least CPU set \p set - * - * \return \c NULL if no object matches or if \p set is empty. - */ -static __hwloc_inline hwloc_obj_t -hwloc_get_obj_covering_cpuset (hwloc_topology_t topology, hwloc_const_cpuset_t set) __hwloc_attribute_pure; -static __hwloc_inline hwloc_obj_t -hwloc_get_obj_covering_cpuset (hwloc_topology_t topology, hwloc_const_cpuset_t set) -{ - struct hwloc_obj *current = hwloc_get_root_obj(topology); - if (hwloc_bitmap_iszero(set) || !hwloc_bitmap_isincluded(set, current->cpuset)) - return NULL; - while (1) { - hwloc_obj_t child = hwloc_get_child_covering_cpuset(topology, set, current); - if (!child) - return current; - current = child; - } -} - -/** \brief Iterate through same-depth objects covering at least CPU set \p set - * - * If object \p prev is \c NULL, return the first object at depth \p - * depth covering at least part of CPU set \p set. The next - * invokation should pass the previous return value in \p prev so as - * to obtain the next object covering at least another part of \p set. - * - * \note This function cannot work if objects at the given depth do - * not have CPU sets (I/O or Misc objects). 
- */ -static __hwloc_inline hwloc_obj_t -hwloc_get_next_obj_covering_cpuset_by_depth(hwloc_topology_t topology, hwloc_const_cpuset_t set, - int depth, hwloc_obj_t prev) -{ - hwloc_obj_t next = hwloc_get_next_obj_by_depth(topology, depth, prev); - if (!next) - return NULL; - while (next && !hwloc_bitmap_intersects(set, next->cpuset)) - next = next->next_cousin; - return next; -} - -/** \brief Iterate through same-type objects covering at least CPU set \p set - * - * If object \p prev is \c NULL, return the first object of type \p - * type covering at least part of CPU set \p set. The next invokation - * should pass the previous return value in \p prev so as to obtain - * the next object of type \p type covering at least another part of - * \p set. - * - * If there are no or multiple depths for type \p type, \c NULL is returned. - * The caller may fallback to hwloc_get_next_obj_covering_cpuset_by_depth() - * for each depth. - * - * \note This function cannot work if objects of the given type do - * not have CPU sets (I/O or Misc objects). - */ -static __hwloc_inline hwloc_obj_t -hwloc_get_next_obj_covering_cpuset_by_type(hwloc_topology_t topology, hwloc_const_cpuset_t set, - hwloc_obj_type_t type, hwloc_obj_t prev) -{ - int depth = hwloc_get_type_depth(topology, type); - if (depth == HWLOC_TYPE_DEPTH_UNKNOWN || depth == HWLOC_TYPE_DEPTH_MULTIPLE) - return NULL; - return hwloc_get_next_obj_covering_cpuset_by_depth(topology, set, depth, prev); -} - -/** @} */ - - - -/** \defgroup hwlocality_helper_ancestors Looking at Ancestor and Child Objects - * @{ - * - * Be sure to see the figure in \ref termsanddefs that shows a - * complete topology tree, including depths, child/sibling/cousin - * relationships, and an example of an asymmetric topology where one - * package has fewer caches than its peers. - */ - -/** \brief Returns the ancestor object of \p obj at depth \p depth. 
- * - * \note \p depth should not be the depth of PU or NUMA objects - * since they are ancestors of no objects (except Misc or I/O). - * This function rather expects an intermediate level depth, - * such as the depth of Packages, Cores, or Caches. - */ -static __hwloc_inline hwloc_obj_t -hwloc_get_ancestor_obj_by_depth (hwloc_topology_t topology __hwloc_attribute_unused, int depth, hwloc_obj_t obj) __hwloc_attribute_pure; -static __hwloc_inline hwloc_obj_t -hwloc_get_ancestor_obj_by_depth (hwloc_topology_t topology __hwloc_attribute_unused, int depth, hwloc_obj_t obj) -{ - hwloc_obj_t ancestor = obj; - if (obj->depth < depth) - return NULL; - while (ancestor && ancestor->depth > depth) - ancestor = ancestor->parent; - return ancestor; -} - -/** \brief Returns the ancestor object of \p obj with type \p type. - * - * \note \p type should not be ::HWLOC_OBJ_PU or ::HWLOC_OBJ_NUMANODE - * since these objects are ancestors of no objects (except Misc or I/O). - * This function rather expects an intermediate object type, - * such as ::HWLOC_OBJ_PACKAGE, ::HWLOC_OBJ_CORE, etc. 
- */ -static __hwloc_inline hwloc_obj_t -hwloc_get_ancestor_obj_by_type (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_obj_type_t type, hwloc_obj_t obj) __hwloc_attribute_pure; -static __hwloc_inline hwloc_obj_t -hwloc_get_ancestor_obj_by_type (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_obj_type_t type, hwloc_obj_t obj) -{ - hwloc_obj_t ancestor = obj->parent; - while (ancestor && ancestor->type != type) - ancestor = ancestor->parent; - return ancestor; -} - -/** \brief Returns the common parent object to objects \p obj1 and \p obj2 */ -static __hwloc_inline hwloc_obj_t -hwloc_get_common_ancestor_obj (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_obj_t obj1, hwloc_obj_t obj2) __hwloc_attribute_pure; -static __hwloc_inline hwloc_obj_t -hwloc_get_common_ancestor_obj (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_obj_t obj1, hwloc_obj_t obj2) -{ - /* the loop isn't so easy since intermediate ancestors may have - * different depth, causing us to alternate between using obj1->parent - * and obj2->parent. Also, even if at some point we find ancestors of - * of the same depth, their ancestors may have different depth again. - */ - while (obj1 != obj2) { - while (obj1->depth > obj2->depth) - obj1 = obj1->parent; - while (obj2->depth > obj1->depth) - obj2 = obj2->parent; - if (obj1 != obj2 && obj1->depth == obj2->depth) { - obj1 = obj1->parent; - obj2 = obj2->parent; - } - } - return obj1; -} - -/** \brief Returns true if \p obj is inside the subtree beginning with ancestor object \p subtree_root. - * - * \note This function cannot work if \p obj and \p subtree_root objects do - * not have CPU sets (I/O or Misc objects). 
- */ -static __hwloc_inline int -hwloc_obj_is_in_subtree (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_obj_t obj, hwloc_obj_t subtree_root) __hwloc_attribute_pure; -static __hwloc_inline int -hwloc_obj_is_in_subtree (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_obj_t obj, hwloc_obj_t subtree_root) -{ - return obj->cpuset && subtree_root->cpuset && hwloc_bitmap_isincluded(obj->cpuset, subtree_root->cpuset); -} - -/** \brief Return the next child. - * - * Return the next child among the normal children list, - * then among the memory children list, then among the I/O - * children list, then among the Misc children list. - * - * If \p prev is \c NULL, return the first child. - * - * Return \c NULL when there is no next child. - */ -static __hwloc_inline hwloc_obj_t -hwloc_get_next_child (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_obj_t parent, hwloc_obj_t prev) -{ - hwloc_obj_t obj; - int state = 0; - if (prev) { - if (prev->type == HWLOC_OBJ_MISC) - state = 3; - else if (prev->type == HWLOC_OBJ_BRIDGE || prev->type == HWLOC_OBJ_PCI_DEVICE || prev->type == HWLOC_OBJ_OS_DEVICE) - state = 2; - else if (prev->type == HWLOC_OBJ_NUMANODE) - state = 1; - obj = prev->next_sibling; - } else { - obj = parent->first_child; - } - if (!obj && state == 0) { - obj = parent->memory_first_child; - state = 1; - } - if (!obj && state == 1) { - obj = parent->io_first_child; - state = 2; - } - if (!obj && state == 2) { - obj = parent->misc_first_child; - state = 3; - } - return obj; -} - -/** @} */ - - - /** \defgroup hwlocality_helper_types Kinds of object Type * @{ * @@ -496,6 +35,8 @@ hwloc_get_next_child (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_ * or I/O (i.e. hwloc_obj_type_is_io() returns 1) * or Misc (i.e. equal to ::HWLOC_OBJ_MISC). * It cannot be of more than one of these kinds. + * + * See also Object Kind in \ref termsanddefs. */ /** \brief Check whether an object type is Normal. 
@@ -565,6 +106,511 @@ hwloc_obj_type_is_icache(hwloc_obj_type_t type); +/** \defgroup hwlocality_helper_find_inside Finding Objects inside a CPU set + * @{ + */ + +/** \brief Get the first largest object included in the given cpuset \p set. + * + * \return the first object that is included in \p set and whose parent is not. + * \return \c NULL if no such object exists. + * + * This is convenient for iterating over all largest objects within a CPU set + * by doing a loop getting the first largest object and clearing its CPU set + * from the remaining CPU set. + */ +static __hwloc_inline hwloc_obj_t +hwloc_get_first_largest_obj_inside_cpuset(hwloc_topology_t topology, hwloc_const_cpuset_t set) +{ + hwloc_obj_t obj = hwloc_get_root_obj(topology); + if (!hwloc_bitmap_intersects(obj->cpuset, set)) + return NULL; + while (!hwloc_bitmap_isincluded(obj->cpuset, set)) { + /* while the object intersects without being included, look at its children */ + hwloc_obj_t child = obj->first_child; + while (child) { + if (hwloc_bitmap_intersects(child->cpuset, set)) + break; + child = child->next_sibling; + } + if (!child) + /* no child intersects, return their father */ + return obj; + /* found one intersecting child, look at its children */ + obj = child; + } + /* obj is included, return it */ + return obj; +} + +/** \brief Get the set of largest objects covering exactly a given cpuset \p set + * + * \return the number of objects returned in \p objs. + * \return -1 if no set of objects may cover that cpuset. + */ +HWLOC_DECLSPEC int hwloc_get_largest_objs_inside_cpuset (hwloc_topology_t topology, hwloc_const_cpuset_t set, + hwloc_obj_t * __hwloc_restrict objs, int max); + +/** \brief Return the next object at depth \p depth included in CPU set \p set. + * + * The next invokation should pass the previous return value in \p prev + * so as to obtain the next object in \p set. + * + * \return the first object at depth \p depth included in \p set if \p prev is \c NULL. 
+ * \return the next object at depth \p depth included in \p set if \p prev is not \c NULL. + * \return \c NULL if there is no next object. + * + * \note Objects with empty CPU sets are ignored + * (otherwise they would be considered included in any given set). + * + * \note This function cannot work if objects at the given depth do + * not have CPU sets (I/O or Misc objects). + */ +static __hwloc_inline hwloc_obj_t +hwloc_get_next_obj_inside_cpuset_by_depth (hwloc_topology_t topology, hwloc_const_cpuset_t set, + int depth, hwloc_obj_t prev) +{ + hwloc_obj_t next = hwloc_get_next_obj_by_depth(topology, depth, prev); + if (!next) + return NULL; + while (next && (hwloc_bitmap_iszero(next->cpuset) || !hwloc_bitmap_isincluded(next->cpuset, set))) + next = next->next_cousin; + return next; +} + +/** \brief Return the next object of type \p type included in CPU set \p set. + * + * The next invokation should pass the previous return value in \p prev + * so as to obtain the next object in \p set. + * + * \return the first object of type \p type included in \p set if \p prev is \c NULL. + * \return the next object of type \p type included in \p set if \p prev is not \c NULL. + * \return \c NULL if there is no next object. + * \return \c NULL if there is no depth for the given type. + * \return \c NULL if there are multiple depths for the given type, + * the caller should fallback to hwloc_get_next_obj_inside_cpuset_by_depth(). + * + * \note Objects with empty CPU sets are ignored + * (otherwise they would be considered included in any given set). + * + * \note This function cannot work if objects of the given type do + * not have CPU sets (I/O or Misc objects). 
+ */ +static __hwloc_inline hwloc_obj_t +hwloc_get_next_obj_inside_cpuset_by_type (hwloc_topology_t topology, hwloc_const_cpuset_t set, + hwloc_obj_type_t type, hwloc_obj_t prev) +{ + int depth = hwloc_get_type_depth(topology, type); + if (depth == HWLOC_TYPE_DEPTH_UNKNOWN || depth == HWLOC_TYPE_DEPTH_MULTIPLE) + return NULL; + return hwloc_get_next_obj_inside_cpuset_by_depth(topology, set, depth, prev); +} + +/** \brief Return the (logically) \p idx -th object at depth \p depth included in CPU set \p set. + * + * \return the object if any, \c NULL otherwise. + * + * \note Objects with empty CPU sets are ignored + * (otherwise they would be considered included in any given set). + * + * \note This function cannot work if objects at the given depth do + * not have CPU sets (I/O or Misc objects). + */ +static __hwloc_inline hwloc_obj_t +hwloc_get_obj_inside_cpuset_by_depth (hwloc_topology_t topology, hwloc_const_cpuset_t set, + int depth, unsigned idx) __hwloc_attribute_pure; +static __hwloc_inline hwloc_obj_t +hwloc_get_obj_inside_cpuset_by_depth (hwloc_topology_t topology, hwloc_const_cpuset_t set, + int depth, unsigned idx) +{ + hwloc_obj_t obj = hwloc_get_obj_by_depth (topology, depth, 0); + unsigned count = 0; + if (!obj) + return NULL; + while (obj) { + if (!hwloc_bitmap_iszero(obj->cpuset) && hwloc_bitmap_isincluded(obj->cpuset, set)) { + if (count == idx) + return obj; + count++; + } + obj = obj->next_cousin; + } + return NULL; +} + +/** \brief Return the \p idx -th object of type \p type included in CPU set \p set. + * + * \return the object if any. + * \return \c NULL if there is no such object. + * \return \c NULL if there is no depth for given type. + * \return \c NULL if there are multiple depths for given type, + * the caller should fallback to hwloc_get_obj_inside_cpuset_by_depth(). + * + * \note Objects with empty CPU sets are ignored + * (otherwise they would be considered included in any given set). 
+ * + * \note This function cannot work if objects of the given type do + * not have CPU sets (I/O or Misc objects). + */ +static __hwloc_inline hwloc_obj_t +hwloc_get_obj_inside_cpuset_by_type (hwloc_topology_t topology, hwloc_const_cpuset_t set, + hwloc_obj_type_t type, unsigned idx) __hwloc_attribute_pure; +static __hwloc_inline hwloc_obj_t +hwloc_get_obj_inside_cpuset_by_type (hwloc_topology_t topology, hwloc_const_cpuset_t set, + hwloc_obj_type_t type, unsigned idx) +{ + int depth = hwloc_get_type_depth(topology, type); + if (depth == HWLOC_TYPE_DEPTH_UNKNOWN || depth == HWLOC_TYPE_DEPTH_MULTIPLE) + return NULL; + return hwloc_get_obj_inside_cpuset_by_depth(topology, set, depth, idx); +} + +/** \brief Return the number of objects at depth \p depth included in CPU set \p set. + * + * \return the number of objects. + * \return 0 if the depth is invalid. + * + * \note Objects with empty CPU sets are ignored + * (otherwise they would be considered included in any given set). + * + * \note This function cannot work if objects at the given depth do + * not have CPU sets (I/O or Misc objects). + */ +static __hwloc_inline unsigned +hwloc_get_nbobjs_inside_cpuset_by_depth (hwloc_topology_t topology, hwloc_const_cpuset_t set, + int depth) __hwloc_attribute_pure; +static __hwloc_inline unsigned +hwloc_get_nbobjs_inside_cpuset_by_depth (hwloc_topology_t topology, hwloc_const_cpuset_t set, + int depth) +{ + hwloc_obj_t obj = hwloc_get_obj_by_depth (topology, depth, 0); + unsigned count = 0; + if (!obj) + return 0; + while (obj) { + if (!hwloc_bitmap_iszero(obj->cpuset) && hwloc_bitmap_isincluded(obj->cpuset, set)) + count++; + obj = obj->next_cousin; + } + return count; +} + +/** \brief Return the number of objects of type \p type included in CPU set \p set. + * + * \return the number of objects. + * \return 0 if there are no objects of that type in the topology. 
+ * \return -1 if there are multiple levels of objects of that type, + * the caller should fallback to hwloc_get_nbobjs_inside_cpuset_by_depth(). + * + * \note Objects with empty CPU sets are ignored + * (otherwise they would be considered included in any given set). + * + * \note This function cannot work if objects of the given type do + * not have CPU sets (I/O objects). + */ +static __hwloc_inline int +hwloc_get_nbobjs_inside_cpuset_by_type (hwloc_topology_t topology, hwloc_const_cpuset_t set, + hwloc_obj_type_t type) __hwloc_attribute_pure; +static __hwloc_inline int +hwloc_get_nbobjs_inside_cpuset_by_type (hwloc_topology_t topology, hwloc_const_cpuset_t set, + hwloc_obj_type_t type) +{ + int depth = hwloc_get_type_depth(topology, type); + if (depth == HWLOC_TYPE_DEPTH_UNKNOWN) + return 0; + if (depth == HWLOC_TYPE_DEPTH_MULTIPLE) + return -1; /* FIXME: aggregate nbobjs from different levels? */ + return (int) hwloc_get_nbobjs_inside_cpuset_by_depth(topology, set, depth); +} + +/** \brief Return the logical index among the objects included in CPU set \p set. + * + * Consult all objects in the same level as \p obj and inside CPU set \p set + * in the logical order, and return the index of \p obj within them. + * If \p set covers the entire topology, this is the logical index of \p obj. + * Otherwise, this is similar to a logical index within the part of the topology + * defined by CPU set \p set. + * + * \return the logical index among the objects included in the set if any. + * \return -1 if the object is not included in the set. + * + * \note Objects with empty CPU sets are ignored + * (otherwise they would be considered included in any given set). + * + * \note This function cannot work if obj does not have CPU sets (I/O objects).
+ */ +static __hwloc_inline int +hwloc_get_obj_index_inside_cpuset (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_const_cpuset_t set, + hwloc_obj_t obj) __hwloc_attribute_pure; +static __hwloc_inline int +hwloc_get_obj_index_inside_cpuset (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_const_cpuset_t set, + hwloc_obj_t obj) +{ + int idx = 0; + if (!hwloc_bitmap_isincluded(obj->cpuset, set)) + return -1; + /* count how many objects are inside the cpuset on the way from us to the beginning of the level */ + while ((obj = obj->prev_cousin) != NULL) + if (!hwloc_bitmap_iszero(obj->cpuset) && hwloc_bitmap_isincluded(obj->cpuset, set)) + idx++; + return idx; +} + +/** @} */ + + + +/** \defgroup hwlocality_helper_find_covering Finding Objects covering at least CPU set + * @{ + */ + +/** \brief Get the child covering at least CPU set \p set. + * + * \return the child that covers the set entirely. + * \return \c NULL if no child matches or if \p set is empty. + * + * \note This function cannot work if parent does not have a CPU set (I/O or Misc objects). + */ +static __hwloc_inline hwloc_obj_t +hwloc_get_child_covering_cpuset (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_const_cpuset_t set, + hwloc_obj_t parent) __hwloc_attribute_pure; +static __hwloc_inline hwloc_obj_t +hwloc_get_child_covering_cpuset (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_const_cpuset_t set, + hwloc_obj_t parent) +{ + hwloc_obj_t child; + if (hwloc_bitmap_iszero(set)) + return NULL; + child = parent->first_child; + while (child) { + if (child->cpuset && hwloc_bitmap_isincluded(set, child->cpuset)) + return child; + child = child->next_sibling; + } + return NULL; +} + +/** \brief Get the lowest object covering at least CPU set \p set + * + * \return the lowest object covering the set entirely. + * \return \c NULL if no object matches or if \p set is empty. 
+ */ +static __hwloc_inline hwloc_obj_t +hwloc_get_obj_covering_cpuset (hwloc_topology_t topology, hwloc_const_cpuset_t set) __hwloc_attribute_pure; +static __hwloc_inline hwloc_obj_t +hwloc_get_obj_covering_cpuset (hwloc_topology_t topology, hwloc_const_cpuset_t set) +{ + struct hwloc_obj *current = hwloc_get_root_obj(topology); + if (hwloc_bitmap_iszero(set) || !hwloc_bitmap_isincluded(set, current->cpuset)) + return NULL; + while (1) { + hwloc_obj_t child = hwloc_get_child_covering_cpuset(topology, set, current); + if (!child) + return current; + current = child; + } +} + +/** \brief Iterate through same-depth objects covering at least CPU set \p set + * + * The next invocation should pass the previous return value in \p prev so as + * to obtain the next object covering at least another part of \p set. + * + * \return the first object at depth \p depth covering at least part of CPU set \p set + * if object \p prev is \c NULL. + * \return the next one if \p prev is not \c NULL. + * \return \c NULL if there is no next object. + * + * \note This function cannot work if objects at the given depth do + * not have CPU sets (I/O or Misc objects). + */ +static __hwloc_inline hwloc_obj_t +hwloc_get_next_obj_covering_cpuset_by_depth(hwloc_topology_t topology, hwloc_const_cpuset_t set, + int depth, hwloc_obj_t prev) +{ + hwloc_obj_t next = hwloc_get_next_obj_by_depth(topology, depth, prev); + if (!next) + return NULL; + while (next && !hwloc_bitmap_intersects(set, next->cpuset)) + next = next->next_cousin; + return next; +} + +/** \brief Iterate through same-type objects covering at least CPU set \p set + * + * The next invocation should pass the previous return value in \p prev so as to obtain + * the next object of type \p type covering at least another part of \p set. + * + * \return the first object of type \p type covering at least part of CPU set \p set + * if object \p prev is \c NULL. + * \return the next one if \p prev is not \c NULL.
+ * \return \c NULL if there is no next object. + * \return \c NULL if there is no depth for the given type. + * \return \c NULL if there are multiple depths for the given type, + * the caller should fallback to hwloc_get_next_obj_covering_cpuset_by_depth(). + * + * \note This function cannot work if objects of the given type do + * not have CPU sets (I/O or Misc objects). + */ +static __hwloc_inline hwloc_obj_t +hwloc_get_next_obj_covering_cpuset_by_type(hwloc_topology_t topology, hwloc_const_cpuset_t set, + hwloc_obj_type_t type, hwloc_obj_t prev) +{ + int depth = hwloc_get_type_depth(topology, type); + if (depth == HWLOC_TYPE_DEPTH_UNKNOWN || depth == HWLOC_TYPE_DEPTH_MULTIPLE) + return NULL; + return hwloc_get_next_obj_covering_cpuset_by_depth(topology, set, depth, prev); +} + +/** @} */ + + + +/** \defgroup hwlocality_helper_ancestors Looking at Ancestor and Child Objects + * @{ + * + * Be sure to see the figure in \ref termsanddefs that shows a + * complete topology tree, including depths, child/sibling/cousin + * relationships, and an example of an asymmetric topology where one + * package has fewer caches than its peers. + */ + +/** \brief Returns the ancestor object of \p obj at depth \p depth. + * + * \return the ancestor if any. + * \return \c NULL if no such ancestor exists. + * + * \note \p depth should not be the depth of PU or NUMA objects + * since they are ancestors of no objects (except Misc or I/O). + * This function rather expects an intermediate level depth, + * such as the depth of Packages, Cores, or Caches. 
+ */ +static __hwloc_inline hwloc_obj_t +hwloc_get_ancestor_obj_by_depth (hwloc_topology_t topology __hwloc_attribute_unused, int depth, hwloc_obj_t obj) __hwloc_attribute_pure; +static __hwloc_inline hwloc_obj_t +hwloc_get_ancestor_obj_by_depth (hwloc_topology_t topology __hwloc_attribute_unused, int depth, hwloc_obj_t obj) +{ + hwloc_obj_t ancestor = obj; + if (obj->depth < depth) + return NULL; + while (ancestor && ancestor->depth > depth) + ancestor = ancestor->parent; + return ancestor; +} + +/** \brief Returns the ancestor object of \p obj with type \p type. + * + * \return the ancestor if any. + * \return \c NULL if no such ancestor exists. + * + * \note if multiple matching ancestors exist (e.g. multiple levels of ::HWLOC_OBJ_GROUP) + * the lowest one is returned. + * + * \note \p type should not be ::HWLOC_OBJ_PU or ::HWLOC_OBJ_NUMANODE + * since these objects are ancestors of no objects (except Misc or I/O). + * This function rather expects an intermediate object type, + * such as ::HWLOC_OBJ_PACKAGE, ::HWLOC_OBJ_CORE, etc. + */ +static __hwloc_inline hwloc_obj_t +hwloc_get_ancestor_obj_by_type (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_obj_type_t type, hwloc_obj_t obj) __hwloc_attribute_pure; +static __hwloc_inline hwloc_obj_t +hwloc_get_ancestor_obj_by_type (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_obj_type_t type, hwloc_obj_t obj) +{ + hwloc_obj_t ancestor = obj->parent; + while (ancestor && ancestor->type != type) + ancestor = ancestor->parent; + return ancestor; +} + +/** \brief Returns the common parent object to objects \p obj1 and \p obj2. + * + * \return the common ancestor. + * + * \note This function cannot return \c NULL. 
+ */ +static __hwloc_inline hwloc_obj_t +hwloc_get_common_ancestor_obj (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_obj_t obj1, hwloc_obj_t obj2) __hwloc_attribute_pure; +static __hwloc_inline hwloc_obj_t +hwloc_get_common_ancestor_obj (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_obj_t obj1, hwloc_obj_t obj2) +{ + /* the loop isn't so easy since intermediate ancestors may have + * different depth, causing us to alternate between using obj1->parent + * and obj2->parent. Also, even if at some point we find ancestors of + * the same depth, their ancestors may have different depth again. + */ + while (obj1 != obj2) { + while (obj1->depth > obj2->depth) + obj1 = obj1->parent; + while (obj2->depth > obj1->depth) + obj2 = obj2->parent; + if (obj1 != obj2 && obj1->depth == obj2->depth) { + obj1 = obj1->parent; + obj2 = obj2->parent; + } + } + return obj1; +} + +/** \brief Returns true if \p obj is inside the subtree beginning with ancestor object \p subtree_root. + * + * \return 1 if the object is in the subtree, 0 otherwise. + * + * \note This function cannot work if \p obj and \p subtree_root objects do + * not have CPU sets (I/O or Misc objects). + */ +static __hwloc_inline int +hwloc_obj_is_in_subtree (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_obj_t obj, hwloc_obj_t subtree_root) __hwloc_attribute_pure; +static __hwloc_inline int +hwloc_obj_is_in_subtree (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_obj_t obj, hwloc_obj_t subtree_root) +{ + return obj->cpuset && subtree_root->cpuset && hwloc_bitmap_isincluded(obj->cpuset, subtree_root->cpuset); +} + +/** \brief Return the next child. + * + * Return the next child among the normal children list, + * then among the memory children list, then among the I/O + * children list, then among the Misc children list. + * + * \return the first child if \p prev is \c NULL. + * \return the next child if \p prev is not \c NULL.
+ * \return \c NULL when there is no next child. + */ +static __hwloc_inline hwloc_obj_t +hwloc_get_next_child (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_obj_t parent, hwloc_obj_t prev) +{ + hwloc_obj_t obj; + int state = 0; + if (prev) { + if (prev->type == HWLOC_OBJ_MISC) + state = 3; + else if (hwloc_obj_type_is_io(prev->type)) + state = 2; + else if (hwloc_obj_type_is_memory(prev->type)) + state = 1; + obj = prev->next_sibling; + } else { + obj = parent->first_child; + } + if (!obj && state == 0) { + obj = parent->memory_first_child; + state = 1; + } + if (!obj && state == 1) { + obj = parent->io_first_child; + state = 2; + } + if (!obj && state == 2) { + obj = parent->misc_first_child; + state = 3; + } + return obj; +} + +/** @} */ + + + /** \defgroup hwlocality_helper_find_cache Looking at Cache Objects * @{ */ @@ -578,17 +624,19 @@ hwloc_obj_type_is_icache(hwloc_obj_type_t type); * corresponding type such as ::HWLOC_OBJ_L1ICACHE, except that it may * also return a Unified cache when looking for an instruction cache. * - * If no cache level matches, ::HWLOC_TYPE_DEPTH_UNKNOWN is returned. + * \return the depth of the unique matching unified cache level is returned + * if \p cachetype is ::HWLOC_OBJ_CACHE_UNIFIED. * - * If \p cachetype is ::HWLOC_OBJ_CACHE_UNIFIED, the depth of the - * unique matching unified cache level is returned. + * \return the depth of either a matching cache level or a unified cache level + * if \p cachetype is ::HWLOC_OBJ_CACHE_DATA or ::HWLOC_OBJ_CACHE_INSTRUCTION. * - * If \p cachetype is ::HWLOC_OBJ_CACHE_DATA or ::HWLOC_OBJ_CACHE_INSTRUCTION, - * either a matching cache, or a unified cache is returned. + * \return the depth of the matching level + * if \p cachetype is \c -1 but only one level matches. * - * If \p cachetype is \c -1, it is ignored and multiple levels may - * match. The function returns either the depth of a uniquely matching - * level or ::HWLOC_TYPE_DEPTH_MULTIPLE. 
+ * \return ::HWLOC_TYPE_DEPTH_MULTIPLE + * if \p cachetype is \c -1 but multiple levels match. + * + * \return ::HWLOC_TYPE_DEPTH_UNKNOWN if no cache level matches. */ static __hwloc_inline int hwloc_get_cache_type_depth (hwloc_topology_t topology, @@ -622,7 +670,7 @@ hwloc_get_cache_type_depth (hwloc_topology_t topology, /** \brief Get the first data (or unified) cache covering a cpuset \p set * - * \return \c NULL if no cache matches. + * \return a covering cache, or \c NULL if no cache matches. */ static __hwloc_inline hwloc_obj_t hwloc_get_cache_covering_cpuset (hwloc_topology_t topology, hwloc_const_cpuset_t set) __hwloc_attribute_pure; @@ -640,7 +688,8 @@ hwloc_get_cache_covering_cpuset (hwloc_topology_t topology, hwloc_const_cpuset_t /** \brief Get the first data (or unified) cache shared between an object and somebody else. * - * \return \c NULL if no cache matches or if an invalid object is given. + * \return a shared cache. + * \return \c NULL if no cache matches or if an invalid object is given (e.g. I/O object). */ static __hwloc_inline hwloc_obj_t hwloc_get_shared_cache_covering_obj (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_obj_t obj) __hwloc_attribute_pure; @@ -684,6 +733,8 @@ hwloc_get_shared_cache_covering_obj (hwloc_topology_t topology __hwloc_attribute * If \p which is larger than the number of PUs in a core there were originally set in \p cpuset, * no PU is kept for that core. * + * \return 0. + * * \note PUs that are not below a Core object are ignored * (for instance if the topology does not contain any Core object). * None of them is removed from \p cpuset. @@ -698,6 +749,8 @@ HWLOC_DECLSPEC int hwloc_bitmap_singlify_per_core(hwloc_topology_t topology, hwl * one may iterate over the bits of the resulting CPU set with * hwloc_bitmap_foreach_begin(), and find the corresponding PUs * with this function. + * + * \return the PU object, or \c NULL if none matches. 
*/ static __hwloc_inline hwloc_obj_t hwloc_get_pu_obj_by_os_index(hwloc_topology_t topology, unsigned os_index) __hwloc_attribute_pure; @@ -719,6 +772,8 @@ hwloc_get_pu_obj_by_os_index(hwloc_topology_t topology, unsigned os_index) * one may iterate over the bits of the resulting nodeset with * hwloc_bitmap_foreach_begin(), and find the corresponding NUMA nodes * with this function. + * + * \return the NUMA node object, or \c NULL if none matches. */ static __hwloc_inline hwloc_obj_t hwloc_get_numanode_obj_by_os_index(hwloc_topology_t topology, unsigned os_index) __hwloc_attribute_pure; @@ -756,6 +811,8 @@ HWLOC_DECLSPEC unsigned hwloc_get_closest_objs (hwloc_topology_t topology, hwloc * For instance, if type1 is PACKAGE, idx1 is 2, type2 is CORE and idx2 * is 3, return the fourth core object below the third package. * + * \return a matching object if any, \c NULL otherwise. + * * \note This function requires these objects to have a CPU set. */ static __hwloc_inline hwloc_obj_t @@ -789,6 +846,8 @@ hwloc_get_obj_below_by_type (hwloc_topology_t topology, * and idxv contains 0, 1 and 2, return the third core object below * the second package below the first NUMA node. * + * \return a matching object if any, \c NULL otherwise. + * * \note This function requires all these objects and the root object * to have a CPU set. */ @@ -885,6 +944,8 @@ enum hwloc_distrib_flags_e { * * \p flags should be 0 or a OR'ed set of ::hwloc_distrib_flags_e. * + * \return 0 on success, -1 on error. + * * \note This function requires the \p roots objects to have a CPU set. */ static __hwloc_inline int @@ -961,6 +1022,8 @@ hwloc_distrib(hwloc_topology_t topology, * * \return the complete CPU set of processors of the system. * + * \note This function cannot return \c NULL. + * * \note The returned cpuset is not newly allocated and should thus not be * changed or freed; hwloc_bitmap_dup() must be used to obtain a local copy. 
* @@ -975,6 +1038,8 @@ hwloc_topology_get_complete_cpuset(hwloc_topology_t topology) __hwloc_attribute_ * provides topology information. This is equivalent to the cpuset of the * system object. * + * \note This function cannot return \c NULL. + * * \note The returned cpuset is not newly allocated and should thus not be * changed or freed; hwloc_bitmap_dup() must be used to obtain a local copy. * @@ -987,6 +1052,8 @@ hwloc_topology_get_topology_cpuset(hwloc_topology_t topology) __hwloc_attribute_ * * \return the CPU set of allowed processors of the system. * + * \note This function cannot return \c NULL. + * * \note If the topology flag ::HWLOC_TOPOLOGY_FLAG_INCLUDE_DISALLOWED was not set, * this is identical to hwloc_topology_get_topology_cpuset(), which means * all PUs are allowed. @@ -1006,6 +1073,8 @@ hwloc_topology_get_allowed_cpuset(hwloc_topology_t topology) __hwloc_attribute_p * * \return the complete node set of memory of the system. * + * \note This function cannot return \c NULL. + * * \note The returned nodeset is not newly allocated and should thus not be * changed or freed; hwloc_bitmap_dup() must be used to obtain a local copy. * @@ -1020,6 +1089,8 @@ hwloc_topology_get_complete_nodeset(hwloc_topology_t topology) __hwloc_attribute * provides topology information. This is equivalent to the nodeset of the * system object. * + * \note This function cannot return \c NULL. + * * \note The returned nodeset is not newly allocated and should thus not be * changed or freed; hwloc_bitmap_dup() must be used to obtain a local copy. * @@ -1032,6 +1103,8 @@ hwloc_topology_get_topology_nodeset(hwloc_topology_t topology) __hwloc_attribute * * \return the node set of allowed memory of the system. * + * \note This function cannot return \c NULL. + * * \note If the topology flag ::HWLOC_TOPOLOGY_FLAG_INCLUDE_DISALLOWED was not set, * this is identical to hwloc_topology_get_topology_nodeset(), which means * all NUMA nodes are allowed. 
@@ -1066,6 +1139,9 @@ hwloc_topology_get_allowed_nodeset(hwloc_topology_t topology) __hwloc_attribute_ * * Hence the entire topology CPU set is converted into the set of all nodes * that have some local CPUs. + * + * \return 0 on success. + * \return -1 with errno set to \c ENOMEM on internal reallocation failure. */ static __hwloc_inline int hwloc_cpuset_to_nodeset(hwloc_topology_t topology, hwloc_const_cpuset_t _cpuset, hwloc_nodeset_t nodeset) @@ -1090,6 +1166,9 @@ hwloc_cpuset_to_nodeset(hwloc_topology_t topology, hwloc_const_cpuset_t _cpuset, * * Hence the entire topology node set is converted into the set of all CPUs * that have some local NUMA nodes. + * + * \return 0 on success. + * \return -1 with errno set to \c ENOMEM on internal reallocation failure. */ static __hwloc_inline int hwloc_cpuset_from_nodeset(hwloc_topology_t topology, hwloc_cpuset_t _cpuset, hwloc_const_nodeset_t nodeset) @@ -1122,6 +1201,10 @@ hwloc_cpuset_from_nodeset(hwloc_topology_t topology, hwloc_cpuset_t _cpuset, hwl * because it has non-NULL CPU and node sets * and because its locality is the same as \p ioobj. * + * \return a non-I/O object. + * + * \note This function cannot return \c NULL. + * * \note The resulting object is usually a normal object but it could also * be a memory object (e.g. NUMA node) in future platforms if I/O objects * ever get attached to memory instead of CPUs. @@ -1140,6 +1223,8 @@ hwloc_get_non_io_ancestor_obj(hwloc_topology_t topology __hwloc_attribute_unused /** \brief Get the next PCI device in the system. * * \return the first PCI device if \p prev is \c NULL. + * \return the next PCI device if \p prev is not \c NULL. + * \return \c NULL if there is no next PCI device. 
*/ static __hwloc_inline hwloc_obj_t hwloc_get_next_pcidev(hwloc_topology_t topology, hwloc_obj_t prev) @@ -1149,6 +1234,8 @@ hwloc_get_next_pcidev(hwloc_topology_t topology, hwloc_obj_t prev) /** \brief Find the PCI device object matching the PCI bus id * given domain, bus device and function PCI bus id. + * + * \return a matching PCI device object if any, \c NULL otherwise. */ static __hwloc_inline hwloc_obj_t hwloc_get_pcidev_by_busid(hwloc_topology_t topology, @@ -1167,6 +1254,8 @@ hwloc_get_pcidev_by_busid(hwloc_topology_t topology, /** \brief Find the PCI device object matching the PCI bus id * given as a string xxxx:yy:zz.t or yy:zz.t. + * + * \return a matching PCI device object if any, \c NULL otherwise. */ static __hwloc_inline hwloc_obj_t hwloc_get_pcidev_by_busidstring(hwloc_topology_t topology, const char *busid) @@ -1186,6 +1275,8 @@ hwloc_get_pcidev_by_busidstring(hwloc_topology_t topology, const char *busid) /** \brief Get the next OS device in the system. * * \return the first OS device if \p prev is \c NULL. + * \return the next OS device if \p prev is not \c NULL. + * \return \c NULL if there is no next OS device. */ static __hwloc_inline hwloc_obj_t hwloc_get_next_osdev(hwloc_topology_t topology, hwloc_obj_t prev) @@ -1196,6 +1287,8 @@ hwloc_get_next_osdev(hwloc_topology_t topology, hwloc_obj_t prev) /** \brief Get the next bridge in the system. * * \return the first bridge if \p prev is \c NULL. + * \return the next bridge if \p prev is not \c NULL. + * \return \c NULL if there is no next bridge. */ static __hwloc_inline hwloc_obj_t hwloc_get_next_bridge(hwloc_topology_t topology, hwloc_obj_t prev) @@ -1204,6 +1297,8 @@ hwloc_get_next_bridge(hwloc_topology_t topology, hwloc_obj_t prev) } /* \brief Checks whether a given bridge covers a given PCI bus. + * + * \return 1 if it covers, 0 if not. 
*/ static __hwloc_inline int hwloc_bridge_covers_pcibus(hwloc_obj_t bridge, diff --git a/src/3rdparty/hwloc/include/hwloc/levelzero.h b/src/3rdparty/hwloc/include/hwloc/levelzero.h index 4c356fc8..dcdcf1fb 100644 --- a/src/3rdparty/hwloc/include/hwloc/levelzero.h +++ b/src/3rdparty/hwloc/include/hwloc/levelzero.h @@ -1,5 +1,5 @@ /* - * Copyright © 2021 Inria. All rights reserved. + * Copyright © 2021-2023 Inria. All rights reserved. * See COPYING in top-level directory. */ @@ -44,8 +44,9 @@ extern "C" { * the Level Zero device \p device. * * Topology \p topology and device \p device must match the local machine. - * The Level Zero must have been initialized with Sysman enabled - * (ZES_ENABLE_SYSMAN=1 in the environment). + * The Level Zero library must have been initialized with Sysman enabled + * (by calling zesInit(0) if supported, + * or by setting ZES_ENABLE_SYSMAN=1 in the environment). * I/O devices detection and the Level Zero component are not needed in the * topology. * @@ -55,6 +56,9 @@ extern "C" { * * This function is currently only implemented in a meaningful way for * Linux; other systems will simply get a full cpuset. + * + * \return 0 on success. + * \return -1 on error, for instance if device information could not be found. */ static __hwloc_inline int hwloc_levelzero_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused, diff --git a/src/3rdparty/hwloc/include/hwloc/linux-libnuma.h b/src/3rdparty/hwloc/include/hwloc/linux-libnuma.h index 0e2cc19f..adb05c09 100644 --- a/src/3rdparty/hwloc/include/hwloc/linux-libnuma.h +++ b/src/3rdparty/hwloc/include/hwloc/linux-libnuma.h @@ -1,6 +1,6 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2017 Inria. All rights reserved. + * Copyright © 2009-2023 Inria. All rights reserved. * Copyright © 2009-2010, 2012 Université Bordeaux * See COPYING in top-level directory. 
*/ @@ -50,6 +50,8 @@ extern "C" { * This function may be used before calling set_mempolicy, mbind, migrate_pages * or any other function that takes an array of unsigned long and a maximal * node number as input parameter. + * + * \return 0. */ static __hwloc_inline int hwloc_cpuset_to_linux_libnuma_ulongs(hwloc_topology_t topology, hwloc_const_cpuset_t cpuset, @@ -84,6 +86,8 @@ hwloc_cpuset_to_linux_libnuma_ulongs(hwloc_topology_t topology, hwloc_const_cpus * This function may be used before calling set_mempolicy, mbind, migrate_pages * or any other function that takes an array of unsigned long and a maximal * node number as input parameter. + * + * \return 0. */ static __hwloc_inline int hwloc_nodeset_to_linux_libnuma_ulongs(hwloc_topology_t topology, hwloc_const_nodeset_t nodeset, @@ -119,6 +123,9 @@ hwloc_nodeset_to_linux_libnuma_ulongs(hwloc_topology_t topology, hwloc_const_nod * This function may be used after calling get_mempolicy or any other function * that takes an array of unsigned long as output parameter (and possibly * a maximal node number as input parameter). + * + * \return 0 on success. + * \return -1 on error, for instance if failing an internal reallocation. 
*/ static __hwloc_inline int hwloc_cpuset_from_linux_libnuma_ulongs(hwloc_topology_t topology, hwloc_cpuset_t cpuset, @@ -130,7 +137,8 @@ hwloc_cpuset_from_linux_libnuma_ulongs(hwloc_topology_t topology, hwloc_cpuset_t while ((node = hwloc_get_next_obj_by_depth(topology, depth, node)) != NULL) if (node->os_index < maxnode && (mask[node->os_index/sizeof(*mask)/8] & (1UL << (node->os_index % (sizeof(*mask)*8))))) - hwloc_bitmap_or(cpuset, cpuset, node->cpuset); + if (hwloc_bitmap_or(cpuset, cpuset, node->cpuset) < 0) + return -1; return 0; } @@ -142,6 +150,9 @@ hwloc_cpuset_from_linux_libnuma_ulongs(hwloc_topology_t topology, hwloc_cpuset_t * This function may be used after calling get_mempolicy or any other function * that takes an array of unsigned long as output parameter (and possibly * a maximal node number as input parameter). + * + * \return 0 on success. + * \return -1 with errno set to \c ENOMEM if some internal reallocation failed. */ static __hwloc_inline int hwloc_nodeset_from_linux_libnuma_ulongs(hwloc_topology_t topology, hwloc_nodeset_t nodeset, @@ -153,7 +164,8 @@ hwloc_nodeset_from_linux_libnuma_ulongs(hwloc_topology_t topology, hwloc_nodeset while ((node = hwloc_get_next_obj_by_depth(topology, depth, node)) != NULL) if (node->os_index < maxnode && (mask[node->os_index/sizeof(*mask)/8] & (1UL << (node->os_index % (sizeof(*mask)*8))))) - hwloc_bitmap_set(nodeset, node->os_index); + if (hwloc_bitmap_set(nodeset, node->os_index) < 0) + return -1; return 0; } @@ -184,7 +196,7 @@ hwloc_nodeset_from_linux_libnuma_ulongs(hwloc_topology_t topology, hwloc_nodeset * This function may be used before calling many numa_ functions * that use a struct bitmask as an input parameter. * - * \return newly allocated struct bitmask. + * \return newly allocated struct bitmask, or \c NULL on error. 
*/ static __hwloc_inline struct bitmask * hwloc_cpuset_to_linux_libnuma_bitmask(hwloc_topology_t topology, hwloc_const_cpuset_t cpuset) __hwloc_attribute_malloc; @@ -209,7 +221,7 @@ hwloc_cpuset_to_linux_libnuma_bitmask(hwloc_topology_t topology, hwloc_const_cpu * This function may be used before calling many numa_ functions * that use a struct bitmask as an input parameter. * - * \return newly allocated struct bitmask. + * \return newly allocated struct bitmask, or \c NULL on error. */ static __hwloc_inline struct bitmask * hwloc_nodeset_to_linux_libnuma_bitmask(hwloc_topology_t topology, hwloc_const_nodeset_t nodeset) __hwloc_attribute_malloc; @@ -231,6 +243,9 @@ hwloc_nodeset_to_linux_libnuma_bitmask(hwloc_topology_t topology, hwloc_const_no * * This function may be used after calling many numa_ functions * that use a struct bitmask as an output parameter. + * + * \return 0 on success. + * \return -1 with errno set to \c ENOMEM if some internal reallocation failed. */ static __hwloc_inline int hwloc_cpuset_from_linux_libnuma_bitmask(hwloc_topology_t topology, hwloc_cpuset_t cpuset, @@ -241,7 +256,8 @@ hwloc_cpuset_from_linux_libnuma_bitmask(hwloc_topology_t topology, hwloc_cpuset_ hwloc_bitmap_zero(cpuset); while ((node = hwloc_get_next_obj_by_depth(topology, depth, node)) != NULL) if (numa_bitmask_isbitset(bitmask, node->os_index)) - hwloc_bitmap_or(cpuset, cpuset, node->cpuset); + if (hwloc_bitmap_or(cpuset, cpuset, node->cpuset) < 0) + return -1; return 0; } @@ -249,6 +265,9 @@ hwloc_cpuset_from_linux_libnuma_bitmask(hwloc_topology_t topology, hwloc_cpuset_ * * This function may be used after calling many numa_ functions * that use a struct bitmask as an output parameter. + * + * \return 0 on success. + * \return -1 with errno set to \c ENOMEM if some internal reallocation failed. 
*/ static __hwloc_inline int hwloc_nodeset_from_linux_libnuma_bitmask(hwloc_topology_t topology, hwloc_nodeset_t nodeset, @@ -259,7 +278,8 @@ hwloc_nodeset_from_linux_libnuma_bitmask(hwloc_topology_t topology, hwloc_nodese hwloc_bitmap_zero(nodeset); while ((node = hwloc_get_next_obj_by_depth(topology, depth, node)) != NULL) if (numa_bitmask_isbitset(bitmask, node->os_index)) - hwloc_bitmap_set(nodeset, node->os_index); + if (hwloc_bitmap_set(nodeset, node->os_index) < 0) + return -1; return 0; } diff --git a/src/3rdparty/hwloc/include/hwloc/linux.h b/src/3rdparty/hwloc/include/hwloc/linux.h index d76633b0..3f69be74 100644 --- a/src/3rdparty/hwloc/include/hwloc/linux.h +++ b/src/3rdparty/hwloc/include/hwloc/linux.h @@ -1,6 +1,6 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2021 Inria. All rights reserved. + * Copyright © 2009-2023 Inria. All rights reserved. * Copyright © 2009-2011 Université Bordeaux * See COPYING in top-level directory. */ @@ -38,6 +38,8 @@ extern "C" { * The behavior is exactly the same as the Linux sched_setaffinity system call, * but uses a hwloc cpuset. * + * \return 0 on success, -1 on error. + * * \note This is equivalent to calling hwloc_set_proc_cpubind() with * HWLOC_CPUBIND_THREAD as flags. */ @@ -52,6 +54,8 @@ HWLOC_DECLSPEC int hwloc_linux_set_tid_cpubind(hwloc_topology_t topology, pid_t * The behavior is exactly the same as the Linux sched_getaffinity system call, * but uses a hwloc cpuset. * + * \return 0 on success, -1 on error. + * * \note This is equivalent to calling hwloc_get_proc_cpubind() with * ::HWLOC_CPUBIND_THREAD as flags. */ @@ -62,6 +66,8 @@ HWLOC_DECLSPEC int hwloc_linux_get_tid_cpubind(hwloc_topology_t topology, pid_t * The CPU-set \p set (previously allocated by the caller) * is filled with the PU which the thread last ran on. * + * \return 0 on success, -1 on error. + * * \note This is equivalent to calling hwloc_get_proc_last_cpu_location() with * ::HWLOC_CPUBIND_THREAD as flags. 
*/ @@ -72,6 +78,8 @@ HWLOC_DECLSPEC int hwloc_linux_get_tid_last_cpu_location(hwloc_topology_t topolo * Might be used when reading CPU set from sysfs attributes such as topology * and caches for processors, or local_cpus for devices. * + * \return 0 on success, -1 on error. + * * \note This function ignores the HWLOC_FSROOT environment variable. */ HWLOC_DECLSPEC int hwloc_linux_read_path_as_cpumask(const char *path, hwloc_bitmap_t set); diff --git a/src/3rdparty/hwloc/include/hwloc/memattrs.h b/src/3rdparty/hwloc/include/hwloc/memattrs.h index acf4da53..10332b8e 100644 --- a/src/3rdparty/hwloc/include/hwloc/memattrs.h +++ b/src/3rdparty/hwloc/include/hwloc/memattrs.h @@ -1,5 +1,5 @@ /* - * Copyright © 2019-2022 Inria. All rights reserved. + * Copyright © 2019-2023 Inria. All rights reserved. * See COPYING in top-level directory. */ @@ -54,6 +54,10 @@ extern "C" { * Attribute values for these nodes, if any, may then be obtained with * hwloc_memattr_get_value() and manually compared with the desired criteria. * + * Memory attributes are also used internally to build Memory Tiers which provide + * an easy way to distinguish NUMA nodes of different kinds, as explained + * in \ref heteromem. + * * \sa An example is available in doc/examples/memory-attributes.c in the source tree. * * \note The API also supports specific objects as initiator, @@ -178,6 +182,9 @@ enum hwloc_memattr_id_e { typedef unsigned hwloc_memattr_id_t; /** \brief Return the identifier of the memory attribute with the given name. + * + * \return 0 on success. + * \return -1 with errno set to \c EINVAL if no such attribute exists. */ HWLOC_DECLSPEC int hwloc_memattr_get_by_name(hwloc_topology_t topology, @@ -247,6 +254,8 @@ enum hwloc_local_numanode_flag_e { * or the number of nodes that would have been stored if there were * enough room. * + * \return 0 on success or -1 on error. 
+ * * \note Some of these NUMA nodes may not have any memory attribute * values and hence not be reported as actual targets in other functions. * @@ -276,6 +285,10 @@ hwloc_get_local_numanode_objs(hwloc_topology_t topology, * * \p flags must be \c 0 for now. * + * \return 0 on success. + * \return -1 on error, for instance with errno set to \c EINVAL if flags + * are invalid or no such attribute exists. + * * \note The initiator \p initiator should be of type ::HWLOC_LOCATION_TYPE_CPUSET * when refering to accesses performed by CPU cores. * ::HWLOC_LOCATION_TYPE_OBJECT is currently unused internally by hwloc, @@ -307,7 +320,10 @@ hwloc_memattr_get_value(hwloc_topology_t topology, * * \p flags must be \c 0 for now. * - * If there are no matching targets, \c -1 is returned with \p errno set to \c ENOENT; + * \return 0 on success. + * \return -1 with errno set to \c ENOENT if there are no matching targets. + * \return -1 with errno set to \c EINVAL if flags are invalid, + * or no such attribute exists. * * \note The initiator \p initiator should be of type ::HWLOC_LOCATION_TYPE_CPUSET * when refering to accesses performed by CPU cores. @@ -323,10 +339,6 @@ hwloc_memattr_get_best_target(hwloc_topology_t topology, hwloc_obj_t *best_target, hwloc_uint64_t *value); /** \brief Return the best initiator for the given attribute and target NUMA node. - * - * If the attribute does not relate to a specific initiator - * (it does not have the flag ::HWLOC_MEMATTR_FLAG_NEED_INITIATOR), - * \c -1 is returned and \p errno is set to \c EINVAL. * * If \p value is non \c NULL, the corresponding value is returned there. * @@ -342,7 +354,10 @@ hwloc_memattr_get_best_target(hwloc_topology_t topology, * * \p flags must be \c 0 for now. * - * If there are no matching initiators, \c -1 is returned with \p errno set to \c ENOENT; + * \return 0 on success. + * \return -1 with errno set to \c ENOENT if there are no matching initiators. 
+ * \return -1 with errno set to \c EINVAL if the attribute does not relate to a specific initiator + * (it does not have the flag ::HWLOC_MEMATTR_FLAG_NEED_INITIATOR). */ HWLOC_DECLSPEC int hwloc_memattr_get_best_initiator(hwloc_topology_t topology, @@ -359,6 +374,9 @@ hwloc_memattr_get_best_initiator(hwloc_topology_t topology, */ /** \brief Return the name of a memory attribute. + * + * \return 0 on success. + * \return -1 with errno set to \c EINVAL if the attribute does not exist. */ HWLOC_DECLSPEC int hwloc_memattr_get_name(hwloc_topology_t topology, @@ -368,6 +386,9 @@ hwloc_memattr_get_name(hwloc_topology_t topology, /** \brief Return the flags of the given attribute. * * Flags are a OR'ed set of ::hwloc_memattr_flag_e. + * + * \return 0 on success. + * \return -1 with errno set to \c EINVAL if the attribute does not exist. */ HWLOC_DECLSPEC int hwloc_memattr_get_flags(hwloc_topology_t topology, @@ -397,6 +418,9 @@ enum hwloc_memattr_flag_e { * Add a specific memory attribute that is not defined in ::hwloc_memattr_id_e. * Flags are a OR'ed set of ::hwloc_memattr_flag_e. It must contain at least * one of ::HWLOC_MEMATTR_FLAG_HIGHER_FIRST or ::HWLOC_MEMATTR_FLAG_LOWER_FIRST. + * + * \return 0 on success. + * \return -1 with errno set to \c EBUSY if another attribute already uses this name. */ HWLOC_DECLSPEC int hwloc_memattr_register(hwloc_topology_t topology, @@ -421,6 +445,8 @@ hwloc_memattr_register(hwloc_topology_t topology, * ::HWLOC_LOCATION_TYPE_OBJECT is currently unused internally by hwloc, * but users may for instance use it to provide custom information about * host memory accesses performed by GPUs. + * + * \return 0 on success or -1 on error. */ HWLOC_DECLSPEC int hwloc_memattr_set_value(hwloc_topology_t topology, @@ -460,6 +486,8 @@ hwloc_memattr_set_value(hwloc_topology_t topology, * NUMA nodes with hwloc_get_local_numanode_objs() and then look at their attribute * values. * + * \return 0 on success or -1 on error. 
+ * * \note The initiator \p initiator should be of type ::HWLOC_LOCATION_TYPE_CPUSET * when referring to accesses performed by CPU cores. * ::HWLOC_LOCATION_TYPE_OBJECT is currently unused internally by hwloc, @@ -497,6 +525,8 @@ hwloc_memattr_get_targets(hwloc_topology_t topology, * (it does not have the flag ::HWLOC_MEMATTR_FLAG_NEED_INITIATOR), * no initiator is returned. * + * \return 0 on success or -1 on error. + * * \note This function is meant for tools and debugging (listing internal information) * rather than for application queries. Applications should rather select useful * NUMA nodes with hwloc_get_local_numanode_objs() and then look at their attribute diff --git a/src/3rdparty/hwloc/include/hwloc/nvml.h b/src/3rdparty/hwloc/include/hwloc/nvml.h index 57f36a85..558a97d0 100644 --- a/src/3rdparty/hwloc/include/hwloc/nvml.h +++ b/src/3rdparty/hwloc/include/hwloc/nvml.h @@ -1,5 +1,5 @@ /* - * Copyright © 2012-2021 Inria. All rights reserved. + * Copyright © 2012-2023 Inria. All rights reserved. * See COPYING in top-level directory. */ @@ -51,6 +51,9 @@ extern "C" { * * This function is currently only implemented in a meaningful way for * Linux; other systems will simply get a full cpuset. + * + * \return 0 on success. + * \return -1 on error, for instance if device information could not be found. */ static __hwloc_inline int hwloc_nvml_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused, diff --git a/src/3rdparty/hwloc/include/hwloc/opencl.h b/src/3rdparty/hwloc/include/hwloc/opencl.h index 395b32e3..9810504e 100644 --- a/src/3rdparty/hwloc/include/hwloc/opencl.h +++ b/src/3rdparty/hwloc/include/hwloc/opencl.h @@ -1,5 +1,5 @@ /* - * Copyright © 2012-2021 Inria. All rights reserved. + * Copyright © 2012-2023 Inria. All rights reserved. * Copyright © 2013, 2018 Université Bordeaux. All right reserved. * See COPYING in top-level directory. 
*/ @@ -69,6 +69,9 @@ typedef union { /** \brief Return the domain, bus and device IDs of the OpenCL device \p device. * * Device \p device must match the local machine. + * + * \return 0 on success. + * \return -1 on error, for instance if device information could not be found. */ static __hwloc_inline int hwloc_opencl_get_device_pci_busid(cl_device_id device, @@ -126,6 +129,9 @@ hwloc_opencl_get_device_pci_busid(cl_device_id device, * This function is currently only implemented in a meaningful way for * Linux with the AMD or NVIDIA OpenCL implementation; other systems will simply * get a full cpuset. + * + * \return 0 on success. + * \return -1 on error, for instance if the device could not be found. */ static __hwloc_inline int hwloc_opencl_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused, diff --git a/src/3rdparty/hwloc/include/hwloc/openfabrics-verbs.h b/src/3rdparty/hwloc/include/hwloc/openfabrics-verbs.h index 7cee137e..875b12a9 100644 --- a/src/3rdparty/hwloc/include/hwloc/openfabrics-verbs.h +++ b/src/3rdparty/hwloc/include/hwloc/openfabrics-verbs.h @@ -1,6 +1,6 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2021 Inria. All rights reserved. + * Copyright © 2009-2023 Inria. All rights reserved. * Copyright © 2009-2010 Université Bordeaux * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. @@ -57,6 +57,9 @@ extern "C" { * * This function is currently only implemented in a meaningful way for * Linux; other systems will simply get a full cpuset. + * + * \return 0 on success. + * \return -1 on error, for instance if device information could not be found. 
*/ static __hwloc_inline int hwloc_ibv_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused, diff --git a/src/3rdparty/hwloc/include/hwloc/plugins.h b/src/3rdparty/hwloc/include/hwloc/plugins.h index d7abb02c..f3db648c 100644 --- a/src/3rdparty/hwloc/include/hwloc/plugins.h +++ b/src/3rdparty/hwloc/include/hwloc/plugins.h @@ -164,7 +164,7 @@ struct hwloc_disc_status { */ unsigned excluded_phases; - /** \brief OR'ed set of hwloc_disc_status_flag_e */ + /** \brief OR'ed set of ::hwloc_disc_status_flag_e */ unsigned long flags; }; diff --git a/src/3rdparty/hwloc/include/hwloc/rename.h b/src/3rdparty/hwloc/include/hwloc/rename.h index 279ecd84..d5687b69 100644 --- a/src/3rdparty/hwloc/include/hwloc/rename.h +++ b/src/3rdparty/hwloc/include/hwloc/rename.h @@ -176,6 +176,7 @@ extern "C" { #define hwloc_topology_insert_misc_object HWLOC_NAME(topology_insert_misc_object) #define hwloc_topology_alloc_group_object HWLOC_NAME(topology_alloc_group_object) +#define hwloc_topology_free_group_object HWLOC_NAME(topology_free_group_object) #define hwloc_topology_insert_group_object HWLOC_NAME(topology_insert_group_object) #define hwloc_obj_add_other_obj_sets HWLOC_NAME(obj_add_other_obj_sets) #define hwloc_topology_refresh HWLOC_NAME(topology_refresh) diff --git a/src/3rdparty/hwloc/include/hwloc/rsmi.h b/src/3rdparty/hwloc/include/hwloc/rsmi.h index 55aa1272..34db3267 100644 --- a/src/3rdparty/hwloc/include/hwloc/rsmi.h +++ b/src/3rdparty/hwloc/include/hwloc/rsmi.h @@ -1,5 +1,5 @@ /* - * Copyright © 2012-2021 Inria. All rights reserved. + * Copyright © 2012-2023 Inria. All rights reserved. * Copyright (c) 2020, Advanced Micro Devices, Inc. All rights reserved. * Written by Advanced Micro Devices, * See COPYING in top-level directory. @@ -55,6 +55,9 @@ extern "C" { * * This function is currently only implemented in a meaningful way for * Linux; other systems will simply get a full cpuset. + * + * \return 0 on success. 
+ * \return -1 on error, for instance if device information could not be found. */ static __hwloc_inline int hwloc_rsmi_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused, diff --git a/src/3rdparty/hwloc/include/hwloc/shmem.h b/src/3rdparty/hwloc/include/hwloc/shmem.h index 86f57b4f..c91e0d8e 100644 --- a/src/3rdparty/hwloc/include/hwloc/shmem.h +++ b/src/3rdparty/hwloc/include/hwloc/shmem.h @@ -1,5 +1,5 @@ /* - * Copyright © 2013-2018 Inria. All rights reserved. + * Copyright © 2013-2023 Inria. All rights reserved. * See COPYING in top-level directory. */ @@ -48,6 +48,8 @@ extern "C" { * This length (in bytes) must be used in hwloc_shmem_topology_write() * and hwloc_shmem_topology_adopt() later. * + * \return the length, or -1 on error, for instance if flags are invalid. + * * \note Flags \p flags are currently unused, must be 0. */ HWLOC_DECLSPEC int hwloc_shmem_topology_get_length(hwloc_topology_t topology, @@ -74,9 +76,10 @@ HWLOC_DECLSPEC int hwloc_shmem_topology_get_length(hwloc_topology_t topology, * is not. However the caller may also allocate it manually in shared memory * to share it as well. * - * \return -1 with errno set to EBUSY if the virtual memory mapping defined + * \return 0 on success. + * \return -1 with errno set to \c EBUSY if the virtual memory mapping defined * by \p mmap_address and \p length isn't available in the process. - * \return -1 with errno set to EINVAL if \p fileoffset, \p mmap_address + * \return -1 with errno set to \c EINVAL if \p fileoffset, \p mmap_address * or \p length aren't page-aligned. */ HWLOC_DECLSPEC int hwloc_shmem_topology_write(hwloc_topology_t topology, @@ -112,14 +115,16 @@ HWLOC_DECLSPEC int hwloc_shmem_topology_write(hwloc_topology_t topology, * * \note This function takes care of calling hwloc_topology_abi_check(). * - * \return -1 with errno set to EBUSY if the virtual memory mapping defined + * \return 0 on success. 
+ * + * \return -1 with errno set to \c EBUSY if the virtual memory mapping defined * by \p mmap_address and \p length isn't available in the process. * - * \return -1 with errno set to EINVAL if \p fileoffset, \p mmap_address + * \return -1 with errno set to \c EINVAL if \p fileoffset, \p mmap_address * or \p length aren't page-aligned, or do not match what was given to * hwloc_shmem_topology_write() earlier. * - * \return -1 with errno set to EINVAL if the layout of the topology structure + * \return -1 with errno set to \c EINVAL if the layout of the topology structure * is different between the writer process and the adopter process. */ HWLOC_DECLSPEC int hwloc_shmem_topology_adopt(hwloc_topology_t *topologyp, diff --git a/src/3rdparty/hwloc/include/private/netloc.h b/src/3rdparty/hwloc/include/private/netloc.h deleted file mode 100644 index c070c54c..00000000 --- a/src/3rdparty/hwloc/include/private/netloc.h +++ /dev/null @@ -1,578 +0,0 @@ -/* - * Copyright © 2014 Cisco Systems, Inc. All rights reserved. - * Copyright © 2013-2014 University of Wisconsin-La Crosse. - * All rights reserved. - * Copyright © 2015-2017 Inria. All rights reserved. - * - * $COPYRIGHT$ - * - * Additional copyrights may follow - * See COPYING in top-level directory. 
- * - * $HEADER$ - */ - -#ifndef _NETLOC_PRIVATE_H_ -#define _NETLOC_PRIVATE_H_ - -#include -#include -#include -#include -#include - -#define NETLOCFILE_VERSION 1 - -#ifdef NETLOC_SCOTCH -#include -#include -#define NETLOC_int SCOTCH_Num -#else -#define NETLOC_int int -#endif - -/* - * "Import" a few things from hwloc - */ -#define __netloc_attribute_unused __hwloc_attribute_unused -#define __netloc_attribute_malloc __hwloc_attribute_malloc -#define __netloc_attribute_const __hwloc_attribute_const -#define __netloc_attribute_pure __hwloc_attribute_pure -#define __netloc_attribute_deprecated __hwloc_attribute_deprecated -#define __netloc_attribute_may_alias __hwloc_attribute_may_alias -#define NETLOC_DECLSPEC HWLOC_DECLSPEC - - -/********************************************************************** - * Types - **********************************************************************/ - -/** - * Definitions for Comparators - * \sa These are the return values from the following functions: - * netloc_network_compare, netloc_dt_edge_t_compare, netloc_dt_node_t_compare - */ -typedef enum { - NETLOC_CMP_SAME = 0, /**< Compared as the Same */ - NETLOC_CMP_SIMILAR = -1, /**< Compared as Similar, but not the Same */ - NETLOC_CMP_DIFF = -2 /**< Compared as Different */ -} netloc_compare_type_t; - -/** - * Enumerated type for the various types of supported networks - */ -typedef enum { - NETLOC_NETWORK_TYPE_ETHERNET = 1, /**< Ethernet network */ - NETLOC_NETWORK_TYPE_INFINIBAND = 2, /**< InfiniBand network */ - NETLOC_NETWORK_TYPE_INVALID = 3 /**< Invalid network */ -} netloc_network_type_t; - -/** - * Enumerated type for the various types of supported topologies - */ -typedef enum { - NETLOC_TOPOLOGY_TYPE_INVALID = -1, /**< Invalid */ - NETLOC_TOPOLOGY_TYPE_TREE = 1, /**< Tree */ -} netloc_topology_type_t; - -/** - * Enumerated type for the various types of nodes - */ -typedef enum { - NETLOC_NODE_TYPE_HOST = 0, /**< Host (a.k.a., network addressable endpoint - e.g., MAC 
Address) node */ - NETLOC_NODE_TYPE_SWITCH = 1, /**< Switch node */ - NETLOC_NODE_TYPE_INVALID = 2 /**< Invalid node */ -} netloc_node_type_t; - -typedef enum { - NETLOC_ARCH_TREE = 0, /* Fat tree */ -} netloc_arch_type_t; - - -/* Pre declarations to avoid inter dependency problems */ -/** \cond IGNORE */ -struct netloc_topology_t; -typedef struct netloc_topology_t netloc_topology_t; -struct netloc_node_t; -typedef struct netloc_node_t netloc_node_t; -struct netloc_edge_t; -typedef struct netloc_edge_t netloc_edge_t; -struct netloc_physical_link_t; -typedef struct netloc_physical_link_t netloc_physical_link_t; -struct netloc_path_t; -typedef struct netloc_path_t netloc_path_t; - -struct netloc_arch_tree_t; -typedef struct netloc_arch_tree_t netloc_arch_tree_t; -struct netloc_arch_node_t; -typedef struct netloc_arch_node_t netloc_arch_node_t; -struct netloc_arch_node_slot_t; -typedef struct netloc_arch_node_slot_t netloc_arch_node_slot_t; -struct netloc_arch_t; -typedef struct netloc_arch_t netloc_arch_t; -/** \endcond */ - -/** - * \struct netloc_topology_t - * \brief Netloc Topology Context - * - * An opaque data structure used to reference a network topology. - * - * \note Must be initialized with \ref netloc_topology_construct() - */ -struct netloc_topology_t { - /** Topology path */ - char *topopath; - /** Subnet ID */ - char *subnet_id; - - /** Node List */ - netloc_node_t *nodes; /* Hash table of nodes by physical_id */ - netloc_node_t *nodesByHostname; /* Hash table of nodes by hostname */ - - netloc_physical_link_t *physical_links; /* Hash table with physcial links */ - - /** Partition List */ - UT_array *partitions; - - /** Hwloc topology List */ - char *hwlocpath; - UT_array *topos; - hwloc_topology_t *hwloc_topos; - - /** Type of the graph */ - netloc_topology_type_t type; -}; - -/** - * \brief Netloc Node Type - * - * Represents the concept of a node (a.k.a., vertex, endpoint) within a network - * graph. This could be a server or a network switch. 
The \ref node_type parameter - * will distinguish the exact type of node this represents in the graph. - */ -struct netloc_node_t { - UT_hash_handle hh; /* makes this structure hashable with physical_id */ - UT_hash_handle hh2; /* makes this structure hashable with hostname */ - - /** Physical ID of the node */ - char physical_id[20]; - - /** Logical ID of the node (if any) */ - int logical_id; - - /** Type of the node */ - netloc_node_type_t type; - - /* Pointer to physical_links */ - UT_array *physical_links; - - /** Description information from discovery (if any) */ - char *description; - - /** - * Application-given private data pointer. - * Initialized to NULL, and not used by the netloc library. - */ - void * userdata; - - /** Outgoing edges from this node */ - netloc_edge_t *edges; - - UT_array *subnodes; /* the group of nodes for the virtual nodes */ - - netloc_path_t *paths; - - char *hostname; - - UT_array *partitions; /* index in the list from the topology */ - - hwloc_topology_t hwlocTopo; - int hwlocTopoIdx; -}; - -/** - * \brief Netloc Edge Type - * - * Represents the concept of a directed edge within a network graph. - * - * \note We do not point to the netloc_node_t structure directly to - * simplify the representation, and allow the information to more easily - * be entered into the data store without circular references. - * \todo JJH Is the note above still true? - */ -struct netloc_edge_t { - UT_hash_handle hh; /* makes this structure hashable */ - - netloc_node_t *dest; - - int id; - - /** Pointers to the parent node */ - netloc_node_t *node; - - /* Pointer to physical_links */ - UT_array *physical_links; - - /** total gbits of the links */ - float total_gbits; - - UT_array *partitions; /* index in the list from the topology */ - - UT_array *subnode_edges; /* for edges going to virtual nodes */ - - struct netloc_edge_t *other_way; - - /** - * Application-given private data pointer. - * Initialized to NULL, and not used by the netloc library. 
- */ - void * userdata; -}; - - -struct netloc_physical_link_t { - UT_hash_handle hh; /* makes this structure hashable */ - - int id; // TODO long long - netloc_node_t *src; - netloc_node_t *dest; - int ports[2]; - char *width; - char *speed; - - netloc_edge_t *edge; - - int other_way_id; - struct netloc_physical_link_t *other_way; - - UT_array *partitions; /* index in the list from the topology */ - - /** gbits of the link from speed and width */ - float gbits; - - /** Description information from discovery (if any) */ - char *description; -}; - -struct netloc_path_t { - UT_hash_handle hh; /* makes this structure hashable */ - char dest_id[20]; - UT_array *links; -}; - - -/********************************************************************** - * Architecture structures - **********************************************************************/ -struct netloc_arch_tree_t { - NETLOC_int num_levels; - NETLOC_int *degrees; - NETLOC_int *cost; -}; - -struct netloc_arch_node_t { - UT_hash_handle hh; /* makes this structure hashable */ - char *name; /* Hash key */ - netloc_node_t *node; /* Corresponding node */ - int idx_in_topo; /* idx with ghost hosts to have complete topo */ - int num_slots; /* it is not the real number of slots but the maximum slot idx */ - int *slot_idx; /* corresponding idx in slot_tree */ - int *slot_os_idx; /* corresponding os index for each leaf in tree */ - netloc_arch_tree_t *slot_tree; /* Tree built from hwloc */ - int num_current_slots; /* Number of PUs */ - NETLOC_int *current_slots; /* indices in the complete tree */ - int *slot_ranks; /* corresponding MPI rank for each leaf in tree */ -}; - -struct netloc_arch_node_slot_t { - netloc_arch_node_t *node; - int slot; -}; - -struct netloc_arch_t { - netloc_topology_t *topology; - int has_slots; /* if slots are included in the architecture */ - netloc_arch_type_t type; - union { - netloc_arch_tree_t *node_tree; - netloc_arch_tree_t *global_tree; - } arch; - netloc_arch_node_t *nodes_by_name; - 
netloc_arch_node_slot_t *node_slot_by_idx; /* node_slot by index in complete topo */ - NETLOC_int num_current_hosts; /* if has_slots, host is a slot, else host is a node */ - NETLOC_int *current_hosts; /* indices in the complete topology */ -}; - -/********************************************************************** - * Topology Functions - **********************************************************************/ -/** - * Allocate a topology handle. - * - * User is responsible for calling \ref netloc_detach on the topology handle. - * The network parameter information is deep copied into the topology handle, so the - * user may destruct the network handle after calling this function and/or reuse - * the network handle. - * - * \returns NETLOC_SUCCESS on success - * \returns NETLOC_ERROR upon an error. - */ -netloc_topology_t *netloc_topology_construct(char *path); - -/** - * Destruct a topology handle - * - * \param topology A valid pointer to a \ref netloc_topology_t handle created - * from a prior call to \ref netloc_topology_construct. - * - * \returns NETLOC_SUCCESS on success - * \returns NETLOC_ERROR upon an error. 
- */ -int netloc_topology_destruct(netloc_topology_t *topology); - -int netloc_topology_find_partition_idx(netloc_topology_t *topology, char *partition_name); - -int netloc_topology_read_hwloc(netloc_topology_t *topology, int num_nodes, - netloc_node_t **node_list); - -#define netloc_topology_iter_partitions(topology,partition) \ - for ((partition) = (char **)utarray_front(topology->partitions); \ - (partition) != NULL; \ - (partition) = (char **)utarray_next(topology->partitions, partition)) - -#define netloc_topology_iter_hwloctopos(topology,hwloctopo) \ - for ((hwloctopo) = (char **)utarray_front(topology->topos); \ - (hwloctopo) != NULL; \ - (hwloctopo) = (char **)utarray_next(topology->topos, hwloctopo)) - -#define netloc_topology_find_node(topology,node_id,node) \ - HASH_FIND_STR(topology->nodes, node_id, node) - -#define netloc_topology_iter_nodes(topology,node,_tmp) \ - HASH_ITER(hh, topology->nodes, node, _tmp) - -#define netloc_topology_num_nodes(topology) \ - HASH_COUNT(topology->nodes) - -/*************************************************/ - - -/** - * Constructor for netloc_node_t - * - * User is responsible for calling the destructor on the handle. - * - * Returns - * A newly allocated pointer to the network information. 
- */ -netloc_node_t *netloc_node_construct(void); - -/** - * Destructor for netloc_node_t - * - * \param node A valid node handle - * - * Returns - * NETLOC_SUCCESS on success - * NETLOC_ERROR on error - */ -int netloc_node_destruct(netloc_node_t *node); - -char *netloc_node_pretty_print(netloc_node_t* node); - -#define netloc_node_get_num_subnodes(node) \ - utarray_len((node)->subnodes) - -#define netloc_node_get_subnode(node,i) \ - (*(netloc_node_t **)utarray_eltptr((node)->subnodes, (i))) - -#define netloc_node_get_num_edges(node) \ - utarray_len((node)->edges) - -#define netloc_node_get_edge(node,i) \ - (*(netloc_edge_t **)utarray_eltptr((node)->edges, (i))) - -#define netloc_node_iter_edges(node,edge,_tmp) \ - HASH_ITER(hh, node->edges, edge, _tmp) - -#define netloc_node_iter_paths(node,path,_tmp) \ - HASH_ITER(hh, node->paths, path, _tmp) - -#define netloc_node_is_host(node) \ - (node->type == NETLOC_NODE_TYPE_HOST) - -#define netloc_node_is_switch(node) \ - (node->type == NETLOC_NODE_TYPE_SWITCH) - -#define netloc_node_iter_paths(node, path,_tmp) \ - HASH_ITER(hh, node->paths, path, _tmp) - -int netloc_node_is_in_partition(netloc_node_t *node, int partition); - -/*************************************************/ - - -/** - * Constructor for netloc_edge_t - * - * User is responsible for calling the destructor on the handle. - * - * Returns - * A newly allocated pointer to the edge information. 
- */ -netloc_edge_t *netloc_edge_construct(void); - -/** - * Destructor for netloc_edge_t - * - * \param edge A valid edge handle - * - * Returns - * NETLOC_SUCCESS on success - * NETLOC_ERROR on error - */ -int netloc_edge_destruct(netloc_edge_t *edge); - -char * netloc_edge_pretty_print(netloc_edge_t* edge); - -void netloc_edge_reset_uid(void); - -int netloc_edge_is_in_partition(netloc_edge_t *edge, int partition); - -#define netloc_edge_get_num_links(edge) \ - utarray_len((edge)->physical_links) - -#define netloc_edge_get_link(edge,i) \ - (*(netloc_physical_link_t **)utarray_eltptr((edge)->physical_links, (i))) - -#define netloc_edge_get_num_subedges(edge) \ - utarray_len((edge)->subnode_edges) - -#define netloc_edge_get_subedge(edge,i) \ - (*(netloc_edge_t **)utarray_eltptr((edge)->subnode_edges, (i))) - -/*************************************************/ - - -/** - * Constructor for netloc_physical_link_t - * - * User is responsible for calling the destructor on the handle. - * - * Returns - * A newly allocated pointer to the physical link information. 
- */ -netloc_physical_link_t * netloc_physical_link_construct(void); - -/** - * Destructor for netloc_physical_link_t - * - * Returns - * NETLOC_SUCCESS on success - * NETLOC_ERROR on error - */ -int netloc_physical_link_destruct(netloc_physical_link_t *link); - -char * netloc_link_pretty_print(netloc_physical_link_t* link); - -/*************************************************/ - - -netloc_path_t *netloc_path_construct(void); -int netloc_path_destruct(netloc_path_t *path); - - -/********************************************************************** - * Architecture functions - **********************************************************************/ - -netloc_arch_t * netloc_arch_construct(void); - -int netloc_arch_destruct(netloc_arch_t *arch); - -int netloc_arch_build(netloc_arch_t *arch, int add_slots); - -int netloc_arch_set_current_resources(netloc_arch_t *arch); - -int netloc_arch_set_global_resources(netloc_arch_t *arch); - -int netloc_arch_node_get_hwloc_info(netloc_arch_node_t *arch); - -void netloc_arch_tree_complete(netloc_arch_tree_t *tree, UT_array **down_degrees_by_level, - int num_hosts, int **parch_idx); - -NETLOC_int netloc_arch_tree_num_leaves(netloc_arch_tree_t *tree); - - -/********************************************************************** - * Access functions of various elements of the topology - **********************************************************************/ - -#define netloc_get_num_partitions(object) \ - utarray_len((object)->partitions) - -#define netloc_get_partition(object,i) \ - (*(int *)utarray_eltptr((object)->partitions, (i))) - - -#define netloc_path_iter_links(path,link) \ - for ((link) = (netloc_physical_link_t **)utarray_front(path->links); \ - (link) != NULL; \ - (link) = (netloc_physical_link_t **)utarray_next(path->links, link)) - -/********************************************************************** - * Misc functions - **********************************************************************/ - -/** - * Decode the 
network type - * - * \param net_type A valid member of the \ref netloc_network_type_t type - * - * \returns NULL if the type is invalid - * \returns A string for that \ref netloc_network_type_t type - */ -static inline const char * netloc_network_type_decode(netloc_network_type_t net_type) { - if( NETLOC_NETWORK_TYPE_ETHERNET == net_type ) { - return "ETH"; - } - else if( NETLOC_NETWORK_TYPE_INFINIBAND == net_type ) { - return "IB"; - } - else { - return NULL; - } -} - -/** - * Decode the node type - * - * \param node_type A valid member of the \ref netloc_node_type_t type - * - * \returns NULL if the type is invalid - * \returns A string for that \ref netloc_node_type_t type - */ -static inline const char * netloc_node_type_decode(netloc_node_type_t node_type) { - if( NETLOC_NODE_TYPE_SWITCH == node_type ) { - return "SW"; - } - else if( NETLOC_NODE_TYPE_HOST == node_type ) { - return "CA"; - } - else { - return NULL; - } -} - -ssize_t netloc_line_get(char **lineptr, size_t *n, FILE *stream); - -char *netloc_line_get_next_token(char **string, char c); - -int netloc_build_comm_mat(char *filename, int *pn, double ***pmat); - -#define STRDUP_IF_NOT_NULL(str) (NULL == str ? NULL : strdup(str)) -#define STR_EMPTY_IF_NULL(str) (NULL == str ? "" : str) - - -#endif // _NETLOC_PRIVATE_H_ diff --git a/src/3rdparty/hwloc/include/private/private.h b/src/3rdparty/hwloc/include/private/private.h index c61acb71..3e3f71d9 100644 --- a/src/3rdparty/hwloc/include/private/private.h +++ b/src/3rdparty/hwloc/include/private/private.h @@ -1,6 +1,6 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2022 Inria. All rights reserved. + * Copyright © 2009-2023 Inria. All rights reserved. * Copyright © 2009-2012, 2020 Université Bordeaux * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. 
* @@ -245,6 +245,12 @@ struct hwloc_topology { * temporary variables during discovery */ + /* set to 1 at the beginning of load() if the filter of any cpu cache type (L1 to L3i) is not NONE, + * may be checked by backends before querying caches + * (when they don't know the level of caches they are querying). + */ + int want_some_cpu_caches; + /* machine-wide memory. * temporarily stored there by OSes that only provide this without NUMA information, * and actually used later by the core. @@ -420,7 +426,7 @@ extern void hwloc_internal_memattrs_need_refresh(hwloc_topology_t topology); extern void hwloc_internal_memattrs_refresh(hwloc_topology_t topology); extern int hwloc_internal_memattrs_dup(hwloc_topology_t new, hwloc_topology_t old); extern int hwloc_internal_memattr_set_value(hwloc_topology_t topology, hwloc_memattr_id_t id, hwloc_obj_type_t target_type, hwloc_uint64_t target_gp_index, unsigned target_os_index, struct hwloc_internal_location_s *initiator, hwloc_uint64_t value); -extern int hwloc_internal_memattrs_guess_memory_tiers(hwloc_topology_t topology); +extern int hwloc_internal_memattrs_guess_memory_tiers(hwloc_topology_t topology, int force_subtype); extern void hwloc_internal_cpukinds_init(hwloc_topology_t topology); extern int hwloc_internal_cpukinds_rank(hwloc_topology_t topology); @@ -477,6 +483,7 @@ extern char * hwloc_progname(struct hwloc_topology *topology); #define HWLOC_GROUP_KIND_INTEL_DIE 104 /* no subkind */ #define HWLOC_GROUP_KIND_S390_BOOK 110 /* subkind 0 is book, subkind 1 is drawer (group of books) */ #define HWLOC_GROUP_KIND_AMD_COMPUTE_UNIT 120 /* no subkind */ +#define HWLOC_GROUP_KIND_AMD_COMPLEX 121 /* no subkind */ /* then, OS-specific groups */ #define HWLOC_GROUP_KIND_SOLARIS_PG_HW_PERF 200 /* subkind is group width */ #define HWLOC_GROUP_KIND_AIX_SDL_UNKNOWN 210 /* subkind is SDL level */ diff --git a/src/3rdparty/hwloc/include/private/xml.h b/src/3rdparty/hwloc/include/private/xml.h index 3af5ba1e..49e80565 100644 --- 
a/src/3rdparty/hwloc/include/private/xml.h +++ b/src/3rdparty/hwloc/include/private/xml.h @@ -19,13 +19,14 @@ HWLOC_DECLSPEC int hwloc__xml_verbose(void); typedef struct hwloc__xml_import_state_s { struct hwloc__xml_import_state_s *parent; - /* globals shared because the entire stack of states during import */ + /* globals shared between the entire stack of states during import */ struct hwloc_xml_backend_data_s *global; /* opaque data used to store backend-specific data. * statically allocated to allow stack-allocation by the common code without knowing actual backend needs. + * libxml is 3 ptrs. nolibxml is 3 ptr + one int. */ - char data[32]; + char data[4 * SIZEOF_VOID_P]; } * hwloc__xml_import_state_t; struct hwloc__xml_imported_v1distances_s { @@ -74,8 +75,9 @@ typedef struct hwloc__xml_export_state_s { /* opaque data used to store backend-specific data. * statically allocated to allow stack-allocation by the common code without knowing actual backend needs. + * libxml is 1 ptr. nolibxml is 1 ptr + 2 size_t + 3 ints. 
*/ - char data[40]; + char data[6 * SIZEOF_VOID_P]; } * hwloc__xml_export_state_t; HWLOC_DECLSPEC void hwloc__xml_export_topology(hwloc__xml_export_state_t parentstate, hwloc_topology_t topology, unsigned long flags); diff --git a/src/3rdparty/hwloc/src/components.c b/src/3rdparty/hwloc/src/components.c index b0381c83..003995a6 100644 --- a/src/3rdparty/hwloc/src/components.c +++ b/src/3rdparty/hwloc/src/components.c @@ -94,8 +94,7 @@ static hwloc_dlhandle hwloc_dlopenext(const char *_filename) { hwloc_dlhandle handle; char *filename = NULL; - (void) asprintf(&filename, "%s.so", _filename); - if (!filename) + if (asprintf(&filename, "%s.so", _filename) < 0) return NULL; handle = dlopen(filename, RTLD_NOW|RTLD_LOCAL); free(filename); diff --git a/src/3rdparty/hwloc/src/diff.c b/src/3rdparty/hwloc/src/diff.c index 81e12c55..361fa524 100644 --- a/src/3rdparty/hwloc/src/diff.c +++ b/src/3rdparty/hwloc/src/diff.c @@ -1,5 +1,5 @@ /* - * Copyright © 2013-2022 Inria. All rights reserved. + * Copyright © 2013-2023 Inria. All rights reserved. * See COPYING in top-level directory. 
*/ @@ -411,6 +411,30 @@ int hwloc_topology_diff_build(hwloc_topology_t topo1, } } + if (!err) { + /* cpukinds */ + if (topo1->nr_cpukinds != topo2->nr_cpukinds) + goto roottoocomplex; + for(i=0; inr_cpukinds; i++) { + struct hwloc_internal_cpukind_s *ic1 = &topo1->cpukinds[i]; + struct hwloc_internal_cpukind_s *ic2 = &topo2->cpukinds[i]; + unsigned j; + if (!hwloc_bitmap_isequal(ic1->cpuset, ic2->cpuset) + || ic1->efficiency != ic2->efficiency + || ic1->forced_efficiency != ic2->forced_efficiency + || ic1->ranking_value != ic2->ranking_value + || ic1->nr_infos != ic2->nr_infos) + goto roottoocomplex; + for(j=0; jnr_infos; j++) { + struct hwloc_info_s *info1 = &ic1->infos[j], *info2 = &ic2->infos[j]; + if (strcmp(info1->name, info2->name) + || strcmp(info1->value, info2->value)) { + goto roottoocomplex; + } + } + } + } + return err; roottoocomplex: diff --git a/src/3rdparty/hwloc/src/memattrs.c b/src/3rdparty/hwloc/src/memattrs.c index b27ed3ec..ab945471 100644 --- a/src/3rdparty/hwloc/src/memattrs.c +++ b/src/3rdparty/hwloc/src/memattrs.c @@ -1,5 +1,5 @@ /* - * Copyright © 2020-2022 Inria. All rights reserved. + * Copyright © 2020-2023 Inria. All rights reserved. * See COPYING in top-level directory. 
*/ @@ -1219,24 +1219,82 @@ hwloc_get_local_numanode_objs(hwloc_topology_t topology, * Using memattrs to identify HBM/DRAM */ +enum hwloc_memory_tier_type_e { + /* WARNING: keep higher BW types first for compare_tiers_by_bw_and_type() when BW info is missing */ + HWLOC_MEMORY_TIER_HBM = 1UL<<0, + HWLOC_MEMORY_TIER_DRAM = 1UL<<1, + HWLOC_MEMORY_TIER_GPU = 1UL<<2, + HWLOC_MEMORY_TIER_SPM = 1UL<<3, /* Specific-Purpose Memory is usually HBM, we'll use BW to confirm or force*/ + HWLOC_MEMORY_TIER_NVM = 1UL<<4, + HWLOC_MEMORY_TIER_CXL = 1UL<<5 +}; +typedef unsigned long hwloc_memory_tier_type_t; +#define HWLOC_MEMORY_TIER_UNKNOWN 0UL + +static const char * hwloc_memory_tier_type_snprintf(hwloc_memory_tier_type_t type) +{ + switch (type) { + case HWLOC_MEMORY_TIER_DRAM: return "DRAM"; + case HWLOC_MEMORY_TIER_HBM: return "HBM"; + case HWLOC_MEMORY_TIER_GPU: return "GPUMemory"; + case HWLOC_MEMORY_TIER_SPM: return "SPM"; + case HWLOC_MEMORY_TIER_NVM: return "NVM"; + case HWLOC_MEMORY_TIER_CXL: + case HWLOC_MEMORY_TIER_CXL|HWLOC_MEMORY_TIER_DRAM: return "CXL-DRAM"; + case HWLOC_MEMORY_TIER_CXL|HWLOC_MEMORY_TIER_HBM: return "CXL-HBM"; + case HWLOC_MEMORY_TIER_CXL|HWLOC_MEMORY_TIER_GPU: return "CXL-GPUMemory"; + case HWLOC_MEMORY_TIER_CXL|HWLOC_MEMORY_TIER_SPM: return "CXL-SPM"; + case HWLOC_MEMORY_TIER_CXL|HWLOC_MEMORY_TIER_NVM: return "CXL-NVM"; + default: return NULL; + } +} + +static hwloc_memory_tier_type_t hwloc_memory_tier_type_sscanf(const char *name) +{ + if (!strcasecmp(name, "DRAM")) + return HWLOC_MEMORY_TIER_DRAM; + if (!strcasecmp(name, "HBM")) + return HWLOC_MEMORY_TIER_HBM; + if (!strcasecmp(name, "GPUMemory")) + return HWLOC_MEMORY_TIER_GPU; + if (!strcasecmp(name, "SPM")) + return HWLOC_MEMORY_TIER_SPM; + if (!strcasecmp(name, "NVM")) + return HWLOC_MEMORY_TIER_NVM; + if (!strcasecmp(name, "CXL-DRAM")) + return HWLOC_MEMORY_TIER_CXL|HWLOC_MEMORY_TIER_DRAM; + if (!strcasecmp(name, "CXL-HBM")) + return HWLOC_MEMORY_TIER_CXL|HWLOC_MEMORY_TIER_HBM; + if 
(!strcasecmp(name, "CXL-GPUMemory")) + return HWLOC_MEMORY_TIER_CXL|HWLOC_MEMORY_TIER_GPU; + if (!strcasecmp(name, "CXL-SPM")) + return HWLOC_MEMORY_TIER_CXL|HWLOC_MEMORY_TIER_SPM; + if (!strcasecmp(name, "CXL-NVM")) + return HWLOC_MEMORY_TIER_CXL|HWLOC_MEMORY_TIER_NVM; + return 0; +} + +/* factorized tier, grouping multiple nodes */ struct hwloc_memory_tier_s { - hwloc_obj_t node; - uint64_t local_bw; - enum hwloc_memory_tier_type_e { - /* warning the order is important for guess_memory_tiers() after qsort() */ - HWLOC_MEMORY_TIER_UNKNOWN, - HWLOC_MEMORY_TIER_DRAM, - HWLOC_MEMORY_TIER_HBM, - HWLOC_MEMORY_TIER_SPM, /* Specific-Purpose Memory is usually HBM, we'll use BW to confirm */ - HWLOC_MEMORY_TIER_NVM, - HWLOC_MEMORY_TIER_GPU, - } type; + hwloc_nodeset_t nodeset; + uint64_t local_bw_min, local_bw_max; + uint64_t local_lat_min, local_lat_max; + hwloc_memory_tier_type_t type; }; -static int compare_tiers(const void *_a, const void *_b) +/* early tier discovery, one entry per node */ +struct hwloc_memory_node_info_s { + hwloc_obj_t node; + uint64_t local_bw; + uint64_t local_lat; + hwloc_memory_tier_type_t type; + unsigned rank; +}; + +static int compare_node_infos_by_type_and_bw(const void *_a, const void *_b) { - const struct hwloc_memory_tier_s *a = _a, *b = _b; - /* sort by type of tier first */ + const struct hwloc_memory_node_info_s *a = _a, *b = _b; + /* sort by type of node first */ if (a->type != b->type) return a->type - b->type; /* then by bandwidth */ @@ -1247,180 +1305,560 @@ static int compare_tiers(const void *_a, const void *_b) return 0; } -int -hwloc_internal_memattrs_guess_memory_tiers(hwloc_topology_t topology) +static int compare_tiers_by_bw_and_type(const void *_a, const void *_b) { - struct hwloc_internal_memattr_s *imattr; - struct hwloc_memory_tier_s *tiers; - unsigned i, j, n; - const char *env; - int spm_is_hbm = -1; /* -1 will guess from BW, 0 no, 1 forced */ - int mark_dram = 1; - unsigned first_spm, first_nvm; - hwloc_uint64_t 
max_unknown_bw, min_spm_bw; - - env = getenv("HWLOC_MEMTIERS_GUESS"); - if (env) { - if (!strcmp(env, "none")) { - return 0; - } else if (!strcmp(env, "default")) { - /* nothing */ - } else if (!strcmp(env, "spm_is_hbm")) { - hwloc_debug("Assuming SPM-tier is HBM, ignore bandwidth\n"); - spm_is_hbm = 1; - } else if (HWLOC_SHOW_CRITICAL_ERRORS()) { - fprintf(stderr, "hwloc: Failed to recognize HWLOC_MEMTIERS_GUESS value %s\n", env); - } + const struct hwloc_memory_tier_s *a = _a, *b = _b; + /* sort by (average) BW first */ + if (a->local_bw_min && b->local_bw_min) { + if (a->local_bw_min + a->local_bw_max > b->local_bw_min + b->local_bw_max) + return -1; + else if (a->local_bw_min + a->local_bw_max < b->local_bw_min + b->local_bw_max) + return 1; } + /* then by tier type */ + if (a->type != b->type) + return a->type - b->type; + return 0; +} - imattr = &topology->memattrs[HWLOC_MEMATTR_ID_BANDWIDTH]; - - if (!(imattr->iflags & HWLOC_IMATTR_FLAG_CACHE_VALID)) - hwloc__imattr_refresh(topology, imattr); +static struct hwloc_memory_tier_s * +hwloc__group_memory_tiers(hwloc_topology_t topology, + unsigned *nr_tiers_p) +{ + struct hwloc_internal_memattr_s *imattr_bw, *imattr_lat; + struct hwloc_memory_node_info_s *nodeinfos; + struct hwloc_memory_tier_s *tiers; + unsigned nr_tiers; + float bw_threshold = 0.1; + float lat_threshold = 0.1; + const char *env; + unsigned i, j, n; n = hwloc_get_nbobjs_by_depth(topology, HWLOC_TYPE_DEPTH_NUMANODE); assert(n); - tiers = malloc(n * sizeof(*tiers)); - if (!tiers) - return -1; + env = getenv("HWLOC_MEMTIERS_BANDWIDTH_THRESHOLD"); + if (env) + bw_threshold = atof(env); + + env = getenv("HWLOC_MEMTIERS_LATENCY_THRESHOLD"); + if (env) + lat_threshold = atof(env); + + imattr_bw = &topology->memattrs[HWLOC_MEMATTR_ID_BANDWIDTH]; + imattr_lat = &topology->memattrs[HWLOC_MEMATTR_ID_LATENCY]; + + if (!(imattr_bw->iflags & HWLOC_IMATTR_FLAG_CACHE_VALID)) + hwloc__imattr_refresh(topology, imattr_bw); + if (!(imattr_lat->iflags & 
HWLOC_IMATTR_FLAG_CACHE_VALID)) + hwloc__imattr_refresh(topology, imattr_lat); + + nodeinfos = malloc(n * sizeof(*nodeinfos)); + if (!nodeinfos) + return NULL; for(i=0; isubtype && !strcmp(node->subtype, "GPUMemory")) - tiers[i].type = HWLOC_MEMORY_TIER_GPU; + nodeinfos[i].type = HWLOC_MEMORY_TIER_GPU; + else if (daxtype && !strcmp(daxtype, "NVM")) + nodeinfos[i].type = HWLOC_MEMORY_TIER_NVM; + else if (daxtype && !strcmp(daxtype, "SPM")) + nodeinfos[i].type = HWLOC_MEMORY_TIER_SPM; + /* add CXL flag */ + if (hwloc_obj_get_info_by_name(node, "CXLDevice") != NULL) { + /* CXL is always SPM for now. HBM and DRAM not possible here yet. + * Hence remove all but NVM first. + */ + nodeinfos[i].type &= HWLOC_MEMORY_TIER_NVM; + nodeinfos[i].type |= HWLOC_MEMORY_TIER_CXL; + } - if (spm_is_hbm == -1) { - for(j=0; jnr_targets; j++) - if (imattr->targets[j].obj == node) { - imtg = &imattr->targets[j]; - break; - } - if (imtg && !hwloc_bitmap_iszero(node->cpuset)) { - iloc.type = HWLOC_LOCATION_TYPE_CPUSET; - iloc.location.cpuset = node->cpuset; - imi = hwloc__memattr_target_get_initiator(imtg, &iloc, 0); - if (imi) - tiers[i].local_bw = imi->value; + /* get local bandwidth */ + imtg = NULL; + for(j=0; jnr_targets; j++) + if (imattr_bw->targets[j].obj == node) { + imtg = &imattr_bw->targets[j]; + break; + } + if (imtg && !hwloc_bitmap_iszero(node->cpuset)) { + struct hwloc_internal_memattr_initiator_s *imi; + iloc.type = HWLOC_LOCATION_TYPE_CPUSET; + iloc.location.cpuset = node->cpuset; + imi = hwloc__memattr_target_get_initiator(imtg, &iloc, 0); + if (imi) + nodeinfos[i].local_bw = imi->value; + } + /* get local latency */ + imtg = NULL; + for(j=0; jnr_targets; j++) + if (imattr_lat->targets[j].obj == node) { + imtg = &imattr_lat->targets[j]; + break; + } + if (imtg && !hwloc_bitmap_iszero(node->cpuset)) { + struct hwloc_internal_memattr_initiator_s *imi; + iloc.type = HWLOC_LOCATION_TYPE_CPUSET; + iloc.location.cpuset = node->cpuset; + imi = 
hwloc__memattr_target_get_initiator(imtg, &iloc, 0); + if (imi) + nodeinfos[i].local_lat = imi->value; + } + } + + /* Sort nodes. + * We could also sort by the existing subtype. + * KNL is the only case where subtypes are set in backends, but we set memattrs as well there. + * Also HWLOC_MEMTIERS_REFRESH would be a special value to ignore existing subtypes. + */ + hwloc_debug("Sorting memory node infos...\n"); + qsort(nodeinfos, n, sizeof(*nodeinfos), compare_node_infos_by_type_and_bw); +#ifdef HWLOC_DEBUG + for(i=0; ilogical_index, nodeinfos[i].node->os_index, + nodeinfos[i].type, + (unsigned long long) nodeinfos[i].local_bw, + (unsigned long long) nodeinfos[i].local_lat); +#endif + /* now we have UNKNOWN nodes (sorted by BW only), then known ones */ + + /* iterate among them and add a rank value. + * start from rank 0 and switch to next rank when the type changes or when the BW or latendy difference is > threshold */ + hwloc_debug("Starting memory tier #0 and iterating over nodes...\n"); + nodeinfos[0].rank = 0; + for(i=1; ilogical_index, nodeinfos[i].node->os_index); + nodeinfos[i].rank++; + continue; + } + /* comparing bandwidth */ + if (nodeinfos[i].local_bw && nodeinfos[i-1].local_bw) { + float bw_ratio = (float)nodeinfos[i].local_bw/(float)nodeinfos[i-1].local_bw; + if (bw_ratio < 1.) + bw_ratio = 1./bw_ratio; + if (bw_ratio > 1.0 + bw_threshold) { + nodeinfos[i].rank++; + hwloc_debug(" Switching to memory tier #%u starting with node L#%u P#%u because of bandwidth\n", + nodeinfos[i].rank, nodeinfos[i].node->logical_index, nodeinfos[i].node->os_index); + continue; + } + } + /* comparing latency */ + if (nodeinfos[i].local_lat && nodeinfos[i-1].local_lat) { + float lat_ratio = (float)nodeinfos[i].local_lat/(float)nodeinfos[i-1].local_lat; + if (lat_ratio < 1.) 
+ lat_ratio = 1./lat_ratio; + if (lat_ratio > 1.0 + lat_threshold) { + hwloc_debug(" Switching to memory tier #%u starting with node L#%u P#%u because of latency\n", + nodeinfos[i].rank, nodeinfos[i].node->logical_index, nodeinfos[i].node->os_index); + nodeinfos[i].rank++; + continue; } } } + /* FIXME: if there are cpuset-intersecting nodes in same tier, split again? */ + hwloc_debug(" Found %u tiers total\n", nodeinfos[n-1].rank + 1); - /* sort tiers */ - qsort(tiers, n, sizeof(*tiers), compare_tiers); - hwloc_debug("Sorting memory tiers...\n"); - for(i=0; ilogical_index, tiers[i].node->os_index, - tiers[i].type, (unsigned long long) tiers[i].local_bw); - - /* now we have UNKNOWN tiers (sorted by BW), then SPM tiers (sorted by BW), then NVM, then GPU */ - - /* iterate over UNKNOWN tiers, and find their BW */ + /* now group nodeinfos into factorized tiers */ + nr_tiers = nodeinfos[n-1].rank + 1; + tiers = calloc(nr_tiers, sizeof(*tiers)); + if (!tiers) + goto out_with_nodeinfos; + for(i=0; i HWLOC_MEMORY_TIER_UNKNOWN) - break; - } - first_spm = i; - /* get max BW from first */ - if (first_spm > 0) - max_unknown_bw = tiers[0].local_bw; - else - max_unknown_bw = 0; - - /* there are no DRAM or HBM tiers yet */ - - /* iterate over SPM tiers, and find their BW */ - for(i=first_spm; i HWLOC_MEMORY_TIER_SPM) - break; - } - first_nvm = i; - /* get min BW from last */ - if (first_nvm > first_spm) - min_spm_bw = tiers[first_nvm-1].local_bw; - else - min_spm_bw = 0; - - /* FIXME: if there's more than 10% between some sets of nodes inside a tier, split it? */ - /* FIXME: if there are cpuset-intersecting nodes in same tier, abort? 
*/ - - if (spm_is_hbm == -1) { - /* if we have BW for all SPM and UNKNOWN - * and all SPM BW are 2x superior to all UNKNOWN BW - */ - hwloc_debug("UNKNOWN-memory-tier max bandwidth %llu\n", (unsigned long long) max_unknown_bw); - hwloc_debug("SPM-memory-tier min bandwidth %llu\n", (unsigned long long) min_spm_bw); - if (max_unknown_bw > 0 && min_spm_bw > 0 && max_unknown_bw*2 < min_spm_bw) { - hwloc_debug("assuming SPM means HBM and !SPM means DRAM since bandwidths are very different\n"); - spm_is_hbm = 1; - } else { - hwloc_debug("cannot assume SPM means HBM\n"); - spm_is_hbm = 0; - } + unsigned rank = nodeinfos[i].rank; + assert(rank < nr_tiers); + hwloc_bitmap_set(tiers[rank].nodeset, nodeinfos[i].node->os_index); + assert(tiers[rank].type == HWLOC_MEMORY_TIER_UNKNOWN + || tiers[rank].type == nodeinfos[i].type); + tiers[rank].type = nodeinfos[i].type; + /* nodeinfos are sorted in BW order, no need to compare */ + if (!tiers[rank].local_bw_min) + tiers[rank].local_bw_min = nodeinfos[i].local_bw; + tiers[rank].local_bw_max = nodeinfos[i].local_bw; + /* compare latencies to update min/max */ + if (!tiers[rank].local_lat_min || nodeinfos[i].local_lat < tiers[rank].local_lat_min) + tiers[rank].local_lat_min = nodeinfos[i].local_lat; + if (!tiers[rank].local_lat_max || nodeinfos[i].local_lat > tiers[rank].local_lat_max) + tiers[rank].local_lat_max = nodeinfos[i].local_lat; } - if (spm_is_hbm) { - for(i=0; isubtype) /* don't overwrite the existing subtype */ - continue; - switch (tiers[i].type) { - case HWLOC_MEMORY_TIER_DRAM: - if (mark_dram) - type = "DRAM"; - break; - case HWLOC_MEMORY_TIER_HBM: - type = "HBM"; - break; - case HWLOC_MEMORY_TIER_SPM: - type = "SPM"; - break; - case HWLOC_MEMORY_TIER_NVM: - type = "NVM"; - break; - default: - /* GPU memory is already marked with subtype="GPUMemory", - * UNKNOWN doesn't deserve any subtype - */ - break; - } - if (type) { - hwloc_debug("Marking node L#%u P#%u as %s\n", tiers[i].node->logical_index, 
tiers[i].node->os_index, type); - tiers[i].node->subtype = strdup(type); - } - } + free(nodeinfos); + *nr_tiers_p = nr_tiers; + return tiers; + out_with_tiers: + for(i=0; ilocal_bw_min || !tier2->local_bw_min) { + hwloc_debug(" Missing BW info\n"); + return -1; + } + + /* reorder tiers by BW */ + if (tier1->local_bw_min > tier2->local_bw_min) { + tmp = tier1; tier1 = tier2; tier2 = tmp; + } + /* tier1 < tier2 */ + + hwloc_debug(" tier1 BW %llu-%llu vs tier2 BW %llu-%llu\n", + (unsigned long long) tier1->local_bw_min, + (unsigned long long) tier1->local_bw_max, + (unsigned long long) tier2->local_bw_min, + (unsigned long long) tier2->local_bw_max); + if (tier2->local_bw_min <= tier1->local_bw_max * 2) { + /* tier2 BW isn't 2x tier1, we cannot guess HBM */ + hwloc_debug(" BW difference isn't >2x\n"); + return -1; + } + /* tier2 BW is >2x tier1 */ + + if ((flags & HWLOC_GUESS_MEMTIERS_FLAG_NODE0_IS_DRAM) + && hwloc_bitmap_isset(tier2->nodeset, 0)) { + /* node0 is not DRAM, and we assume that's not possible */ + hwloc_debug(" node0 shouldn't have HBM BW\n"); + return -1; + } + + /* assume tier1 == DRAM and tier2 == HBM */ + tier1->type = HWLOC_MEMORY_TIER_DRAM; + tier2->type = HWLOC_MEMORY_TIER_HBM; + hwloc_debug(" Success\n"); + return 0; +} + +static int +hwloc__guess_memory_tiers_types(hwloc_topology_t topology __hwloc_attribute_unused, + unsigned nr_tiers, + struct hwloc_memory_tier_s *tiers) +{ + unsigned long flags; + const char *env; + unsigned nr_unknown, nr_spm; + struct hwloc_memory_tier_s *unknown_tier[2], *spm_tier; + unsigned i; + + flags = 0; + env = getenv("HWLOC_MEMTIERS_GUESS"); + if (env) { + if (!strcmp(env, "none")) + return 0; + /* by default, we don't guess anything unsure */ + if (!strcmp(env, "all")) + /* enable all typical cases */ + flags = ~0UL; + if (strstr(env, "spm_is_hbm")) { + hwloc_debug("Assuming SPM-tier is HBM, ignore bandwidth\n"); + flags |= HWLOC_GUESS_MEMTIERS_FLAG_SPM_IS_HBM; + } + if (strstr(env, "node0_is_dram")) { + 
hwloc_debug("Assuming node0 is DRAM\n"); + flags |= HWLOC_GUESS_MEMTIERS_FLAG_NODE0_IS_DRAM; + } + } + + if (nr_tiers == 1) + /* Likely DRAM only, but could also be HBM-only in non-SPM mode. + * We cannot be sure, but it doesn't matter since there's a single tier. + */ + return 0; + + nr_unknown = nr_spm = 0; + unknown_tier[0] = unknown_tier[1] = spm_tier = NULL; + for(i=0; i DRAM or HBM? HBM won't be SPM on HBM-only CPUs + * unknown + CXL DRAM => DRAM or HBM? + */ + if (nr_unknown == 2 && !nr_spm) { + /* 2 unknown, could be DRAM + non-SPM HBM */ + hwloc_debug(" Trying to guess 2 unknown tiers using BW\n"); + hwloc__guess_dram_hbm_tiers(unknown_tier[0], unknown_tier[1], flags); + } else if (nr_unknown == 1 && nr_spm == 1) { + /* 1 unknown + 1 SPM, could be DRAM + SPM HBM */ + hwloc_debug(" Trying to guess 1 unknown + 1 SPM tiers using BW\n"); + hwloc__guess_dram_hbm_tiers(unknown_tier[0], spm_tier, flags); + } + + if (flags & HWLOC_GUESS_MEMTIERS_FLAG_SPM_IS_HBM) { + /* force mark SPM as HBM */ + for(i=0; ios_index)) { + const char *subtype = hwloc_memory_tier_type_snprintf(tiers[j].type); + if (!node->subtype || force) { /* don't overwrite the existing subtype unless forced */ + if (subtype) { /* don't set a subtype for unknown tiers */ + hwloc_debug(" marking node L#%u P#%u as %s (was %s)\n", node->logical_index, node->os_index, subtype, node->subtype); + free(node->subtype); + node->subtype = strdup(subtype); + } + } else + hwloc_debug(" node L#%u P#%u already marked as %s, not setting %s\n", + node->logical_index, node->os_index, node->subtype, subtype); + if (nr_tiers > 1) { + char tmp[20]; + snprintf(tmp, sizeof(tmp), "%u", j); + hwloc__add_info_nodup(&node->infos, &node->infos_count, "MemoryTier", tmp, 1); + } + break; /* each node is in a single tier */ + } + } + } +} + +int +hwloc_internal_memattrs_guess_memory_tiers(hwloc_topology_t topology, int force_subtype) +{ + struct hwloc_memory_tier_s *tiers; + unsigned nr_tiers; + unsigned i; + const char *env; + 
+ env = getenv("HWLOC_MEMTIERS"); + if (env) { + if (!strcmp(env, "none")) + goto out; + tiers = hwloc__force_memory_tiers(topology, &nr_tiers, env); + if (tiers) { + assert(nr_tiers > 0); + force_subtype = 1; + goto ready; + } + } + + tiers = hwloc__group_memory_tiers(topology, &nr_tiers); + if (!tiers) + goto out; + + hwloc__guess_memory_tiers_types(topology, nr_tiers, tiers); + + /* sort tiers by BW first, then by type */ + hwloc_debug("Sorting memory tiers...\n"); + qsort(tiers, nr_tiers, sizeof(*tiers), compare_tiers_by_bw_and_type); + + ready: +#ifdef HWLOC_DEBUG + for(i=0; itype; + + if (type == HWLOC_OBJ_GROUP) { + if (sattr->depth == (unsigned)-1) + sattr->depth = type_count[HWLOC_OBJ_GROUP]--; + + } else if (hwloc__obj_type_is_cache(type)) { + if (!sattr->memorysize) { + if (1 == sattr->depth) + /* 32KiB in L1 */ + sattr->memorysize = 32*1024; + else + /* *4 at each level, starting from 1MiB for L2, unified */ + sattr->memorysize = 256ULL*1024 << (2*sattr->depth); + } + + } else if (type == HWLOC_OBJ_NUMANODE && !sattr->memorysize) { + /* 1GiB in memory nodes. 
*/ + sattr->memorysize = 1024*1024*1024; + } +} + /* frees level until arity = 0 */ static void hwloc_synthetic_free_levels(struct hwloc_synthetic_backend_data_s *data) @@ -465,6 +494,7 @@ hwloc_backend_synthetic_init(struct hwloc_synthetic_backend_data_s *data, data->level[0].indexes.string = NULL; data->level[0].indexes.array = NULL; data->level[0].attr.memorysize = 0; + data->level[0].attr.memorysidecachesize = 0; data->level[0].attached = NULL; type_count[HWLOC_OBJ_MACHINE] = 1; if (*description == '(') { @@ -514,6 +544,7 @@ hwloc_backend_synthetic_init(struct hwloc_synthetic_backend_data_s *data, if (attached) { attached->attr.type = type; attached->attr.memorysize = 0; + attached->attr.memorysidecachesize = 0; /* attached->attr.depth and .cachetype unused */ attached->next = NULL; pprev = &data->level[count-1].attached; @@ -601,7 +632,7 @@ hwloc_backend_synthetic_init(struct hwloc_synthetic_backend_data_s *data, } if (!item) { if (verbose) - fprintf(stderr,"Synthetic string with disallow 0 number of objects at '%s'\n", pos); + fprintf(stderr,"Synthetic string with disallowed 0 number of objects at '%s'\n", pos); errno = EINVAL; goto error; } @@ -611,6 +642,7 @@ hwloc_backend_synthetic_init(struct hwloc_synthetic_backend_data_s *data, data->level[count].indexes.string = NULL; data->level[count].indexes.array = NULL; data->level[count].attr.memorysize = 0; + data->level[count].attr.memorysidecachesize = 0; if (*next_pos == '(') { err = hwloc_synthetic_parse_attrs(next_pos+1, &next_pos, &data->level[count].attr, &data->level[count].indexes, verbose); if (err < 0) @@ -796,6 +828,7 @@ hwloc_backend_synthetic_init(struct hwloc_synthetic_backend_data_s *data, data->level[1].indexes.string = NULL; data->level[1].indexes.array = NULL; data->level[1].attr.memorysize = 0; + data->level[1].attr.memorysidecachesize = 0; data->level[1].totalwidth = data->level[0].totalwidth; /* update arity to insert a single NUMA node per parent */ data->level[1].arity = 
data->level[0].arity; @@ -803,30 +836,14 @@ hwloc_backend_synthetic_init(struct hwloc_synthetic_backend_data_s *data, count++; } + /* set default attributes that depend on the depth/hierarchy of levels */ for (i=0; ilevel[i]; - hwloc_obj_type_t type = curlevel->attr.type; - - if (type == HWLOC_OBJ_GROUP) { - if (curlevel->attr.depth == (unsigned)-1) - curlevel->attr.depth = type_count[HWLOC_OBJ_GROUP]--; - - } else if (hwloc__obj_type_is_cache(type)) { - if (!curlevel->attr.memorysize) { - if (1 == curlevel->attr.depth) - /* 32KiB in L1 */ - curlevel->attr.memorysize = 32*1024; - else - /* *4 at each level, starting from 1MiB for L2, unified */ - curlevel->attr.memorysize = 256ULL*1024 << (2*curlevel->attr.depth); - } - - } else if (type == HWLOC_OBJ_NUMANODE && !curlevel->attr.memorysize) { - /* 1GiB in memory nodes. */ - curlevel->attr.memorysize = 1024*1024*1024; - } - - hwloc_synthetic_process_indexes(data, &data->level[i].indexes, data->level[i].totalwidth, verbose); + hwloc_synthetic_set_default_attrs(&curlevel->attr, type_count); + for(attached = curlevel->attached; attached != NULL; attached = attached->next) + hwloc_synthetic_set_default_attrs(&attached->attr, type_count); + hwloc_synthetic_process_indexes(data, &curlevel->indexes, curlevel->totalwidth, verbose); } hwloc_synthetic_process_indexes(data, &data->numa_attached_indexes, data->numa_attached_nr, verbose); @@ -859,6 +876,12 @@ hwloc_synthetic_set_attr(struct hwloc_synthetic_attr_s *sattr, obj->attr->numanode.page_types[0].size = 4096; obj->attr->numanode.page_types[0].count = sattr->memorysize / 4096; break; + case HWLOC_OBJ_MEMCACHE: + obj->attr->cache.depth = 1; + obj->attr->cache.linesize = 64; + obj->attr->cache.type = HWLOC_OBJ_CACHE_UNIFIED; + obj->attr->cache.size = sattr->memorysidecachesize; + break; case HWLOC_OBJ_PACKAGE: case HWLOC_OBJ_DIE: break; @@ -926,6 +949,14 @@ hwloc_synthetic_insert_attached(struct hwloc_topology *topology, hwloc__insert_object_by_cpuset(topology, NULL, child, 
"synthetic:attached"); + if (attached->attr.memorysidecachesize) { + hwloc_obj_t mscachechild = hwloc_alloc_setup_object(topology, HWLOC_OBJ_MEMCACHE, HWLOC_UNKNOWN_INDEX); + mscachechild->cpuset = hwloc_bitmap_dup(set); + mscachechild->nodeset = hwloc_bitmap_dup(child->nodeset); + hwloc_synthetic_set_attr(&attached->attr, mscachechild); + hwloc__insert_object_by_cpuset(topology, NULL, mscachechild, "synthetic:attached:mscache"); + } + hwloc_synthetic_insert_attached(topology, data, attached->next, set); } @@ -977,6 +1008,14 @@ hwloc__look_synthetic(struct hwloc_topology *topology, hwloc_synthetic_set_attr(&curlevel->attr, obj); hwloc__insert_object_by_cpuset(topology, NULL, obj, "synthetic"); + + if (type == HWLOC_OBJ_NUMANODE && curlevel->attr.memorysidecachesize) { + hwloc_obj_t mscachechild = hwloc_alloc_setup_object(topology, HWLOC_OBJ_MEMCACHE, HWLOC_UNKNOWN_INDEX); + mscachechild->cpuset = hwloc_bitmap_dup(set); + mscachechild->nodeset = hwloc_bitmap_dup(obj->nodeset); + hwloc_synthetic_set_attr(&curlevel->attr, mscachechild); + hwloc__insert_object_by_cpuset(topology, NULL, mscachechild, "synthetic:mscache"); + } } hwloc_synthetic_insert_attached(topology, data, curlevel->attached, set); @@ -1217,6 +1256,7 @@ hwloc__export_synthetic_indexes(hwloc_obj_t *level, unsigned total, static int hwloc__export_synthetic_obj_attr(struct hwloc_topology * topology, + unsigned long flags, hwloc_obj_t obj, char *buffer, size_t buflen) { @@ -1224,6 +1264,7 @@ hwloc__export_synthetic_obj_attr(struct hwloc_topology * topology, const char * prefix = "("; char cachesize[64] = ""; char memsize[64] = ""; + char memorysidecachesize[64] = ""; int needindexes = 0; if (hwloc__obj_type_is_cache(obj->type) && obj->attr->cache.size) { @@ -1236,6 +1277,19 @@ hwloc__export_synthetic_obj_attr(struct hwloc_topology * topology, prefix, (unsigned long long) obj->attr->numanode.local_memory); prefix = separator; } + if (obj->type == HWLOC_OBJ_NUMANODE && !(flags & 
HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_V1)) { + hwloc_obj_t memorysidecache = obj->parent; + hwloc_uint64_t size = 0; + while (memorysidecache && memorysidecache->type == HWLOC_OBJ_MEMCACHE) { + size += memorysidecache->attr->cache.size; + memorysidecache = memorysidecache->parent; + } + if (size) { + snprintf(memorysidecachesize, sizeof(memorysidecachesize), "%smemorysidecachesize=%llu", + prefix, (unsigned long long) size); + prefix = separator; + } + } if (!obj->logical_index /* only display indexes once per level (not for non-first NUMA children, etc.) */ && (obj->type == HWLOC_OBJ_PU || obj->type == HWLOC_OBJ_NUMANODE)) { hwloc_obj_t cur = obj; @@ -1247,12 +1301,12 @@ hwloc__export_synthetic_obj_attr(struct hwloc_topology * topology, cur = cur->next_cousin; } } - if (*cachesize || *memsize || needindexes) { + if (*cachesize || *memsize || *memorysidecachesize || needindexes) { ssize_t tmplen = buflen; char *tmp = buffer; int res, ret = 0; - res = hwloc_snprintf(tmp, tmplen, "%s%s%s", cachesize, memsize, needindexes ? "" : ")"); + res = hwloc_snprintf(tmp, tmplen, "%s%s%s%s", cachesize, memsize, memorysidecachesize, needindexes ? 
"" : ")"); if (hwloc__export_synthetic_update_status(&ret, &tmp, &tmplen, res) < 0) return -1; @@ -1326,7 +1380,7 @@ hwloc__export_synthetic_obj(struct hwloc_topology * topology, unsigned long flag if (!(flags & HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_NO_ATTRS)) { /* obj attributes */ - res = hwloc__export_synthetic_obj_attr(topology, obj, tmp, tmplen); + res = hwloc__export_synthetic_obj_attr(topology, flags, obj, tmp, tmplen); if (hwloc__export_synthetic_update_status(&ret, &tmp, &tmplen, res) < 0) return -1; } @@ -1351,7 +1405,7 @@ hwloc__export_synthetic_memory_children(struct hwloc_topology * topology, unsign if (flags & HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_V1) { /* v1: export a single NUMA child */ - if (parent->memory_arity > 1 || mchild->type != HWLOC_OBJ_NUMANODE) { + if (parent->memory_arity > 1) { /* not supported */ if (verbose) fprintf(stderr, "Cannot export to synthetic v1 if multiple memory children are attached to the same location.\n"); @@ -1362,6 +1416,9 @@ hwloc__export_synthetic_memory_children(struct hwloc_topology * topology, unsign if (needprefix) hwloc__export_synthetic_add_char(&ret, &tmp, &tmplen, ' '); + /* ignore memcaches and export the NUMA node */ + while (mchild->type != HWLOC_OBJ_NUMANODE) + mchild = mchild->memory_first_child; res = hwloc__export_synthetic_obj(topology, flags, mchild, 1, tmp, tmplen); if (hwloc__export_synthetic_update_status(&ret, &tmp, &tmplen, res) < 0) return -1; @@ -1369,16 +1426,25 @@ hwloc__export_synthetic_memory_children(struct hwloc_topology * topology, unsign } while (mchild) { - /* FIXME: really recurse to export memcaches and numanode, + /* The core doesn't support shared memcache for now (because ACPI and Linux don't). + * So, for each mchild here, recurse only in the first children at each level. 
+ * + * FIXME: whenever supported by the core, really recurse to export memcaches and numanode, * but it requires clever parsing of [ memcache [numa] [numa] ] during import, * better attaching of things to describe the hierarchy. */ hwloc_obj_t numanode = mchild; - /* only export the first NUMA node leaf of each memory child - * FIXME: This assumes mscache aren't shared between nodes, that's true in current platforms + /* Only export the first NUMA node leaf of each memory child. + * Memcaches are ignored here, they will be summed and exported as a single attribute + * of the NUMA node in hwloc__export_synthetic_obj(). */ while (numanode && numanode->type != HWLOC_OBJ_NUMANODE) { - assert(numanode->arity == 1); + if (verbose && numanode->memory_arity > 1) { + static int warned = 0; + if (!warned) + fprintf(stderr, "Ignoring non-first memory children at non-first level of memory hierarchy.\n"); + warned = 1; + } numanode = numanode->memory_first_child; } assert(numanode); /* there's always a numanode at the bottom of the memory tree */ @@ -1511,17 +1577,21 @@ hwloc_topology_export_synthetic(struct hwloc_topology * topology, if (flags & HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_V1) { /* v1 requires all NUMA at the same level */ - hwloc_obj_t node; + hwloc_obj_t node, parent; signed pdepth; node = hwloc_get_obj_by_type(topology, HWLOC_OBJ_NUMANODE, 0); assert(node); - assert(hwloc__obj_type_is_normal(node->parent->type)); /* only depth-1 memory children for now */ - pdepth = node->parent->depth; + parent = node->parent; + while (!hwloc__obj_type_is_normal(parent->type)) + parent = parent->parent; + pdepth = parent->depth; while ((node = node->next_cousin) != NULL) { - assert(hwloc__obj_type_is_normal(node->parent->type)); /* only depth-1 memory children for now */ - if (node->parent->depth != pdepth) { + parent = node->parent; + while (!hwloc__obj_type_is_normal(parent->type)) + parent = parent->parent; + if (parent->depth != pdepth) { if (verbose) fprintf(stderr, "Cannot 
export to synthetic v1 if memory is attached to parents at different depths.\n"); errno = EINVAL; @@ -1534,7 +1604,7 @@ hwloc_topology_export_synthetic(struct hwloc_topology * topology, if (!(flags & HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_NO_ATTRS)) { /* obj attributes */ - res = hwloc__export_synthetic_obj_attr(topology, obj, tmp, tmplen); + res = hwloc__export_synthetic_obj_attr(topology, flags, obj, tmp, tmplen); if (res > 0) needprefix = 1; if (hwloc__export_synthetic_update_status(&ret, &tmp, &tmplen, res) < 0) diff --git a/src/3rdparty/hwloc/src/topology-windows.c b/src/3rdparty/hwloc/src/topology-windows.c index 20b617a9..e187bb12 100644 --- a/src/3rdparty/hwloc/src/topology-windows.c +++ b/src/3rdparty/hwloc/src/topology-windows.c @@ -1,6 +1,6 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2022 Inria. All rights reserved. + * Copyright © 2009-2023 Inria. All rights reserved. * Copyright © 2009-2012, 2020 Université Bordeaux * Copyright © 2011 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. 
@@ -367,7 +367,7 @@ hwloc_win_get_processor_groups(void) if (nr_processor_groups > 1 && SIZEOF_VOID_P == 4) { if (HWLOC_SHOW_ALL_ERRORS()) - fprintf(stderr, "hwloc: multiple processor groups found on 32bits Windows, topology may be invalid/incomplete.\n"); + fprintf(stderr, "hwloc/windows: multiple processor groups found on 32bits Windows, topology may be invalid/incomplete.\n"); } length = 0; @@ -987,7 +987,11 @@ hwloc_look_windows(struct hwloc_backend *backend, struct hwloc_disc_status *dsta OSVERSIONINFOEX osvi; char versionstr[20]; char hostname[122] = ""; - unsigned hostname_size = sizeof(hostname); +#if !defined(__CYGWIN__) + DWORD hostname_size = sizeof(hostname); +#else + size_t hostname_size = sizeof(hostname); +#endif int has_efficiencyclass = 0; struct hwloc_win_efficiency_classes eclasses; char *env = getenv("HWLOC_WINDOWS_PROCESSOR_GROUP_OBJS"); @@ -1051,12 +1055,16 @@ hwloc_look_windows(struct hwloc_backend *backend, struct hwloc_disc_status *dsta unsigned efficiency_class = 0; GROUP_AFFINITY *GroupMask; - /* Ignore unknown caches */ - if (procInfo->Relationship == RelationCache - && procInfo->Cache.Type != CacheUnified - && procInfo->Cache.Type != CacheData - && procInfo->Cache.Type != CacheInstruction) - continue; + if (procInfo->Relationship == RelationCache) { + if (!topology->want_some_cpu_caches) + /* TODO: check if RelationAll&~RelationCache works? */ + continue; + if (procInfo->Cache.Type != CacheUnified + && procInfo->Cache.Type != CacheData + && procInfo->Cache.Type != CacheInstruction) + /* Ignore unknown caches */ + continue; + } id = HWLOC_UNKNOWN_INDEX; switch (procInfo->Relationship) { diff --git a/src/3rdparty/hwloc/src/topology-x86.c b/src/3rdparty/hwloc/src/topology-x86.c index a1558f07..7aabd168 100644 --- a/src/3rdparty/hwloc/src/topology-x86.c +++ b/src/3rdparty/hwloc/src/topology-x86.c @@ -1,5 +1,5 @@ /* - * Copyright © 2010-2022 Inria. All rights reserved. + * Copyright © 2010-2023 Inria. All rights reserved. 
* Copyright © 2010-2013 Université Bordeaux * Copyright © 2010-2011 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. @@ -38,6 +38,12 @@ struct hwloc_x86_backend_data_s { int apicid_unique; char *src_cpuiddump_path; int is_knl; + int is_hybrid; + int found_die_ids; + int found_complex_ids; + int found_unit_ids; + int found_module_ids; + int found_tile_ids; }; /************************************ @@ -80,7 +86,7 @@ cpuiddump_read(const char *dirpath, unsigned idx) cpuiddump = malloc(sizeof(*cpuiddump)); if (!cpuiddump) { - fprintf(stderr, "Failed to allocate cpuiddump for PU #%u, ignoring cpuiddump.\n", idx); + fprintf(stderr, "hwloc/x86: Failed to allocate cpuiddump for PU #%u, ignoring cpuiddump.\n", idx); goto out; } @@ -91,7 +97,7 @@ cpuiddump_read(const char *dirpath, unsigned idx) snprintf(filename, filenamelen, "%s/pu%u", dirpath, idx); file = fopen(filename, "r"); if (!file) { - fprintf(stderr, "Could not read dumped cpuid file %s, ignoring cpuiddump.\n", filename); + fprintf(stderr, "hwloc/x86: Could not read dumped cpuid file %s, ignoring cpuiddump.\n", filename); goto out_with_filename; } @@ -100,7 +106,7 @@ cpuiddump_read(const char *dirpath, unsigned idx) nr++; cpuiddump->entries = malloc(nr * sizeof(struct cpuiddump_entry)); if (!cpuiddump->entries) { - fprintf(stderr, "Failed to allocate %u cpuiddump entries for PU #%u, ignoring cpuiddump.\n", nr, idx); + fprintf(stderr, "hwloc/x86: Failed to allocate %u cpuiddump entries for PU #%u, ignoring cpuiddump.\n", nr, idx); goto out_with_file; } @@ -156,7 +162,7 @@ cpuiddump_find_by_input(unsigned *eax, unsigned *ebx, unsigned *ecx, unsigned *e return; } - fprintf(stderr, "Couldn't find %x,%x,%x,%x in dumped cpuid, returning 0s.\n", + fprintf(stderr, "hwloc/x86: Couldn't find %x,%x,%x,%x in dumped cpuid, returning 0s.\n", *eax, *ebx, *ecx, *edx); *eax = 0; *ebx = 0; @@ -210,7 +216,8 @@ struct procinfo { #define TILE 4 #define MODULE 5 #define DIE 6 -#define 
HWLOC_X86_PROCINFO_ID_NR 7 +#define COMPLEX 7 +#define HWLOC_X86_PROCINFO_ID_NR 8 unsigned ids[HWLOC_X86_PROCINFO_ID_NR]; unsigned *otherids; unsigned levels; @@ -314,7 +321,7 @@ static void read_amd_caches_topoext(struct procinfo *infos, struct cpuiddump *sr /* the code below doesn't want any other cache yet */ assert(!infos->numcaches); - for (cachenum = 0; ; cachenum++) { + for (cachenum = 0; cachenum<16 /* guard */; cachenum++) { eax = 0x8000001d; ecx = cachenum; cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump); @@ -325,7 +332,7 @@ static void read_amd_caches_topoext(struct procinfo *infos, struct cpuiddump *sr cache = infos->cache = malloc(infos->numcaches * sizeof(*infos->cache)); if (cache) { - for (cachenum = 0; ; cachenum++) { + for (cachenum = 0; cachenum<16 /* guard */; cachenum++) { unsigned long linesize, linepart, ways, sets; eax = 0x8000001d; ecx = cachenum; @@ -378,7 +385,7 @@ static void read_intel_caches(struct hwloc_x86_backend_data_s *data, struct proc unsigned cachenum; struct cacheinfo *cache; - for (cachenum = 0; ; cachenum++) { + for (cachenum = 0; cachenum<16 /* guard */; cachenum++) { eax = 0x04; ecx = cachenum; cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump); @@ -400,7 +407,7 @@ static void read_intel_caches(struct hwloc_x86_backend_data_s *data, struct proc infos->cache = tmpcaches; cache = &infos->cache[oldnumcaches]; - for (cachenum = 0; ; cachenum++) { + for (cachenum = 0; cachenum<16 /* guard */; cachenum++) { unsigned long linesize, linepart, ways, sets; eax = 0x04; ecx = cachenum; @@ -480,7 +487,7 @@ static void read_amd_cores_legacy(struct procinfo *infos, struct cpuiddump *src_ } /* AMD unit/node from CPUID 0x8000001e leaf (topoext) */ -static void read_amd_cores_topoext(struct procinfo *infos, unsigned long flags, struct cpuiddump *src_cpuiddump) +static void read_amd_cores_topoext(struct hwloc_x86_backend_data_s *data, struct procinfo *infos, unsigned long flags, struct cpuiddump *src_cpuiddump) { unsigned 
apic_id, nodes_per_proc = 0; unsigned eax, ebx, ecx, edx; @@ -510,6 +517,7 @@ static void read_amd_cores_topoext(struct procinfo *infos, unsigned long flags, unsigned cores_per_unit; /* coreid was obtained from read_amd_cores_legacy() earlier */ infos->ids[UNIT] = ebx & 0xff; + data->found_unit_ids = 1; cores_per_unit = ((ebx >> 8) & 0xff) + 1; hwloc_debug("topoext %08x, %u nodes, node %u, %u cores in unit %u\n", apic_id, nodes_per_proc, infos->ids[NODE], cores_per_unit, infos->ids[UNIT]); /* coreid and unitid are package-wide (core 0-15 and unit 0-7 on 16-core 2-NUMAnode processor). @@ -524,20 +532,35 @@ static void read_amd_cores_topoext(struct procinfo *infos, unsigned long flags, } } -/* Intel core/thread or even die/module/tile from CPUID 0x0b or 0x1f leaves (v1 and v2 extended topology enumeration) */ -static void read_intel_cores_exttopoenum(struct procinfo *infos, unsigned leaf, struct cpuiddump *src_cpuiddump) +/* Intel core/thread or even die/module/tile from CPUID 0x0b or 0x1f leaves (v1 and v2 extended topology enumeration) + * or AMD complex/ccd from CPUID 0x80000026 (extended CPU topology) + */ +static void read_extended_topo(struct hwloc_x86_backend_data_s *data, struct procinfo *infos, unsigned leaf, enum cpuid_type cpuid_type, struct cpuiddump *src_cpuiddump) { - unsigned level, apic_nextshift, apic_number, apic_type, apic_id = 0, apic_shift = 0, id; + unsigned level, apic_nextshift, apic_type, apic_id = 0, apic_shift = 0, id; unsigned threadid __hwloc_attribute_unused = 0; /* shut-up compiler */ unsigned eax, ebx, ecx = 0, edx; int apic_packageshift = 0; - for (level = 0; ; level++) { + for (level = 0; level<32 /* guard */; level++) { ecx = level; eax = leaf; cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump); - if (!eax && !ebx) - break; + /* Intel specifies that the 0x0b/0x1f loop should stop when we get "invalid domain" (0 in ecx[8:15]) + * (if so, we also get 0 in eax/ebx for invalid subleaves). 
+ * However AMD rather says that the 0x80000026/0x0b loop should stop when we get "no thread at this level" (0 in ebx[0:15]). + * Zhaoxin follows the Intel specs but also returns "no thread at this level" for the last *valid* level (at least on KH-4000). + * From the Linux kernel code, it's very likely that AMD also returns "invalid domain" + * (because detect_extended_topology() uses that for all x86 CPUs) + * but keep with the official doc until AMD can clarify that (see #593). + */ + if (cpuid_type == amd) { + if (!(ebx & 0xffff)) + break; + } else { + if (!(ecx & 0xff00)) + break; + } apic_packageshift = eax & 0x1f; } @@ -545,47 +568,73 @@ static void read_intel_cores_exttopoenum(struct procinfo *infos, unsigned leaf, infos->otherids = malloc(level * sizeof(*infos->otherids)); if (infos->otherids) { infos->levels = level; - for (level = 0; ; level++) { + for (level = 0; level<32 /* guard */; level++) { ecx = level; eax = leaf; cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump); - if (!eax && !ebx) - break; + if (cpuid_type == amd) { + if (!(ebx & 0xffff)) + break; + } else { + if (!(ecx & 0xff00)) + break; + } apic_nextshift = eax & 0x1f; - apic_number = ebx & 0xffff; apic_type = (ecx & 0xff00) >> 8; apic_id = edx; id = (apic_id >> apic_shift) & ((1 << (apic_packageshift - apic_shift)) - 1); - hwloc_debug("x2APIC %08x %u: nextshift %u num %2u type %u id %2u\n", apic_id, level, apic_nextshift, apic_number, apic_type, id); + hwloc_debug("x2APIC %08x %u: nextshift %u nextnumber %2u type %u id %2u\n", + apic_id, + level, + apic_nextshift, + ebx & 0xffff /* number of threads in next level */, + apic_type, + id); infos->apicid = apic_id; infos->otherids[level] = UINT_MAX; - switch (apic_type) { - case 1: - threadid = id; - /* apic_number is the actual number of threads per core */ - break; - case 2: - infos->ids[CORE] = id; - /* apic_number is the actual number of threads per die */ - break; - case 3: - infos->ids[MODULE] = id; - /* apic_number is the actual 
number of threads per tile */ - break; - case 4: - infos->ids[TILE] = id; - /* apic_number is the actual number of threads per die */ - break; - case 5: - infos->ids[DIE] = id; - /* apic_number is the actual number of threads per package */ - break; - default: - hwloc_debug("x2APIC %u: unknown type %u\n", level, apic_type); - infos->otherids[level] = apic_id >> apic_shift; - break; - } - apic_shift = apic_nextshift; + switch (apic_type) { + case 1: + threadid = id; + break; + case 2: + infos->ids[CORE] = id; + break; + case 3: + if (leaf == 0x80000026) { + data->found_complex_ids = 1; + infos->ids[COMPLEX] = id; + } else { + data->found_module_ids = 1; + infos->ids[MODULE] = id; + } + break; + case 4: + if (leaf == 0x80000026) { + data->found_die_ids = 1; + infos->ids[DIE] = id; + } else { + data->found_tile_ids = 1; + infos->ids[TILE] = id; + } + break; + case 5: + if (leaf == 0x80000026) { + goto unknown_type; + } else { + data->found_die_ids = 1; + infos->ids[DIE] = id; + } + break; + case 6: + /* TODO: "DieGrp" on Intel */ + /* fallthrough */ + default: + unknown_type: + hwloc_debug("x2APIC %u: unknown type %u\n", level, apic_type); + infos->otherids[level] = apic_id >> apic_shift; + break; + } + apic_shift = apic_nextshift; } infos->apicid = apic_id; infos->ids[PKG] = apic_id >> apic_shift; @@ -704,12 +753,13 @@ static void look_proc(struct hwloc_backend *backend, struct procinfo *infos, uns } if (highest_cpuid >= 0x1a && has_hybrid(features)) { - /* Get hybrid cpu information from cpuid 0x1a */ + /* Get hybrid cpu information from cpuid 0x1a on Intel */ eax = 0x1a; ecx = 0; cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump); infos->hybridcoretype = eax >> 24; infos->hybridnativemodel = eax & 0xffffff; + data->is_hybrid = 1; } /********************************************************************************* @@ -731,23 +781,30 @@ static void look_proc(struct hwloc_backend *backend, struct procinfo *infos, uns * * Only needed when x2apic supported if 
NUMA nodes are needed. */ - read_amd_cores_topoext(infos, flags, src_cpuiddump); + read_amd_cores_topoext(data, infos, flags, src_cpuiddump); } - if ((cpuid_type == intel) && highest_cpuid >= 0x1f) { + if ((cpuid_type == amd) && highest_ext_cpuid >= 0x80000026) { + /* Get socket/die/complex/core/thread information from cpuid 0x80000026 + * (AMD Extended CPU Topology) + */ + read_extended_topo(data, infos, 0x80000026, cpuid_type, src_cpuiddump); + + } else if ((cpuid_type == intel || cpuid_type == zhaoxin) && highest_cpuid >= 0x1f) { /* Get package/die/module/tile/core/thread information from cpuid 0x1f * (Intel v2 Extended Topology Enumeration) */ - read_intel_cores_exttopoenum(infos, 0x1f, src_cpuiddump); + read_extended_topo(data, infos, 0x1f, cpuid_type, src_cpuiddump); } else if ((cpuid_type == intel || cpuid_type == amd || cpuid_type == zhaoxin) && highest_cpuid >= 0x0b && has_x2apic(features)) { /* Get package/core/thread information from cpuid 0x0b * (Intel v1 Extended Topology Enumeration) */ - read_intel_cores_exttopoenum(infos, 0x0b, src_cpuiddump); + read_extended_topo(data, infos, 0x0b, cpuid_type, src_cpuiddump); } + if (backend->topology->want_some_cpu_caches) { /************************************** * Get caches from CPU-specific leaves */ @@ -845,6 +902,7 @@ static void look_proc(struct hwloc_backend *backend, struct procinfo *infos, uns } } } + } if (hwloc_bitmap_isset(data->apicid_set, infos->apicid)) data->apicid_unique = 0; @@ -1046,21 +1104,34 @@ static void summarize(struct hwloc_backend *backend, struct procinfo *infos, uns if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_GROUP)) { if (fulldiscovery) { - /* Look for AMD Compute units inside packages */ - hwloc_bitmap_copy(remaining_cpuset, complete_cpuset); - hwloc_x86_add_groups(topology, infos, nbprocs, remaining_cpuset, - UNIT, "Compute Unit", - HWLOC_GROUP_KIND_AMD_COMPUTE_UNIT, 0); - /* Look for Intel Modules inside packages */ - hwloc_bitmap_copy(remaining_cpuset, 
complete_cpuset); - hwloc_x86_add_groups(topology, infos, nbprocs, remaining_cpuset, - MODULE, "Module", - HWLOC_GROUP_KIND_INTEL_MODULE, 0); - /* Look for Intel Tiles inside packages */ - hwloc_bitmap_copy(remaining_cpuset, complete_cpuset); - hwloc_x86_add_groups(topology, infos, nbprocs, remaining_cpuset, - TILE, "Tile", - HWLOC_GROUP_KIND_INTEL_TILE, 0); + if (data->found_complex_ids) { + /* Look for AMD Complex inside packages (ids[COMPLEX] is only filled from CPUID 0x80000026, which sets found_complex_ids) */ + hwloc_bitmap_copy(remaining_cpuset, complete_cpuset); + hwloc_x86_add_groups(topology, infos, nbprocs, remaining_cpuset, + COMPLEX, "Complex", + HWLOC_GROUP_KIND_AMD_COMPLEX, 0); + } + if (data->found_unit_ids) { + /* Look for AMD Compute units inside packages */ + hwloc_bitmap_copy(remaining_cpuset, complete_cpuset); + hwloc_x86_add_groups(topology, infos, nbprocs, remaining_cpuset, + UNIT, "Compute Unit", + HWLOC_GROUP_KIND_AMD_COMPUTE_UNIT, 0); + } + if (data->found_module_ids) { + /* Look for Intel Modules inside packages */ + hwloc_bitmap_copy(remaining_cpuset, complete_cpuset); + hwloc_x86_add_groups(topology, infos, nbprocs, remaining_cpuset, + MODULE, "Module", + HWLOC_GROUP_KIND_INTEL_MODULE, 0); + } + if (data->found_tile_ids) { + /* Look for Intel Tiles inside packages */ + hwloc_bitmap_copy(remaining_cpuset, complete_cpuset); + hwloc_x86_add_groups(topology, infos, nbprocs, remaining_cpuset, + TILE, "Tile", + HWLOC_GROUP_KIND_INTEL_TILE, 0); + } /* Look for unknown objects */ if (infos[one].otherids) { @@ -1094,7 +1165,8 @@ static void summarize(struct hwloc_backend *backend, struct procinfo *infos, uns } } - if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_DIE)) { + if (data->found_die_ids + && hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_DIE)) { /* Look for Intel Dies inside packages */ if (fulldiscovery) { hwloc_bitmap_t die_cpuset; @@ -1349,40 +1421,45 @@ look_procs(struct hwloc_backend *backend, struct procinfo *infos, unsigned long if (data->apicid_unique) { summarize(backend, infos, flags); - if 
(has_hybrid(features) && !(topology->flags & HWLOC_TOPOLOGY_FLAG_NO_CPUKINDS)) { + if (data->is_hybrid + && !(topology->flags & HWLOC_TOPOLOGY_FLAG_NO_CPUKINDS)) { /* use hybrid info for cpukinds */ - hwloc_bitmap_t atomset = hwloc_bitmap_alloc(); - hwloc_bitmap_t coreset = hwloc_bitmap_alloc(); - for(i=0; iapicid_unique, do nothing and return success, so that the caller does nothing either */ - return 0; } @@ -1459,7 +1536,15 @@ int hwloc_look_x86(struct hwloc_backend *backend, unsigned long flags) unsigned i; unsigned highest_cpuid; unsigned highest_ext_cpuid; - /* This stores cpuid features with the same indexing as Linux */ + /* This stores cpuid features with the same indexing as Linux: + * [0] = 0x1 edx + * [1] = 0x80000001 edx + * [4] = 0x1 ecx + * [6] = 0x80000001 ecx + * [9] = 0x7/0 ebx + * [16] = 0x7/0 ecx + * [18] = 0x7/0 edx + */ unsigned features[19] = { 0 }; struct procinfo *infos = NULL; enum cpuid_type cpuid_type = unknown; @@ -1579,6 +1664,7 @@ int hwloc_look_x86(struct hwloc_backend *backend, unsigned long flags) ecx = 0; cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump); features[9] = ebx; + features[16] = ecx; features[18] = edx; } @@ -1730,17 +1816,17 @@ hwloc_x86_check_cpuiddump_input(const char *src_cpuiddump_path, hwloc_bitmap_t s sprintf(path, "%s/hwloc-cpuid-info", src_cpuiddump_path); file = fopen(path, "r"); if (!file) { - fprintf(stderr, "Couldn't open dumped cpuid summary %s\n", path); + fprintf(stderr, "hwloc/x86: Couldn't open dumped cpuid summary %s\n", path); goto out_with_path; } if (!fgets(line, sizeof(line), file)) { - fprintf(stderr, "Found read dumped cpuid summary in %s\n", path); + fprintf(stderr, "hwloc/x86: Found read dumped cpuid summary in %s\n", path); fclose(file); goto out_with_path; } fclose(file); if (strcmp(line, "Architecture: x86\n")) { - fprintf(stderr, "Found non-x86 dumped cpuid summary in %s: %s\n", path, line); + fprintf(stderr, "hwloc/x86: Found non-x86 dumped cpuid summary in %s: %s\n", path, 
line); goto out_with_path; } free(path); @@ -1752,19 +1838,19 @@ hwloc_x86_check_cpuiddump_input(const char *src_cpuiddump_path, hwloc_bitmap_t s if (!*end) hwloc_bitmap_set(set, idx); else - fprintf(stderr, "Ignoring invalid dirent `%s' in dumped cpuid directory `%s'\n", + fprintf(stderr, "hwloc/x86: Ignoring invalid dirent `%s' in dumped cpuid directory `%s'\n", dirent->d_name, src_cpuiddump_path); } } closedir(dir); if (hwloc_bitmap_iszero(set)) { - fprintf(stderr, "Did not find any valid pu%%u entry in dumped cpuid directory `%s'\n", + fprintf(stderr, "hwloc/x86: Did not find any valid pu%%u entry in dumped cpuid directory `%s'\n", src_cpuiddump_path); return -1; } else if (hwloc_bitmap_last(set) != hwloc_bitmap_weight(set) - 1) { /* The x86 backends enforces contigous set of PUs starting at 0 so far */ - fprintf(stderr, "Found non-contigous pu%%u range in dumped cpuid directory `%s'\n", + fprintf(stderr, "hwloc/x86: Found non-contigous pu%%u range in dumped cpuid directory `%s'\n", src_cpuiddump_path); return -1; } @@ -1816,9 +1902,15 @@ hwloc_x86_component_instantiate(struct hwloc_topology *topology, /* default values */ data->is_knl = 0; + data->is_hybrid = 0; data->apicid_set = hwloc_bitmap_alloc(); data->apicid_unique = 1; data->src_cpuiddump_path = NULL; + data->found_die_ids = 0; + data->found_complex_ids = 0; + data->found_unit_ids = 0; + data->found_module_ids = 0; + data->found_tile_ids = 0; src_cpuiddump_path = getenv("HWLOC_CPUID_PATH"); if (src_cpuiddump_path) { @@ -1829,7 +1921,7 @@ hwloc_x86_component_instantiate(struct hwloc_topology *topology, assert(!hwloc_bitmap_iszero(set)); /* enforced by hwloc_x86_check_cpuiddump_input() */ data->nbprocs = hwloc_bitmap_weight(set); } else { - fprintf(stderr, "Ignoring dumped cpuid directory.\n"); + fprintf(stderr, "hwloc/x86: Ignoring dumped cpuid directory.\n"); } hwloc_bitmap_free(set); } diff --git a/src/3rdparty/hwloc/src/topology-xml-nolibxml.c b/src/3rdparty/hwloc/src/topology-xml-nolibxml.c index 
c0691962..8ea5e385 100644 --- a/src/3rdparty/hwloc/src/topology-xml-nolibxml.c +++ b/src/3rdparty/hwloc/src/topology-xml-nolibxml.c @@ -411,12 +411,12 @@ hwloc_nolibxml_backend_init(struct hwloc_xml_backend_data_s *bdata, bdata->data = nbdata; if (xmlbuffer) { - nbdata->buffer = malloc(xmlbuflen+1); + nbdata->buffer = malloc(xmlbuflen); if (!nbdata->buffer) goto out_with_nbdata; - nbdata->buflen = xmlbuflen+1; + nbdata->buflen = xmlbuflen; memcpy(nbdata->buffer, xmlbuffer, xmlbuflen); - nbdata->buffer[xmlbuflen] = '\0'; + nbdata->buffer[xmlbuflen-1] = '\0'; /* make sure it's there as requested in the API */ } else { int err = hwloc_nolibxml_read_file(xmlpath, &nbdata->buffer, &nbdata->buflen); @@ -453,8 +453,9 @@ hwloc_nolibxml_import_diff(struct hwloc__xml_import_state_s *state, buffer = malloc(xmlbuflen); if (!buffer) goto out; - memcpy(buffer, xmlbuffer, xmlbuflen); buflen = xmlbuflen; + memcpy(buffer, xmlbuffer, xmlbuflen); + buffer[xmlbuflen-1] = '\0'; /* make sure it's there as requested in the API */ } else { ret = hwloc_nolibxml_read_file(xmlpath, &buffer, &buflen); diff --git a/src/3rdparty/hwloc/src/topology-xml.c b/src/3rdparty/hwloc/src/topology-xml.c index b1f20dbf..70006f63 100644 --- a/src/3rdparty/hwloc/src/topology-xml.c +++ b/src/3rdparty/hwloc/src/topology-xml.c @@ -1,6 +1,6 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2022 Inria. All rights reserved. + * Copyright © 2009-2023 Inria. All rights reserved. * Copyright © 2009-2011, 2020 Université Bordeaux * Copyright © 2009-2018 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. 
@@ -562,7 +562,13 @@ hwloc__xml_import_pagetype(hwloc_topology_t topology __hwloc_attribute_unused, s char *attrname, *attrvalue; if (state->global->next_attr(state, &attrname, &attrvalue) < 0) break; - if (!strcmp(attrname, "size")) + if (!strcmp(attrname, "info")) { + char *infoname, *infovalue; + int ret = hwloc___xml_import_info(&infoname, &infovalue, state); + if (ret < 0) + return -1; + /* ignored */ + } else if (!strcmp(attrname, "size")) size = strtoull(attrvalue, NULL, 10); else if (!strcmp(attrname, "count")) count = strtoull(attrvalue, NULL, 10); @@ -1160,6 +1166,48 @@ hwloc__xml_import_object(hwloc_topology_t topology, data->last_numanode = obj; } + /* 3.0 forward compatibility */ + if (data->version_major >= 3 && obj->type == HWLOC_OBJ_OS_DEVICE) { + /* osdev.type changed into bitmak in 3.0 */ + if (obj->attr->osdev.type & 3 /* STORAGE|MEMORY for BLOCK */) { + obj->attr->osdev.type = HWLOC_OBJ_OSDEV_BLOCK; + } else if (obj->attr->osdev.type & 8 /* COPROC for COPROC and rsmi/nvml GPUs */) { + if (obj->subtype && (!strcmp(obj->subtype, "RSMI") || !strcmp(obj->subtype, "NVML"))) + obj->attr->osdev.type = HWLOC_OBJ_OSDEV_GPU; + else + obj->attr->osdev.type = HWLOC_OBJ_OSDEV_COPROC; + } else if (obj->attr->osdev.type & 4 /* GPU for non-COPROC GPUs */) { + obj->attr->osdev.type = HWLOC_OBJ_OSDEV_GPU; + } else if (obj->attr->osdev.type & 32 /* OFED */) { + obj->attr->osdev.type = HWLOC_OBJ_OSDEV_OPENFABRICS; + } else if (obj->attr->osdev.type & 16 /* NET for NET and BXI v2-fake-OFED */) { + if (obj->subtype && !strcmp(obj->subtype, "BXI")) + obj->attr->osdev.type = HWLOC_OBJ_OSDEV_OPENFABRICS; + else + obj->attr->osdev.type = HWLOC_OBJ_OSDEV_NETWORK; + } else if (obj->attr->osdev.type & 64 /* DMA */) { + obj->attr->osdev.type = HWLOC_OBJ_OSDEV_DMA; + } else { /* none or unknown */ + obj->attr->osdev.type = (hwloc_obj_osdev_type_t) -1; + } + /* Backend info only in root */ + if (obj->subtype && !hwloc_obj_get_info_by_name(obj, "Backend")) { + if 
(!strcmp(obj->subtype, "CUDA")) { + hwloc_obj_add_info(obj, "Backend", "CUDA"); + } else if (!strcmp(obj->subtype, "NVML")) { + hwloc_obj_add_info(obj, "Backend", "NVML"); + } else if (!strcmp(obj->subtype, "OpenCL")) { + hwloc_obj_add_info(obj, "Backend", "OpenCL"); + } else if (!strcmp(obj->subtype, "RSMI")) { + hwloc_obj_add_info(obj, "Backend", "RSMI"); + } else if (!strcmp(obj->subtype, "LevelZero")) { + hwloc_obj_add_info(obj, "Backend", "LevelZero"); + } else if (!strcmp(obj->subtype, "Display")) { + hwloc_obj_add_info(obj, "Backend", "GL"); + } + } + } + if (!hwloc_filter_check_keep_object(topology, obj)) { /* Ignore this object instead of inserting it. * @@ -1433,7 +1481,14 @@ hwloc__xml_v2import_distances(hwloc_topology_t topology, if (ret <= 0) break; - if (!strcmp(tag, "indexes")) + if (!strcmp(tag, "info")) { + char *infoname, *infovalue; + ret = hwloc___xml_import_info(&infoname, &infovalue, state); + if (ret < 0) + goto out_with_arrays; + /* ignored */ + continue; + } else if (!strcmp(tag, "indexes")) is_index = 1; else if (!strcmp(tag, "u64values")) is_u64values = 1; @@ -1766,6 +1821,10 @@ hwloc__xml_import_memattr(hwloc_topology_t topology, if (!strcmp(tag, "memattr_value")) { ret = hwloc__xml_import_memattr_value(topology, id, flags, &childstate); + } else if (!strcmp(tag, "info")) { + char *infoname, *infovalue; + ret = hwloc___xml_import_info(&infoname, &infovalue, &childstate); + /* ignored */ } else { if (hwloc__xml_verbose()) fprintf(stderr, "%s: memattr with unrecognized child %s\n", @@ -2094,9 +2153,10 @@ hwloc_look_xml(struct hwloc_backend *backend, struct hwloc_disc_status *dstatus) if (ret < 0) goto failed; - if (data->version_major > 2) { + if (data->version_major > 3 + || (data->version_major == 3 && data->version_minor > 0)) { if (hwloc__xml_verbose()) - fprintf(stderr, "%s: cannot import XML version %u.%u > 2\n", + fprintf(stderr, "%s: cannot import XML version %u.%u > 3.0\n", data->msgprefix, data->version_major, 
data->version_minor); goto err; } @@ -2144,6 +2204,13 @@ hwloc_look_xml(struct hwloc_backend *backend, struct hwloc_disc_status *dstatus) ret = hwloc__xml_import_cpukind(topology, &childstate); if (ret < 0) goto failed; + } else if (!strcmp(tag, "info")) { + char *infoname, *infovalue; + ret = hwloc___xml_import_info(&infoname, &infovalue, &childstate); + if (ret < 0) + goto failed; + /* move 3.x topology info back to the root object */ + hwloc_obj_add_info(topology->levels[0][0], infoname, infovalue); } else { if (hwloc__xml_verbose()) fprintf(stderr, "%s: ignoring unknown tag `%s' after root object.\n", diff --git a/src/3rdparty/hwloc/src/topology.c b/src/3rdparty/hwloc/src/topology.c index 47b4658c..9dc2b07c 100644 --- a/src/3rdparty/hwloc/src/topology.c +++ b/src/3rdparty/hwloc/src/topology.c @@ -1,6 +1,6 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2022 Inria. All rights reserved. + * Copyright © 2009-2023 Inria. All rights reserved. * Copyright © 2009-2012, 2020 Université Bordeaux * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. * Copyright © 2022 IBM Corporation. All rights reserved. @@ -146,21 +146,24 @@ report_insert_error_format_obj(char *buf, size_t buflen, hwloc_obj_t obj) char typestr[64]; char *cpusetstr; char *nodesetstr = NULL; + char indexstr[64] = ""; + char groupstr[64] = ""; hwloc_obj_type_snprintf(typestr, sizeof(typestr), obj, 0); hwloc_bitmap_asprintf(&cpusetstr, obj->cpuset); + if (obj->os_index != HWLOC_UNKNOWN_INDEX) + snprintf(indexstr, sizeof(indexstr), "P#%u ", obj->os_index); + if (obj->type == HWLOC_OBJ_GROUP) + snprintf(groupstr, sizeof(groupstr), "groupkind %u-%u ", obj->attr->group.kind, obj->attr->group.subkind); if (obj->nodeset) /* may be missing during insert */ hwloc_bitmap_asprintf(&nodesetstr, obj->nodeset); - if (obj->os_index != HWLOC_UNKNOWN_INDEX) - snprintf(buf, buflen, "%s (P#%u cpuset %s%s%s)", - typestr, obj->os_index, cpusetstr, - nodesetstr ? " nodeset " : "", - nodesetstr ? 
nodesetstr : ""); - else - snprintf(buf, buflen, "%s (cpuset %s%s%s)", - typestr, cpusetstr, - nodesetstr ? " nodeset " : "", - nodesetstr ? nodesetstr : ""); + snprintf(buf, buflen, "%s (%s%s%s%s%scpuset %s%s%s)", + typestr, + indexstr, + obj->subtype ? "subtype " : "", obj->subtype ? obj->subtype : "", obj->subtype ? " " : "", + groupstr, + cpusetstr, + nodesetstr ? " nodeset " : "", nodesetstr ? nodesetstr : ""); free(cpusetstr); free(nodesetstr); } @@ -178,8 +181,9 @@ static void report_insert_error(hwloc_obj_t new, hwloc_obj_t old, const char *ms fprintf(stderr, "****************************************************************************\n"); fprintf(stderr, "* hwloc %s received invalid information from the operating system.\n", HWLOC_VERSION); fprintf(stderr, "*\n"); - fprintf(stderr, "* Failed with: %s\n", msg); - fprintf(stderr, "* while inserting %s at %s\n", newstr, oldstr); + fprintf(stderr, "* Failed with error: %s\n", msg); + fprintf(stderr, "* while inserting %s\n", newstr); + fprintf(stderr, "* at %s\n", oldstr); fprintf(stderr, "* coming from: %s\n", reason); fprintf(stderr, "*\n"); fprintf(stderr, "* The following FAQ entry in the hwloc documentation may help:\n"); @@ -679,7 +683,8 @@ unlink_and_free_object_and_children(hwloc_obj_t *pobj) void hwloc_free_object_and_children(hwloc_obj_t obj) { - unlink_and_free_object_and_children(&obj); + if (obj) + unlink_and_free_object_and_children(&obj); } /* Free an object, its next siblings and their children without unlinking from parent. 
@@ -1925,6 +1930,22 @@ hwloc_topology_alloc_group_object(struct hwloc_topology *topology) return hwloc_alloc_setup_object(topology, HWLOC_OBJ_GROUP, HWLOC_UNKNOWN_INDEX); } +int +hwloc_topology_free_group_object(struct hwloc_topology *topology, hwloc_obj_t obj) +{ + if (!topology->is_loaded) { + /* this could actually work, see insert() below */ + errno = EINVAL; + return -1; + } + if (topology->adopted_shmem_addr) { + errno = EPERM; + return -1; + } + hwloc_free_unlinked_object(obj); + return 0; +} + static void hwloc_propagate_symmetric_subtree(hwloc_topology_t topology, hwloc_obj_t root); static void propagate_total_memory(hwloc_obj_t obj); static void hwloc_set_group_depth(hwloc_topology_t topology); @@ -1935,7 +1956,7 @@ static int hwloc_connect_special_levels(hwloc_topology_t topology); hwloc_obj_t hwloc_topology_insert_group_object(struct hwloc_topology *topology, hwloc_obj_t obj) { - hwloc_obj_t res, root; + hwloc_obj_t res, root, child; int cmp; if (!topology->is_loaded) { @@ -1945,6 +1966,7 @@ hwloc_topology_insert_group_object(struct hwloc_topology *topology, hwloc_obj_t return NULL; } if (topology->adopted_shmem_addr) { + hwloc_free_unlinked_object(obj); errno = EPERM; return NULL; } @@ -1998,6 +2020,7 @@ hwloc_topology_insert_group_object(struct hwloc_topology *topology, hwloc_obj_t res = hwloc__insert_object_by_cpuset(topology, NULL, obj, NULL /* do not show errors on stdout */); } else { /* just merge root */ + hwloc_free_unlinked_object(obj); res = root; } @@ -2024,6 +2047,13 @@ hwloc_topology_insert_group_object(struct hwloc_topology *topology, hwloc_obj_t if (hwloc_topology_reconnect(topology, 0) < 0) return NULL; + /* Compute group total_memory. 
*/ + res->total_memory = 0; + for_each_child(child, res) + res->total_memory += child->total_memory; + for_each_memory_child(child, res) + res->total_memory += child->total_memory; + hwloc_propagate_symmetric_subtree(topology, topology->levels[0][0]); hwloc_set_group_depth(topology); @@ -2254,11 +2284,13 @@ fixup_sets(hwloc_obj_t obj) int hwloc_obj_add_other_obj_sets(hwloc_obj_t dst, hwloc_obj_t src) { -#define ADD_OTHER_OBJ_SET(_dst, _src, _set) \ - if ((_src)->_set) { \ - if (!(_dst)->_set) \ - (_dst)->_set = hwloc_bitmap_alloc(); \ - hwloc_bitmap_or((_dst)->_set, (_dst)->_set, (_src)->_set); \ +#define ADD_OTHER_OBJ_SET(_dst, _src, _set) \ + if ((_src)->_set) { \ + if (!(_dst)->_set) \ + (_dst)->_set = hwloc_bitmap_alloc(); \ + if (!(_dst)->_set \ + || hwloc_bitmap_or((_dst)->_set, (_dst)->_set, (_src)->_set) < 0) \ + return -1; \ } ADD_OTHER_OBJ_SET(dst, src, cpuset); ADD_OTHER_OBJ_SET(dst, src, complete_cpuset); @@ -3730,6 +3762,7 @@ hwloc__topology_init (struct hwloc_topology **topologyp, hwloc__topology_filter_init(topology); + /* always initialize since we don't know flags to disable those yet */ hwloc_internal_distances_init(topology); hwloc_internal_memattrs_init(topology); hwloc_internal_cpukinds_init(topology); @@ -3942,8 +3975,12 @@ int hwloc_topology_set_cache_types_filter(hwloc_topology_t topology, enum hwloc_type_filter_e filter) { unsigned i; - for(i=HWLOC_OBJ_L1CACHE; iis_loaded) { + errno = EBUSY; + return -1; + } + for(i=HWLOC_OBJ_L1CACHE; i<=HWLOC_OBJ_L3ICACHE; i++) + hwloc__topology_set_type_filter(topology, (hwloc_obj_type_t) i, filter); return 0; } @@ -3951,17 +3988,25 @@ int hwloc_topology_set_icache_types_filter(hwloc_topology_t topology, enum hwloc_type_filter_e filter) { unsigned i; - for(i=HWLOC_OBJ_L1ICACHE; iis_loaded) { + errno = EBUSY; + return -1; + } + for(i=HWLOC_OBJ_L1ICACHE; i<=HWLOC_OBJ_L3ICACHE; i++) + hwloc__topology_set_type_filter(topology, (hwloc_obj_type_t) i, filter); return 0; } int 
hwloc_topology_set_io_types_filter(hwloc_topology_t topology, enum hwloc_type_filter_e filter) { - hwloc_topology_set_type_filter(topology, HWLOC_OBJ_BRIDGE, filter); - hwloc_topology_set_type_filter(topology, HWLOC_OBJ_PCI_DEVICE, filter); - hwloc_topology_set_type_filter(topology, HWLOC_OBJ_OS_DEVICE, filter); + if (topology->is_loaded) { + errno = EBUSY; + return -1; + } + hwloc__topology_set_type_filter(topology, HWLOC_OBJ_BRIDGE, filter); + hwloc__topology_set_type_filter(topology, HWLOC_OBJ_PCI_DEVICE, filter); + hwloc__topology_set_type_filter(topology, HWLOC_OBJ_OS_DEVICE, filter); return 0; } @@ -3982,9 +4027,12 @@ hwloc_topology_clear (struct hwloc_topology *topology) { /* no need to set to NULL after free() since callers will call setup_defaults() or just destroy the rest of the topology */ unsigned l; + + /* always destroy cpukinds/distances/memattrs since there are always initialized during init() */ hwloc_internal_cpukinds_destroy(topology); hwloc_internal_distances_destroy(topology); hwloc_internal_memattrs_destroy(topology); + hwloc_free_object_and_children(topology->levels[0][0]); hwloc_bitmap_free(topology->allowed_cpuset); hwloc_bitmap_free(topology->allowed_nodeset); @@ -4024,6 +4072,7 @@ hwloc_topology_load (struct hwloc_topology *topology) { struct hwloc_disc_status dstatus; const char *env; + unsigned i; int err; if (topology->is_loaded) { @@ -4032,8 +4081,18 @@ hwloc_topology_load (struct hwloc_topology *topology) } /* initialize envvar-related things */ - hwloc_internal_distances_prepare(topology); - hwloc_internal_memattrs_prepare(topology); + if (!(topology->flags & HWLOC_TOPOLOGY_FLAG_NO_DISTANCES)) + hwloc_internal_distances_prepare(topology); + if (!(topology->flags & HWLOC_TOPOLOGY_FLAG_NO_MEMATTRS)) + hwloc_internal_memattrs_prepare(topology); + + /* check if any cpu cache filter is not NONE */ + topology->want_some_cpu_caches = 0; + for(i=HWLOC_OBJ_L1CACHE; i<=HWLOC_OBJ_L3ICACHE; i++) + if (topology->type_filter[i] != 
HWLOC_TYPE_FILTER_KEEP_NONE) { + topology->want_some_cpu_caches = 1; + break; + } if (getenv("HWLOC_XML_USERDATA_NOT_DECODED")) topology->userdata_not_decoded = 1; @@ -4110,23 +4169,32 @@ hwloc_topology_load (struct hwloc_topology *topology) #endif hwloc_topology_check(topology); - /* Rank cpukinds */ - hwloc_internal_cpukinds_rank(topology); + if (!(topology->flags & HWLOC_TOPOLOGY_FLAG_NO_CPUKINDS)) { + /* Rank cpukinds */ + hwloc_internal_cpukinds_rank(topology); + } - /* Mark distances objs arrays as invalid since we may have removed objects - * from the topology after adding the distances (remove_empty, etc). - * It would be hard to actually verify whether it's needed. - */ - hwloc_internal_distances_invalidate_cached_objs(topology); - /* And refresh distances so that multithreaded concurrent distances_get() - * don't refresh() concurrently (disallowed). - */ - hwloc_internal_distances_refresh(topology); + if (!(topology->flags & HWLOC_TOPOLOGY_FLAG_NO_DISTANCES)) { + /* Mark distances objs arrays as invalid since we may have removed objects + * from the topology after adding the distances (remove_empty, etc). + * It would be hard to actually verify whether it's needed. + */ + hwloc_internal_distances_invalidate_cached_objs(topology); + /* And refresh distances so that multithreaded concurrent distances_get() + * don't refresh() concurrently (disallowed). 
+ */ + hwloc_internal_distances_refresh(topology); + } - /* Same for memattrs */ - hwloc_internal_memattrs_need_refresh(topology); - hwloc_internal_memattrs_refresh(topology); - hwloc_internal_memattrs_guess_memory_tiers(topology); + if (!(topology->flags & HWLOC_TOPOLOGY_FLAG_NO_MEMATTRS)) { + int force_memtiers = (getenv("HWLOC_MEMTIERS_REFRESH") != NULL); + /* Same for memattrs */ + hwloc_internal_memattrs_need_refresh(topology); + hwloc_internal_memattrs_refresh(topology); + /* update memtiers unless XML */ + if (force_memtiers || strcmp(topology->backends->component->name, "xml")) + hwloc_internal_memattrs_guess_memory_tiers(topology, force_memtiers); + } topology->is_loaded = 1; @@ -4185,20 +4253,11 @@ restrict_object_by_cpuset(hwloc_topology_t topology, unsigned long flags, hwloc_ hwloc_bitmap_andnot(obj->cpuset, obj->cpuset, droppedcpuset); hwloc_bitmap_andnot(obj->complete_cpuset, obj->complete_cpuset, droppedcpuset); modified = 1; - } else { - if ((flags & HWLOC_RESTRICT_FLAG_REMOVE_CPULESS) - && hwloc_bitmap_iszero(obj->complete_cpuset)) { - /* we're empty, there's a NUMAnode below us, it'll be removed this time */ - modified = 1; - } - /* nodeset cannot intersect unless cpuset intersects or is empty */ - if (droppednodeset) - assert(!hwloc_bitmap_intersects(obj->complete_nodeset, droppednodeset) - || hwloc_bitmap_iszero(obj->complete_cpuset)); } - if (droppednodeset) { + if (droppednodeset && hwloc_bitmap_intersects(obj->complete_nodeset, droppednodeset)) { hwloc_bitmap_andnot(obj->nodeset, obj->nodeset, droppednodeset); hwloc_bitmap_andnot(obj->complete_nodeset, obj->complete_nodeset, droppednodeset); + modified = 1; } if (modified) { @@ -4251,20 +4310,11 @@ restrict_object_by_nodeset(hwloc_topology_t topology, unsigned long flags, hwloc hwloc_bitmap_andnot(obj->nodeset, obj->nodeset, droppednodeset); hwloc_bitmap_andnot(obj->complete_nodeset, obj->complete_nodeset, droppednodeset); modified = 1; - } else { - if ((flags & 
HWLOC_RESTRICT_FLAG_REMOVE_MEMLESS) - && hwloc_bitmap_iszero(obj->complete_nodeset)) { - /* we're empty, there's a PU below us, it'll be removed this time */ - modified = 1; - } - /* cpuset cannot intersect unless nodeset intersects or is empty */ - if (droppedcpuset) - assert(!hwloc_bitmap_intersects(obj->complete_cpuset, droppedcpuset) - || hwloc_bitmap_iszero(obj->complete_nodeset)); } - if (droppedcpuset) { + if (droppedcpuset && hwloc_bitmap_intersects(obj->complete_cpuset, droppedcpuset)) { hwloc_bitmap_andnot(obj->cpuset, obj->cpuset, droppedcpuset); hwloc_bitmap_andnot(obj->complete_cpuset, obj->complete_cpuset, droppedcpuset); + modified = 1; } if (modified) { @@ -4433,13 +4483,18 @@ hwloc_topology_restrict(struct hwloc_topology *topology, hwloc_const_bitmap_t se if (hwloc_filter_levels_keep_structure(topology) < 0) /* takes care of reconnecting internally */ goto out; - /* some objects may have disappeared, we need to update distances objs arrays */ - hwloc_internal_distances_invalidate_cached_objs(topology); - hwloc_internal_memattrs_need_refresh(topology); + /* some objects may have disappeared and sets were modified, + * we need to update distances, etc */ + if (!(topology->flags & HWLOC_TOPOLOGY_FLAG_NO_DISTANCES)) + hwloc_internal_distances_invalidate_cached_objs(topology); + if (!(topology->flags & HWLOC_TOPOLOGY_FLAG_NO_MEMATTRS)) + hwloc_internal_memattrs_need_refresh(topology); + if (!(topology->flags & HWLOC_TOPOLOGY_FLAG_NO_CPUKINDS)) + hwloc_internal_cpukinds_restrict(topology); + hwloc_propagate_symmetric_subtree(topology, topology->levels[0][0]); propagate_total_memory(topology->levels[0][0]); - hwloc_internal_cpukinds_restrict(topology); #ifndef HWLOC_DEBUG if (getenv("HWLOC_DEBUG_CHECK")) @@ -4527,9 +4582,12 @@ hwloc_topology_allow(struct hwloc_topology *topology, int hwloc_topology_refresh(struct hwloc_topology *topology) { - hwloc_internal_cpukinds_rank(topology); - hwloc_internal_distances_refresh(topology); - 
hwloc_internal_memattrs_refresh(topology); + if (!(topology->flags & HWLOC_TOPOLOGY_FLAG_NO_CPUKINDS)) + hwloc_internal_cpukinds_rank(topology); + if (!(topology->flags & HWLOC_TOPOLOGY_FLAG_NO_DISTANCES)) + hwloc_internal_distances_refresh(topology); + if (!(topology->flags & HWLOC_TOPOLOGY_FLAG_NO_MEMATTRS)) + hwloc_internal_memattrs_refresh(topology); return 0; } @@ -5081,6 +5139,9 @@ hwloc_topology_check(struct hwloc_topology *topology) for(i=HWLOC_OBJ_TYPE_MIN; iis_loaded) + return; + depth = hwloc_topology_get_depth(topology); assert(!topology->modified); diff --git a/src/3rdparty/libethash/CMakeLists.txt b/src/3rdparty/libethash/CMakeLists.txt index 7df9ec86..491432a6 100644 --- a/src/3rdparty/libethash/CMakeLists.txt +++ b/src/3rdparty/libethash/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.1) +cmake_minimum_required(VERSION 3.5) project (ethash C) set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -Os") diff --git a/src/crypto/ghostrider/CMakeLists.txt b/src/crypto/ghostrider/CMakeLists.txt index db63cfde..87050a15 100644 --- a/src/crypto/ghostrider/CMakeLists.txt +++ b/src/crypto/ghostrider/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.1) +cmake_minimum_required(VERSION 3.5) project(GhostRider) set(HEADERS From 334753763509c5353ade35ba61732313617663c6 Mon Sep 17 00:00:00 2001 From: xmrig Date: Sat, 23 Mar 2024 00:46:15 +0700 Subject: [PATCH 13/13] Update CHANGELOG.md --- CHANGELOG.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index bf89deb8..7e93d79d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,9 @@ +# v6.21.2 +- The dependencies of all prebuilt releases have been updated. Support for old Ubuntu releases has been dropped. +- [#2800](https://github.com/xmrig/xmrig/issues/2800) Fixed donation with GhostRider algorithm for builds without KawPow algorithm. +- [#3436](https://github.com/xmrig/xmrig/pull/3436) Fixed, the file log writer was not thread-safe. 
+- [#3450](https://github.com/xmrig/xmrig/pull/3450) Fixed RandomX crash when compiled with fortify_source. + # v6.21.1 - [#3391](https://github.com/xmrig/xmrig/pull/3391) Added support for townforge (monero fork using randomx). - [#3399](https://github.com/xmrig/xmrig/pull/3399) Fixed Zephyr mining (OpenCL).