From b324e34444c5f389340b9c081c57af4d6d50d4b0 Mon Sep 17 00:00:00 2001 From: XMRig Date: Thu, 23 Jun 2022 16:45:54 +0700 Subject: [PATCH] Update hwloc for msvc. --- src/3rdparty/hwloc/NEWS | 14 ++++++++- src/3rdparty/hwloc/VERSION | 6 ++-- src/3rdparty/hwloc/include/hwloc.h | 30 +++++++++++-------- .../hwloc/include/hwloc/autogen/config.h | 4 +-- src/3rdparty/hwloc/include/hwloc/memattrs.h | 8 ++--- src/3rdparty/hwloc/src/topology-x86.c | 28 ++++++++++++----- src/3rdparty/hwloc/src/topology.c | 22 ++++++++++++-- 7 files changed, 80 insertions(+), 32 deletions(-) diff --git a/src/3rdparty/hwloc/NEWS b/src/3rdparty/hwloc/NEWS index 71f858a3..75557e5f 100644 --- a/src/3rdparty/hwloc/NEWS +++ b/src/3rdparty/hwloc/NEWS @@ -1,5 +1,5 @@ Copyright © 2009 CNRS -Copyright © 2009-2021 Inria. All rights reserved. +Copyright © 2009-2022 Inria. All rights reserved. Copyright © 2009-2013 Université Bordeaux Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. Copyright © 2020 Hewlett Packard Enterprise. All rights reserved. @@ -17,6 +17,18 @@ bug fixes (and other actions) for each version of hwloc since version 0.9. +Version 2.7.1 +------------- +* Workaround crashes when virtual machines report incoherent x86 CPUID + information about numbers of cores and threads. + Thanks to Peter Bense for the report. +* Use setenv() instead of putenv() when trying to force enable oneAPI L0 + support, to avoid issues with applications that touch the environment, + thanks to Josh Hursey for the patch. +* Add some warnings at the end of configure when GPU libraries are + missing on the system or their path is missing in the environment. + + Version 2.7.0 ------------- * Backends diff --git a/src/3rdparty/hwloc/VERSION b/src/3rdparty/hwloc/VERSION index 7486ae04..d17fb44e 100644 --- a/src/3rdparty/hwloc/VERSION +++ b/src/3rdparty/hwloc/VERSION @@ -9,7 +9,7 @@ major=2 minor=7 -release=0 +release=1 # greek is used for alpha or beta release tags. 
If it is non-empty, # it will be appended to the version number. It does not have to be @@ -22,7 +22,7 @@ greek= # The date when this release was created -date="Dec 06, 2021" +date="Mar 20, 2022" # If snapshot=1, then use the value from snapshot_version as the # entire hwloc version (i.e., ignore major, minor, release, and @@ -41,7 +41,7 @@ snapshot_version=${major}.${minor}.${release}${greek}-git # 2. Version numbers are described in the Libtool current:revision:age # format. -libhwloc_so_version=20:2:5 +libhwloc_so_version=20:3:5 libnetloc_so_version=0:0:0 # Please also update the lines in contrib/windows/libhwloc.vcxproj diff --git a/src/3rdparty/hwloc/include/hwloc.h b/src/3rdparty/hwloc/include/hwloc.h index b5f0f48a..35bbcc71 100644 --- a/src/3rdparty/hwloc/include/hwloc.h +++ b/src/3rdparty/hwloc/include/hwloc.h @@ -29,7 +29,7 @@ * THAT IS IN THE PDF/HTML THAT IS ***NOT*** IN hwloc.h! * * There are entire paragraph-length descriptions, discussions, and - * pretty prictures to explain subtle corner cases, provide concrete + * pretty pictures to explain subtle corner cases, provide concrete * examples, etc. * * Please, go read the documentation. :-) @@ -517,7 +517,7 @@ struct hwloc_obj { * objects). * * If the ::HWLOC_TOPOLOGY_FLAG_INCLUDE_DISALLOWED configuration flag is set, - * some of these CPUs may not be allowed for binding, + * some of these CPUs may be online but not allowed for binding, * see hwloc_topology_get_allowed_cpuset(). * * \note All objects have non-NULL CPU and node sets except Misc and I/O objects. @@ -549,7 +549,7 @@ struct hwloc_obj { * nodes more precisely. * * If the ::HWLOC_TOPOLOGY_FLAG_INCLUDE_DISALLOWED configuration flag is set, - * some of these nodes may not be allowed for allocation, + * some of these nodes may be online but not allowed for allocation, * see hwloc_topology_get_allowed_nodeset(). 
* * If there are no NUMA nodes in the machine, all the memory is close to this @@ -642,7 +642,7 @@ union hwloc_obj_attr_u { unsigned char revision; float linkspeed; /* in GB/s */ } pcidev; - /** \brief Bridge specific Object Attribues */ + /** \brief Bridge specific Object Attributes */ struct hwloc_bridge_attr_s { union { struct hwloc_pcidev_attr_s pci; @@ -1089,7 +1089,7 @@ HWLOC_DECLSPEC int hwloc_obj_add_info(hwloc_obj_t obj, const char *name, const c * * Some operating systems only support binding threads or processes to a single PU. * Others allow binding to larger sets such as entire Cores or Packages or - * even random sets of invididual PUs. In such operating system, the scheduler + * even random sets of individual PUs. In such operating system, the scheduler * is free to run the task on one of these PU, then migrate it to another PU, etc. * It is often useful to call hwloc_bitmap_singlify() on the target CPU set before * passing it to the binding function to avoid these expensive migrations. @@ -1167,7 +1167,7 @@ typedef enum { * CPUs are idle, operating systems may execute the thread/process * on those other CPUs instead of the designated CPUs, to let them * progress anyway. Strict binding means that the thread/process - * will _never_ execute on other cpus than the designated CPUs, even + * will _never_ execute on other CPUs than the designated CPUs, even * when those are busy with other tasks and other CPUs are idle. * * \note Depending on the operating system, strict binding may not @@ -1204,7 +1204,7 @@ typedef enum { HWLOC_CPUBIND_NOMEMBIND = (1<<3) } hwloc_cpubind_flags_t; -/** \brief Bind current process or thread on cpus given in physical bitmap \p set. +/** \brief Bind current process or thread on CPUs given in physical bitmap \p set. 
* * \return -1 with errno set to ENOSYS if the action is not supported * \return -1 with errno set to EXDEV if the binding cannot be enforced @@ -1219,7 +1219,7 @@ HWLOC_DECLSPEC int hwloc_set_cpubind(hwloc_topology_t topology, hwloc_const_cpus */ HWLOC_DECLSPEC int hwloc_get_cpubind(hwloc_topology_t topology, hwloc_cpuset_t set, int flags); -/** \brief Bind a process \p pid on cpus given in physical bitmap \p set. +/** \brief Bind a process \p pid on CPUs given in physical bitmap \p set. * * \note \p hwloc_pid_t is \p pid_t on Unix platforms, * and \p HANDLE on native Windows platforms. @@ -1250,7 +1250,7 @@ HWLOC_DECLSPEC int hwloc_set_proc_cpubind(hwloc_topology_t topology, hwloc_pid_t HWLOC_DECLSPEC int hwloc_get_proc_cpubind(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_cpuset_t set, int flags); #ifdef hwloc_thread_t -/** \brief Bind a thread \p thread on cpus given in physical bitmap \p set. +/** \brief Bind a thread \p thread on CPUs given in physical bitmap \p set. * * \note \p hwloc_thread_t is \p pthread_t on Unix platforms, * and \p HANDLE on native Windows platforms. @@ -1914,8 +1914,9 @@ HWLOC_DECLSPEC int hwloc_topology_set_components(hwloc_topology_t __hwloc_restri enum hwloc_topology_flags_e { /** \brief Detect the whole system, ignore reservations, include disallowed objects. * - * Gather all resources, even if some were disabled by the administrator. + * Gather all online resources, even if some were disabled by the administrator. * For instance, ignore Linux Cgroup/Cpusets and gather all processors and memory nodes. + * However offline PUs and NUMA nodes are still ignored. * * When this flag is not set, PUs and NUMA nodes that are disallowed are not added to the topology. * Parent objects (package, core, cache, etc.) are added only if some of their children are allowed. @@ -2066,10 +2067,12 @@ enum hwloc_topology_flags_e { * * Set a OR'ed set of ::hwloc_topology_flags_e onto a topology that was not yet loaded. 
* - * If this function is called multiple times, the last invokation will erase + * If this function is called multiple times, the last invocation will erase * and replace the set of flags that was previously set. * - * The flags set in a topology may be retrieved with hwloc_topology_get_flags() + * By default, no flags are set (\c 0). + * + * The flags set in a topology may be retrieved with hwloc_topology_get_flags(). */ HWLOC_DECLSPEC int hwloc_topology_set_flags (hwloc_topology_t topology, unsigned long flags); @@ -2077,6 +2080,9 @@ HWLOC_DECLSPEC int hwloc_topology_set_flags (hwloc_topology_t topology, unsigned * * Get the OR'ed set of ::hwloc_topology_flags_e of a topology. * + * If hwloc_topology_set_flags() was not called earlier, + * no flags are set (\c 0 is returned). + * * \return the flags previously set with hwloc_topology_set_flags(). */ HWLOC_DECLSPEC unsigned long hwloc_topology_get_flags (hwloc_topology_t topology); diff --git a/src/3rdparty/hwloc/include/hwloc/autogen/config.h b/src/3rdparty/hwloc/include/hwloc/autogen/config.h index 8d89fa25..562a4811 100644 --- a/src/3rdparty/hwloc/include/hwloc/autogen/config.h +++ b/src/3rdparty/hwloc/include/hwloc/autogen/config.h @@ -11,10 +11,10 @@ #ifndef HWLOC_CONFIG_H #define HWLOC_CONFIG_H -#define HWLOC_VERSION "2.7.0" +#define HWLOC_VERSION "2.7.1" #define HWLOC_VERSION_MAJOR 2 #define HWLOC_VERSION_MINOR 7 -#define HWLOC_VERSION_RELEASE 0 +#define HWLOC_VERSION_RELEASE 1 #define HWLOC_VERSION_GREEK "" #define __hwloc_restrict diff --git a/src/3rdparty/hwloc/include/hwloc/memattrs.h b/src/3rdparty/hwloc/include/hwloc/memattrs.h index 2494abb0..02ffa832 100644 --- a/src/3rdparty/hwloc/include/hwloc/memattrs.h +++ b/src/3rdparty/hwloc/include/hwloc/memattrs.h @@ -1,5 +1,5 @@ /* - * Copyright © 2019-2020 Inria. All rights reserved. + * Copyright © 2019-2022 Inria. All rights reserved. * See COPYING in top-level directory. 
*/ @@ -354,7 +354,7 @@ hwloc_memattr_register(hwloc_topology_t topology, * \p flags must be \c 0 for now. * * \note The initiator \p initiator should be of type ::HWLOC_LOCATION_TYPE_CPUSET - * when refering to accesses performed by CPU cores. + * when referring to accesses performed by CPU cores. * ::HWLOC_LOCATION_TYPE_OBJECT is currently unused internally by hwloc, * but users may for instance use it to provide custom information about * host memory accesses performed by GPUs. @@ -398,7 +398,7 @@ hwloc_memattr_set_value(hwloc_topology_t topology, * values. * * \note The initiator \p initiator should be of type ::HWLOC_LOCATION_TYPE_CPUSET - * when refering to accesses performed by CPU cores. + * when referring to accesses performed by CPU cores. * ::HWLOC_LOCATION_TYPE_OBJECT is currently unused internally by hwloc, * but users may for instance use it to provide custom information about * host memory accesses performed by GPUs. @@ -408,7 +408,7 @@ hwloc_memattr_get_targets(hwloc_topology_t topology, hwloc_memattr_id_t attribute, struct hwloc_location *initiator, unsigned long flags, - unsigned *nrp, hwloc_obj_t *targets, hwloc_uint64_t *values); + unsigned *nr, hwloc_obj_t *targets, hwloc_uint64_t *values); /** \brief Return the initiators that have values for a given attribute for a specific target NUMA node. 
* diff --git a/src/3rdparty/hwloc/src/topology-x86.c b/src/3rdparty/hwloc/src/topology-x86.c index 42172eca..b9bc7fb0 100644 --- a/src/3rdparty/hwloc/src/topology-x86.c +++ b/src/3rdparty/hwloc/src/topology-x86.c @@ -614,10 +614,13 @@ static void look_proc(struct hwloc_backend *backend, struct procinfo *infos, uns eax = 0x01; cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump); infos->apicid = ebx >> 24; - if (edx & (1 << 28)) + if (edx & (1 << 28)) { legacy_max_log_proc = 1 << hwloc_flsl(((ebx >> 16) & 0xff) - 1); - else + } else { + hwloc_debug("HTT bit not set in CPUID 0x01.edx, assuming legacy_max_log_proc = 1\n"); legacy_max_log_proc = 1; + } + hwloc_debug("APIC ID 0x%02x legacy_max_log_proc %u\n", infos->apicid, legacy_max_log_proc); infos->ids[PKG] = infos->apicid / legacy_max_log_proc; legacy_log_proc_id = infos->apicid % legacy_max_log_proc; @@ -680,12 +683,23 @@ static void look_proc(struct hwloc_backend *backend, struct procinfo *infos, uns unsigned max_nbcores; unsigned max_nbthreads; unsigned threadid __hwloc_attribute_unused; + hwloc_debug("Trying to get core/thread IDs from 0x04...\n"); max_nbcores = ((eax >> 26) & 0x3f) + 1; - max_nbthreads = legacy_max_log_proc / max_nbcores; - hwloc_debug("thus %u threads\n", max_nbthreads); - threadid = legacy_log_proc_id % max_nbthreads; - infos->ids[CORE] = legacy_log_proc_id / max_nbthreads; - hwloc_debug("this is thread %u of core %u\n", threadid, infos->ids[CORE]); + hwloc_debug("found %u cores max\n", max_nbcores); + /* some VMs (e.g. issue#525) don't report valid information, check things before dividing by 0. 
 */ + if (!max_nbcores) { + hwloc_debug("cannot detect core/thread IDs from 0x04 without a valid max of cores\n"); + } else { + max_nbthreads = legacy_max_log_proc / max_nbcores; + hwloc_debug("found %u threads max\n", max_nbthreads); + if (!max_nbthreads) { + hwloc_debug("cannot detect core/thread IDs from 0x04 without a valid max of threads\n"); + } else { + threadid = legacy_log_proc_id % max_nbthreads; + infos->ids[CORE] = legacy_log_proc_id / max_nbthreads; + hwloc_debug("this is thread %u of core %u\n", threadid, infos->ids[CORE]); + } + } } } diff --git a/src/3rdparty/hwloc/src/topology.c b/src/3rdparty/hwloc/src/topology.c index c0f39c77..54b772ff 100644 --- a/src/3rdparty/hwloc/src/topology.c +++ b/src/3rdparty/hwloc/src/topology.c @@ -3,6 +3,7 @@ * Copyright © 2009-2021 Inria. All rights reserved. * Copyright © 2009-2012, 2020 Université Bordeaux * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. + * Copyright © 2022 IBM Corporation. All rights reserved. * See COPYING in top-level directory. */ @@ -52,29 +53,41 @@ #include #endif + +#ifdef HWLOC_HAVE_LEVELZERO /* * Define ZES_ENABLE_SYSMAN=1 early so that the LevelZero backend gets Sysman enabled. + * + * Only do this if levelzero was enabled in this build, so that we don't enable sysman + * for external levelzero users when hwloc doesn't need it. If somebody ever loads + * an external levelzero plugin in a hwloc library built without levelzero (unlikely), + * they may have to manually set ZES_ENABLE_SYSMAN=1. + * * Use the constructor if supported and/or the Windows DllMain callback. * Do it in the main hwloc library instead of the levelzero component because * the latter could be loaded later as a plugin. * * L0 seems to be using getenv() to check this variable on Windows * (at least in the Intel Compute-Runtime of March 2021), - * so use putenv() to set the variable. + * but setenv() doesn't seem to exist on Windows, hence use putenv() to set the variable. 
* * For the record, Get/SetEnvironmentVariable() is not exactly the same as getenv/putenv(): * - getenv() doesn't see what was set with SetEnvironmentVariable() * - GetEnvironmentVariable() doesn't see putenv() in cygwin (while it does in MSVC and MinGW). * Hence, if L0 ever switches from getenv() to GetEnvironmentVariable(), * it will break in cygwin, we'll have to use both putenv() and SetEnvironmentVariable(). - * Hopefully L0 will be provide a way to enable Sysman without env vars before it happens. + * Hopefully L0 will provide a way to enable Sysman without env vars before it happens. */ #if HWLOC_HAVE_ATTRIBUTE_CONSTRUCTOR static void hwloc_constructor(void) __attribute__((constructor)); static void hwloc_constructor(void) { if (!getenv("ZES_ENABLE_SYSMAN")) - putenv((char *) "ZES_ENABLE_SYSMAN=1"); +#ifdef HWLOC_WIN_SYS + putenv("ZES_ENABLE_SYSMAN=1"); +#else + setenv("ZES_ENABLE_SYSMAN", "1", 1); +#endif } #endif #ifdef HWLOC_WIN_SYS @@ -82,11 +95,14 @@ BOOL WINAPI DllMain(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpReserved) { if (fdwReason == DLL_PROCESS_ATTACH) { if (!getenv("ZES_ENABLE_SYSMAN")) + /* Windows does not have a setenv, so use putenv. */ putenv((char *) "ZES_ENABLE_SYSMAN=1"); } return TRUE; } #endif +#endif /* HWLOC_HAVE_LEVELZERO */ + unsigned hwloc_get_api_version(void) {