From ca3695a754579b3d0803e07ce0a50ef1716ba5ca Mon Sep 17 00:00:00 2001 From: XMRig Date: Mon, 14 Dec 2020 02:55:50 +0700 Subject: [PATCH] Update hwloc for MSVC to 2.4.0. --- src/3rdparty/hwloc/CMakeLists.txt | 36 +- src/3rdparty/hwloc/NEWS | 71 + src/3rdparty/hwloc/README | 8 +- src/3rdparty/hwloc/VERSION | 6 +- src/3rdparty/hwloc/include/hwloc.h | 117 +- .../hwloc/include/hwloc/autogen/config.h | 6 +- src/3rdparty/hwloc/include/hwloc/bitmap.h | 4 +- src/3rdparty/hwloc/include/hwloc/cpukinds.h | 188 +++ src/3rdparty/hwloc/include/hwloc/cuda.h | 4 +- src/3rdparty/hwloc/include/hwloc/cudart.h | 4 +- src/3rdparty/hwloc/include/hwloc/diff.h | 6 +- src/3rdparty/hwloc/include/hwloc/distances.h | 5 +- .../hwloc/include/hwloc/glibc-sched.h | 4 +- src/3rdparty/hwloc/include/hwloc/helper.h | 10 +- src/3rdparty/hwloc/include/hwloc/memattrs.h | 455 +++++++ src/3rdparty/hwloc/include/hwloc/nvml.h | 4 +- src/3rdparty/hwloc/include/hwloc/opencl.h | 4 +- .../hwloc/include/hwloc/openfabrics-verbs.h | 4 +- src/3rdparty/hwloc/include/hwloc/plugins.h | 30 +- src/3rdparty/hwloc/include/hwloc/rename.h | 83 +- .../hwloc/include/private/autogen/config.h | 10 +- src/3rdparty/hwloc/include/private/debug.h | 20 +- .../include/private/internal-components.h | 1 + src/3rdparty/hwloc/include/private/private.h | 82 +- src/3rdparty/hwloc/include/private/xml.h | 2 +- src/3rdparty/hwloc/src/bind.c | 3 +- src/3rdparty/hwloc/src/bitmap.c | 4 +- src/3rdparty/hwloc/src/cpukinds.c | 649 +++++++++ src/3rdparty/hwloc/src/diff.c | 83 +- src/3rdparty/hwloc/src/distances.c | 56 +- src/3rdparty/hwloc/src/memattrs.c | 1197 +++++++++++++++++ src/3rdparty/hwloc/src/misc.c | 4 +- src/3rdparty/hwloc/src/pci-common.c | 62 +- src/3rdparty/hwloc/src/shmem.c | 12 +- src/3rdparty/hwloc/src/static-components.h | 7 +- src/3rdparty/hwloc/src/topology-synthetic.c | 8 +- src/3rdparty/hwloc/src/topology-windows.c | 208 ++- src/3rdparty/hwloc/src/topology-x86.c | 80 +- .../hwloc/src/topology-xml-nolibxml.c | 4 +- src/3rdparty/hwloc/src/topology-xml.c | 595 +++++++- src/3rdparty/hwloc/src/topology.c | 234 ++-- src/3rdparty/hwloc/src/traversal.c | 69 +- 42 files changed, 4057 insertions(+), 382 deletions(-) create mode 100644 src/3rdparty/hwloc/include/hwloc/cpukinds.h create mode 100644 src/3rdparty/hwloc/include/hwloc/memattrs.h create mode 100644 src/3rdparty/hwloc/src/cpukinds.c create mode 100644 src/3rdparty/hwloc/src/memattrs.c diff --git a/src/3rdparty/hwloc/CMakeLists.txt b/src/3rdparty/hwloc/CMakeLists.txt index 0e56b6fc..3f159afd 100644 --- a/src/3rdparty/hwloc/CMakeLists.txt +++ b/src/3rdparty/hwloc/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required (VERSION 2.8) +cmake_minimum_required (VERSION 2.8.12) project (hwloc C) include_directories(include) @@ -13,23 +13,25 @@ set(HEADERS ) set(SOURCES - src/base64.c - src/bind.c - src/bitmap.c - src/components.c - src/diff.c - src/distances.c - src/misc.c - src/pci-common.c - src/shmem.c - src/topology.c - src/topology-noos.c - src/topology-synthetic.c - src/topology-windows.c - src/topology-x86.c - src/topology-xml.c - src/topology-xml-nolibxml.c + src/base64.c + src/bind.c + src/bitmap.c + src/components.c + src/diff.c + src/distances.c + src/misc.c + src/pci-common.c + src/shmem.c + src/topology.c + src/topology-noos.c + src/topology-synthetic.c + src/topology-windows.c + src/topology-x86.c + src/topology-xml.c + src/topology-xml-nolibxml.c src/traversal.c + src/memattrs.c + src/cpukinds.c ) add_library(hwloc STATIC diff --git a/src/3rdparty/hwloc/NEWS b/src/3rdparty/hwloc/NEWS index 
0dfe28df..0ec17bb6 100644 --- a/src/3rdparty/hwloc/NEWS +++ b/src/3rdparty/hwloc/NEWS @@ -2,6 +2,7 @@ Copyright © 2009 CNRS Copyright © 2009-2020 Inria. All rights reserved. Copyright © 2009-2013 Université Bordeaux Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. +Copyright © 2020 Hewlett Packard Enterprise. All rights reserved. $COPYRIGHT$ @@ -16,6 +17,76 @@ bug fixes (and other actions) for each version of hwloc since version 0.9. +Version 2.4.0 +------------- +* API + + Add hwloc/cpukinds.h for reporting information about hybrid CPUs. + - Use Linux cpufreq frequencies to rank cores by efficiency. + - Use x86 CPUID hybrid leaf and future Linux kernels sysfs CPU type + files to identify Intel Atom and Core cores. + - Use the Windows native EfficiencyClass to separate kinds. +* Backends + + Properly handle Linux kernel 5.10+ exposing ACPI HMAT information + with knowledge of Generic Initiators. +* Tools + + lstopo has new --cpukinds and --no-cpukinds options for showing + CPU kinds or not in textual and graphical modes respectively. + + hwloc-calc has a new --cpukind option for filtering PUs by kind. + + hwloc-annotate has a new cpukind command for modifying CPU kinds. +* Misc + + Fix hwloc_bitmap_nr_ulongs(), thanks to Norbert Eicker. + + Add a documentation section about + "Topology Attributes: Distances, Memory Attributes and CPU Kinds". + + Silence some spurious warnings in the OpenCL backend and when showing + process binding with lstopo --ps. + + +Version 2.3.0 +------------- +* API + + Add hwloc/memattrs.h for exposing latency/bandwidth information + between initiators (CPU sets for now) and target NUMA nodes, + typically on heterogeneous platforms. + - When available, bandwidths and latencies are read from the ACPI HMAT + table exposed by Linux kernel 5.2+. + - Attributes may also be customized to expose user-defined performance + information. + + Add hwloc_get_local_numanode_objs() for listing NUMA nodes that are + local to some locality. + + The new topology flag HWLOC_TOPOLOGY_FLAG_IMPORT_SUPPORT causes + support arrays to be loaded from XML exported with hwloc 2.3+. + - hwloc_topology_get_support() now returns an additional "misc" + array with feature "imported_support" set when support was imported. + + Add hwloc_topology_refresh() to refresh internal caches after modifying + the topology and before consulting the topology in a multithread context. +* Backends + + Add a ROCm SMI backend and a hwloc/rsmi.h helper file for getting + the locality of AMD GPUs, now exposed as "rsmi" OS devices. + Thanks to Mike Li. + + Remove POWER device-tree-based topology on Linux, + (it was disabled by default since 2.1). +* Tools + + Command-line options for specifying flags now understand comma-separated + lists of flag names (substrings). + + hwloc-info and hwloc-calc have new --local-memory --local-memory-flags + and --best-memattr options for reporting local memory nodes and filtering + by memory attributes. + + hwloc-bind has a new --best-memattr option for filtering by memory attributes + among the memory binding set. + + Tools that have a --restrict option may now receive a nodeset or + some custom flags for restricting the topology. + + lstopo now has a --thickness option for changing line thickness in the + graphical output. + + Fix lstopo drawing when autoresizing on Windows 10. + + Pressing the F5 key in lstopo X11 and Windows graphical/interactive outputs + now refreshes the display according to the current topology and binding. 
+ + Add a tikz lstopo graphical backend to generate picture easily included into + LaTeX documents. Thanks to Clement Foyer. +* Misc + + The default installation path of the Bash completion file has changed to + ${datadir}/bash-completion/completions/hwloc. Thanks to Tomasz Kłoczko. + + Version 2.2.0 ------------- * API diff --git a/src/3rdparty/hwloc/README b/src/3rdparty/hwloc/README index 5567b4d1..932d6d09 100644 --- a/src/3rdparty/hwloc/README +++ b/src/3rdparty/hwloc/README @@ -23,9 +23,9 @@ APIs are documented after these sections. Installation -hwloc (http://www.open-mpi.org/projects/hwloc/) is available under the BSD -license. It is hosted as a sub-project of the overall Open MPI project (http:// -www.open-mpi.org/). Note that hwloc does not require any functionality from +hwloc (https://www.open-mpi.org/projects/hwloc/) is available under the BSD +license. It is hosted as a sub-project of the overall Open MPI project (https:/ +/www.open-mpi.org/). Note that hwloc does not require any functionality from Open MPI -- it is a wholly separate (and much smaller!) project and code base. It just happens to be hosted as part of the overall Open MPI project. @@ -75,7 +75,7 @@ Bugs should be reported in the tracker (https://github.com/open-mpi/hwloc/ issues). Opening a new issue automatically displays lots of hints about how to debug and report issues. -Questions may be sent to the users or developers mailing lists (http:// +Questions may be sent to the users or developers mailing lists (https:// www.open-mpi.org/community/lists/hwloc.php). There is also a #hwloc IRC channel on Freenode (irc.freenode.net). diff --git a/src/3rdparty/hwloc/VERSION b/src/3rdparty/hwloc/VERSION index e182793d..979c2cc8 100644 --- a/src/3rdparty/hwloc/VERSION +++ b/src/3rdparty/hwloc/VERSION @@ -8,7 +8,7 @@ # Please update HWLOC_VERSION* in contrib/windows/hwloc_config.h too. major=2 -minor=2 +minor=4 release=0 # greek is used for alpha or beta release tags. If it is non-empty, @@ -22,7 +22,7 @@ greek= # The date when this release was created -date="Mar 30, 2020" +date="Nov 26, 2020" # If snapshot=1, then use the value from snapshot_version as the # entire hwloc version (i.e., ignore major, minor, release, and @@ -41,7 +41,7 @@ snapshot_version=${major}.${minor}.${release}${greek}-git # 2. Version numbers are described in the Libtool current:revision:age # format. -libhwloc_so_version=17:0:2 +libhwloc_so_version=19:0:4 libnetloc_so_version=0:0:0 # Please also update the lines in contrib/windows/libhwloc.vcxproj diff --git a/src/3rdparty/hwloc/include/hwloc.h b/src/3rdparty/hwloc/include/hwloc.h index 01b42fdc..9c8c86cc 100644 --- a/src/3rdparty/hwloc/include/hwloc.h +++ b/src/3rdparty/hwloc/include/hwloc.h @@ -2,7 +2,7 @@ * Copyright © 2009 CNRS * Copyright © 2009-2020 Inria. All rights reserved. * Copyright © 2009-2012 Université Bordeaux - * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. + * Copyright © 2009-2020 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. */ @@ -11,7 +11,7 @@ * ------------------------------------------------ * $tarball_directory/doc/doxygen-doc/ * or - * http://www.open-mpi.org/projects/hwloc/doc/ + * https://www.open-mpi.org/projects/hwloc/doc/ *===================================================================== * * FAIR WARNING: Do NOT expect to be able to figure out all the @@ -93,7 +93,7 @@ extern "C" { * Two stable releases of the same series usually have the same ::HWLOC_API_VERSION * even if their HWLOC_VERSION are different. 
*/ -#define HWLOC_API_VERSION 0x00020100 +#define HWLOC_API_VERSION 0x00020400 /** \brief Indicate at runtime which hwloc API version was used at build time. * @@ -102,7 +102,7 @@ extern "C" { HWLOC_DECLSPEC unsigned hwloc_get_api_version(void); /** \brief Current component and plugin ABI version (see hwloc/plugins.h) */ -#define HWLOC_COMPONENT_ABI 6 +#define HWLOC_COMPONENT_ABI 7 /** @} */ @@ -196,7 +196,7 @@ typedef enum { */ HWLOC_OBJ_CORE, /**< \brief Core. * A computation unit (may be shared by several - * logical processors). + * PUs, aka logical processors). */ HWLOC_OBJ_PU, /**< \brief Processing Unit, or (Logical) Processor. * An execution unit (may share a core with some @@ -257,22 +257,31 @@ typedef enum { HWLOC_OBJ_BRIDGE, /**< \brief Bridge (filtered out by default). * Any bridge (or PCI switch) that connects the host or an I/O bus, * to another I/O bus. - * They are not added to the topology unless I/O discovery - * is enabled with hwloc_topology_set_flags(). + * + * Bridges are not added to the topology unless their + * filtering is changed (see hwloc_topology_set_type_filter() + * and hwloc_topology_set_io_types_filter()). + * * I/O objects are not listed in the main children list, * but rather in the dedicated io children list. * I/O objects have NULL CPU and node sets. */ HWLOC_OBJ_PCI_DEVICE, /**< \brief PCI device (filtered out by default). - * They are not added to the topology unless I/O discovery - * is enabled with hwloc_topology_set_flags(). + * + * PCI devices are not added to the topology unless their + * filtering is changed (see hwloc_topology_set_type_filter() + * and hwloc_topology_set_io_types_filter()). + * * I/O objects are not listed in the main children list, * but rather in the dedicated io children list. * I/O objects have NULL CPU and node sets. */ HWLOC_OBJ_OS_DEVICE, /**< \brief Operating system device (filtered out by default). - * They are not added to the topology unless I/O discovery - * is enabled with hwloc_topology_set_flags(). + * + * OS devices are not added to the topology unless their + * filtering is changed (see hwloc_topology_set_type_filter() + * and hwloc_topology_set_io_types_filter()). + * * I/O objects are not listed in the main children list, * but rather in the dedicated io children list. * I/O objects have NULL CPU and node sets. @@ -282,6 +291,10 @@ typedef enum { * Objects without particular meaning, that can e.g. be * added by the application for its own use, or by hwloc * for miscellaneous objects such as MemoryModule (DIMMs). + * + * They are not added to the topology unless their filtering + * is changed (see hwloc_topology_set_type_filter()). + * * These objects are not listed in the main children list, * but rather in the dedicated misc children list. * Misc objects may only have Misc objects as children, @@ -304,7 +317,6 @@ typedef enum { HWLOC_OBJ_DIE, /**< \brief Die within a physical package. * A subpart of the physical package, that contains multiple cores. - * \hideinitializer */ HWLOC_OBJ_TYPE_MAX /**< \private Sentinel value */ @@ -338,8 +350,7 @@ typedef enum hwloc_obj_osdev_type_e { HWLOC_OBJ_OSDEV_DMA, /**< \brief Operating system dma engine device. * For instance the "dma0chan0" DMA channel on Linux. */ HWLOC_OBJ_OSDEV_COPROC /**< \brief Operating system co-processor device. - * For instance "mic0" for a Xeon Phi (MIC) on Linux, - * "opencl0d0" for a OpenCL device, + * For instance "opencl0d0" for a OpenCL device, * "cuda0" for a CUDA device. 
*/ } hwloc_obj_osdev_type_t; @@ -512,7 +523,7 @@ struct hwloc_obj { * * \note Its value must not be changed, hwloc_bitmap_dup() must be used instead. */ - hwloc_cpuset_t complete_cpuset; /**< \brief The complete CPU set of logical processors of this object, + hwloc_cpuset_t complete_cpuset; /**< \brief The complete CPU set of processors of this object, * * This may include not only the same as the cpuset field, but also some CPUs for * which topology information is unknown or incomplete, some offlines CPUs, and @@ -533,6 +544,8 @@ struct hwloc_obj { * between this object and the NUMA node objects). * * In the end, these nodes are those that are close to the current object. + * Function hwloc_get_local_numanode_objs() may be used to list those NUMA + * nodes more precisely. * * If the ::HWLOC_TOPOLOGY_FLAG_INCLUDE_DISALLOWED configuration flag is set, * some of these nodes may not be allowed for allocation, @@ -1929,7 +1942,31 @@ enum hwloc_topology_flags_e { * would result in the same behavior. * \hideinitializer */ - HWLOC_TOPOLOGY_FLAG_THISSYSTEM_ALLOWED_RESOURCES = (1UL<<2) + HWLOC_TOPOLOGY_FLAG_THISSYSTEM_ALLOWED_RESOURCES = (1UL<<2), + + /** \brief Import support from the imported topology. + * + * When importing a XML topology from a remote machine, binding is + * disabled by default (see ::HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM). + * This disabling is also marked by putting zeroes in the corresponding + * supported feature bits reported by hwloc_topology_get_support(). + * + * The flag ::HWLOC_TOPOLOGY_FLAG_IMPORT_SUPPORT actually imports + * support bits from the remote machine. It also sets the flag + * \p imported_support in the struct hwloc_topology_misc_support array. + * If the imported XML did not contain any support information + * (exporter hwloc is too old), this flag is not set. + * + * Note that these supported features are only relevant for the hwloc + * installation that actually exported the XML topology + * (it may vary with the operating system, or with how hwloc was compiled). + * + * Note that setting this flag however does not enable binding for the + * locally imported hwloc topology, it only reports what the remote + * hwloc and machine support. + * + */ + HWLOC_TOPOLOGY_FLAG_IMPORT_SUPPORT = (1UL<<3) }; /** \brief Set OR'ed flags to non-yet-loaded topology. @@ -1972,6 +2009,8 @@ struct hwloc_topology_discovery_support { unsigned char disallowed_pu; /** \brief Detecting and identifying NUMA nodes that are not available to the current process is supported. */ unsigned char disallowed_numa; + /** \brief Detecting the efficiency of CPU kinds is supported, see \ref hwlocality_cpukinds. */ + unsigned char cpukind_efficiency; }; /** \brief Flags describing actual PU binding support for this topology. @@ -2042,6 +2081,13 @@ struct hwloc_topology_membind_support { unsigned char get_area_memlocation; }; +/** \brief Flags describing miscellaneous features. + */ +struct hwloc_topology_misc_support { + /** Support was imported when importing another topology, see ::HWLOC_TOPOLOGY_FLAG_IMPORT_SUPPORT. */ + unsigned char imported_support; +}; + /** \brief Set of flags describing actual support for this topology. * * This is retrieved with hwloc_topology_get_support() and will be valid until @@ -2052,6 +2098,7 @@ struct hwloc_topology_support { struct hwloc_topology_discovery_support *discovery; struct hwloc_topology_cpubind_support *cpubind; struct hwloc_topology_membind_support *membind; + struct hwloc_topology_misc_support *misc; }; /** \brief Retrieve the topology support. 
@@ -2062,6 +2109,18 @@ struct hwloc_topology_support { * call may still fail in some corner cases. * * These features are also listed by hwloc-info \--support + * + * The reported features are what the current topology supports + * on the current machine. If the topology was exported to XML + * from another machine and later imported here, support still + * describes what is supported for this imported topology after + * import. By default, binding will be reported as unsupported + * in this case (see ::HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM). + * + * Topology flag ::HWLOC_TOPOLOGY_FLAG_IMPORT_SUPPORT may be used + * to report the supported features of the original remote machine + * instead. If it was successfully imported, \p imported_support + * will be set in the struct hwloc_topology_misc_support array. */ HWLOC_DECLSPEC const struct hwloc_topology_support *hwloc_topology_get_support(hwloc_topology_t __hwloc_restrict topology); @@ -2108,8 +2167,8 @@ enum hwloc_type_filter_e { * * It is only useful for I/O object types. * For ::HWLOC_OBJ_PCI_DEVICE and ::HWLOC_OBJ_OS_DEVICE, it means that only objects - * of major/common kinds are kept (storage, network, OpenFabrics, Intel MICs, CUDA, - * OpenCL, NVML, and displays). + * of major/common kinds are kept (storage, network, OpenFabrics, CUDA, + * OpenCL, RSMI, NVML, and displays). * Also, only OS devices directly attached on PCI (e.g. no USB) are reported. * For ::HWLOC_OBJ_BRIDGE, it means that bridges are kept only if they have children. * @@ -2371,6 +2430,22 @@ HWLOC_DECLSPEC hwloc_obj_t hwloc_topology_insert_group_object(hwloc_topology_t t */ HWLOC_DECLSPEC int hwloc_obj_add_other_obj_sets(hwloc_obj_t dst, hwloc_obj_t src); +/** \brief Refresh internal structures after topology modification. + * + * Modifying the topology (by restricting, adding objects, modifying structures + * such as distances or memory attributes, etc.) may cause some internal caches + * to become invalid. These caches are automatically refreshed when accessed + * but this refreshing is not thread-safe. + * + * This function is not thread-safe either, but it is a good way to end a + * non-thread-safe phase of topology modification. Once this refresh is done, + * multiple threads may concurrently consult the topology, objects, distances, + * attributes, etc. + * + * See also \ref threadsafety + */ +HWLOC_DECLSPEC int hwloc_topology_refresh(hwloc_topology_t topology); + /** @} */ @@ -2386,6 +2461,12 @@ HWLOC_DECLSPEC int hwloc_obj_add_other_obj_sets(hwloc_obj_t dst, hwloc_obj_t src /* inline code of some functions above */ #include "hwloc/inlines.h" +/* memory attributes */ +#include "hwloc/memattrs.h" + +/* kinds of CPU cores */ +#include "hwloc/cpukinds.h" + /* exporting to XML or synthetic */ #include "hwloc/export.h" diff --git a/src/3rdparty/hwloc/include/hwloc/autogen/config.h b/src/3rdparty/hwloc/include/hwloc/autogen/config.h index 06963b36..8b69185f 100644 --- a/src/3rdparty/hwloc/include/hwloc/autogen/config.h +++ b/src/3rdparty/hwloc/include/hwloc/autogen/config.h @@ -1,6 +1,6 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2019 Inria. All rights reserved. + * Copyright © 2009-2020 Inria. All rights reserved. * Copyright © 2009-2012 Université Bordeaux * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. 
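
For reference, a minimal usage sketch (not part of the patch) of the two additions documented in the hwloc.h hunks above: the ::HWLOC_TOPOLOGY_FLAG_IMPORT_SUPPORT flag with the new misc support array, and hwloc_topology_refresh(). The file name "remote.xml" is an assumption; all hwloc calls are existing public API.

/* Illustrative sketch only, not part of the patch. "remote.xml" is an
 * assumed file exported by hwloc 2.3+ on another machine. */
#include <stdio.h>
#include "hwloc.h"

static void demo_imported_support(void)
{
  hwloc_topology_t topology;
  const struct hwloc_topology_support *support;

  hwloc_topology_init(&topology);
  hwloc_topology_set_xml(topology, "remote.xml");
  hwloc_topology_set_flags(topology, HWLOC_TOPOLOGY_FLAG_IMPORT_SUPPORT);
  hwloc_topology_load(topology);

  support = hwloc_topology_get_support(topology);
  if (support->misc->imported_support)
    /* these bits now describe the exporting machine, not this one */
    printf("remote set_thisthread_cpubind: %s\n",
           support->cpubind->set_thisthread_cpubind ? "yes" : "no");

  /* After a phase of topology modification, end it with a refresh so
   * other threads may then consult the topology concurrently. */
  hwloc_topology_refresh(topology);

  hwloc_topology_destroy(topology);
}
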
@@ -11,9 +11,9 @@ #ifndef HWLOC_CONFIG_H #define HWLOC_CONFIG_H -#define HWLOC_VERSION "2.2.0" +#define HWLOC_VERSION "2.4.0" #define HWLOC_VERSION_MAJOR 2 -#define HWLOC_VERSION_MINOR 2 +#define HWLOC_VERSION_MINOR 4 #define HWLOC_VERSION_RELEASE 0 #define HWLOC_VERSION_GREEK "" diff --git a/src/3rdparty/hwloc/include/hwloc/bitmap.h b/src/3rdparty/hwloc/include/hwloc/bitmap.h index d5b0ea02..8d9bb9c8 100644 --- a/src/3rdparty/hwloc/include/hwloc/bitmap.h +++ b/src/3rdparty/hwloc/include/hwloc/bitmap.h @@ -1,6 +1,6 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2018 Inria. All rights reserved. + * Copyright © 2009-2020 Inria. All rights reserved. * Copyright © 2009-2012 Université Bordeaux * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. @@ -231,7 +231,7 @@ HWLOC_DECLSPEC int hwloc_bitmap_clr_range(hwloc_bitmap_t bitmap, unsigned begin, /** \brief Keep a single index among those set in bitmap \p bitmap * * May be useful before binding so that the process does not - * have a chance of migrating between multiple logical CPUs + * have a chance of migrating between multiple processors * in the original mask. * Instead of running the task on any PU inside the given CPU set, * the operating system scheduler will be forced to run it on a single diff --git a/src/3rdparty/hwloc/include/hwloc/cpukinds.h b/src/3rdparty/hwloc/include/hwloc/cpukinds.h new file mode 100644 index 00000000..f240baf3 --- /dev/null +++ b/src/3rdparty/hwloc/include/hwloc/cpukinds.h @@ -0,0 +1,188 @@ +/* + * Copyright © 2020 Inria. All rights reserved. + * See COPYING in top-level directory. + */ + +/** \file + * \brief Kinds of CPU cores. + */ + +#ifndef HWLOC_CPUKINDS_H +#define HWLOC_CPUKINDS_H + +#include "hwloc.h" + +#ifdef __cplusplus +extern "C" { +#elif 0 +} +#endif + +/** \defgroup hwlocality_cpukinds Kinds of CPU cores + * + * Platforms with heterogeneous CPUs may have some cores with + * different features or frequencies. + * This API exposes identical PUs in sets called CPU kinds. + * Each PU of the topology may only be in a single kind. + * + * The number of kinds may be obtained with hwloc_cpukinds_get_nr(). + * If the platform is homogeneous, there may be a single kind + * with all PUs. + * If the platform or operating system does not expose any + * information about CPU cores, there may be no kind at all. + * + * The index of the kind that describes a given CPU set + * (if any, and not partially) + * may be obtained with hwloc_cpukinds_get_by_cpuset(). + * + * From the index of a kind, it is possible to retrieve information + * with hwloc_cpukinds_get_info(): + * an abstracted efficiency value, + * and an array of info attributes + * (for instance the "CoreType" and "FrequencyMaxMHz", + * see \ref topoattrs_cpukinds). + * + * A higher efficiency value means intrinsic greater performance + * (and possibly less performance/power efficiency). + * Kinds with lower efficiency are ranked first: + * Passing 0 as \p kind_index to hwloc_cpukinds_get_info() will + * return information about the less efficient CPU kind. + * + * When available, efficiency values are gathered from the operating + * system (when \p cpukind_efficiency is set in the + * struct hwloc_topology_discovery_support array, only on Windows 10 for now). + * Otherwise hwloc tries to compute efficiencies + * by comparing CPU kinds using frequencies (on ARM), + * or core types and frequencies (on other architectures). 
+ * The environment variable HWLOC_CPUKINDS_RANKING may be used + * to change these heuristics, see \ref envvar. + * + * If hwloc fails to rank any kind, for instance because the operating + * system does not expose efficiencies and core frequencies, + * all kinds will have an unknown efficiency (\c -1), + * and they are not indexed/ordered in any specific way. + * + * @{ + */ + +/** \brief Get the number of different kinds of CPU cores in the topology. + * + * \p flags must be \c 0 for now. + * + * \return The number of CPU kinds (positive integer) on success. + * \return \c 0 if no information about kinds was found. + * \return \c -1 with \p errno set to \c EINVAL if \p flags is invalid. + */ +HWLOC_DECLSPEC int +hwloc_cpukinds_get_nr(hwloc_topology_t topology, + unsigned long flags); + +/** \brief Get the index of the CPU kind that contains CPUs listed in \p cpuset. + * + * \p flags must be \c 0 for now. + * + * \return The index of the CPU kind (positive integer or 0) on success. + * \return \c -1 with \p errno set to \c EXDEV if \p cpuset is + * only partially included in some kind. + * \return \c -1 with \p errno set to \c ENOENT if \p cpuset is + * not included in any kind, even partially. + * \return \c -1 with \p errno set to \c EINVAL if parameters are invalid. + */ +HWLOC_DECLSPEC int +hwloc_cpukinds_get_by_cpuset(hwloc_topology_t topology, + hwloc_const_bitmap_t cpuset, + unsigned long flags); + +/** \brief Get the CPU set and infos about a CPU kind in the topology. + * + * \p kind_index identifies one kind of CPU between 0 and the number + * of kinds returned by hwloc_cpukinds_get_nr() minus 1. + * + * If not \c NULL, the bitmap \p cpuset will be filled with + * the set of PUs of this kind. + * + * The integer pointed to by \p efficiency, if not \c NULL, will be filled + * with the ranking of this kind of CPU in terms of efficiency (see above). + * It ranges from \c 0 to the number of kinds + * (as reported by hwloc_cpukinds_get_nr()) minus 1. + * + * Kinds with lower efficiency are reported first. + * + * If there is a single kind in the topology, its efficiency is \c 0. + * If the efficiency of some kinds of cores is unknown, + * the efficiency of all kinds is set to \c -1, + * and kinds are reported in no specific order. + * + * The array of info attributes (for instance the "CoreType", + * "FrequencyMaxMHz" or "FrequencyBaseMHz", see \ref topoattrs_cpukinds) + * and its length are returned in \p infos and \p nr_infos. + * The array belongs to the topology; it should not be freed or modified. + * + * If \p nr_infos or \p infos is \c NULL, no info is returned. + * + * \p flags must be \c 0 for now. + * + * \return \c 0 on success. + * \return \c -1 with \p errno set to \c ENOENT if \p kind_index does not match any CPU kind. + * \return \c -1 with \p errno set to \c EINVAL if parameters are invalid. + */ +HWLOC_DECLSPEC int +hwloc_cpukinds_get_info(hwloc_topology_t topology, + unsigned kind_index, + hwloc_bitmap_t cpuset, + int *efficiency, + unsigned *nr_infos, struct hwloc_info_s **infos, + unsigned long flags); + +/** \brief Register a kind of CPU in the topology. + * + * Mark the PUs listed in \p cpuset as being of the same kind + * with respect to the given attributes. + * + * \p forced_efficiency should be \c -1 if unknown. + * Otherwise it is an abstracted efficiency value to enforce + * the ranking of all kinds if all of them have valid (and + * different) efficiencies.
+ * + * The array \p infos of size \p nr_infos may be used to provide + * info names and values describing this kind of PUs. + * + * \p flags must be \c 0 for now. + * + * Parameters \p cpuset and \p infos will be duplicated internally, + * the caller is responsible for freeing them. + * + * If \p cpuset overlaps with some existing kinds, those might get + * modified or split. For instance if existing kind A contains + * PUs 0 and 1, and one registers another kind for PU 1 and 2, + * there will be 3 resulting kinds: + * existing kind A is restricted to only PU 0; + * new kind B contains only PU 1 and combines information from A + * and from the newly-registered kind; + * new kind C contains only PU 2 and only gets information from + * the newly-registered kind. + * + * \note The efficiency \p forced_efficiency provided to this function + * may be different from the one reported later by hwloc_cpukinds_get_info() + * because hwloc will scale efficiency values down to + * between 0 and the number of kinds minus 1. + * + * \return \c 0 on success. + * \return \c -1 with \p errno set to \c EINVAL if some parameters are invalid, + * for instance if \p cpuset is \c NULL or empty. + */ +HWLOC_DECLSPEC int +hwloc_cpukinds_register(hwloc_topology_t topology, + hwloc_bitmap_t cpuset, + int forced_efficiency, + unsigned nr_infos, struct hwloc_info_s *infos, + unsigned long flags); + +/** @} */ + +#ifdef __cplusplus +} /* extern "C" */ +#endif + + +#endif /* HWLOC_CPUKINDS_H */ diff --git a/src/3rdparty/hwloc/include/hwloc/cuda.h b/src/3rdparty/hwloc/include/hwloc/cuda.h index 6f0cda4c..582270d1 100644 --- a/src/3rdparty/hwloc/include/hwloc/cuda.h +++ b/src/3rdparty/hwloc/include/hwloc/cuda.h @@ -1,5 +1,5 @@ /* - * Copyright © 2010-2017 Inria. All rights reserved. + * Copyright © 2010-2020 Inria. All rights reserved. * Copyright © 2010-2011 Université Bordeaux * Copyright © 2011 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. @@ -72,7 +72,7 @@ hwloc_cuda_get_device_pci_ids(hwloc_topology_t topology __hwloc_attribute_unused return 0; } -/** \brief Get the CPU set of logical processors that are physically +/** \brief Get the CPU set of processors that are physically * close to device \p cudevice. * * Return the CPU set describing the locality of the CUDA device \p cudevice. diff --git a/src/3rdparty/hwloc/include/hwloc/cudart.h b/src/3rdparty/hwloc/include/hwloc/cudart.h index 688b8421..059727ae 100644 --- a/src/3rdparty/hwloc/include/hwloc/cudart.h +++ b/src/3rdparty/hwloc/include/hwloc/cudart.h @@ -1,5 +1,5 @@ /* - * Copyright © 2010-2017 Inria. All rights reserved. + * Copyright © 2010-2020 Inria. All rights reserved. * Copyright © 2010-2011 Université Bordeaux * Copyright © 2011 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. @@ -69,7 +69,7 @@ hwloc_cudart_get_device_pci_ids(hwloc_topology_t topology __hwloc_attribute_unus return 0; } -/** \brief Get the CPU set of logical processors that are physically +/** \brief Get the CPU set of processors that are physically * close to device \p idx. * * Return the CPU set describing the locality of the CUDA device diff --git a/src/3rdparty/hwloc/include/hwloc/diff.h b/src/3rdparty/hwloc/include/hwloc/diff.h index 79f2df3d..0ad0486b 100644 --- a/src/3rdparty/hwloc/include/hwloc/diff.h +++ b/src/3rdparty/hwloc/include/hwloc/diff.h @@ -1,5 +1,5 @@ /* - * Copyright © 2013-2018 Inria. All rights reserved. + * Copyright © 2013-2020 Inria. All rights reserved. * See COPYING in top-level directory. 
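
Stepping back to the cpukinds.h interface completed above: a minimal sketch (not part of the patch) that enumerates the reported kinds and registers a custom one, using only the declarations from this new header. The PU range 0..3 and the "CoreType"/"Big" info pair are illustrative assumptions.

/* Illustrative sketch only, not part of the patch.
 * Assumes "topology" is an already-loaded hwloc_topology_t. */
#include <stdio.h>
#include <stdlib.h>
#include "hwloc.h"
#include "hwloc/cpukinds.h"

static void demo_cpukinds(hwloc_topology_t topology)
{
  hwloc_bitmap_t cpuset = hwloc_bitmap_alloc();
  int i, nr = hwloc_cpukinds_get_nr(topology, 0);

  /* Enumerate kinds, least efficient first. */
  for (i = 0; i < nr; i++) {
    int efficiency;
    unsigned j, nr_infos;
    struct hwloc_info_s *infos;
    char *str;
    if (hwloc_cpukinds_get_info(topology, (unsigned) i, cpuset,
                                &efficiency, &nr_infos, &infos, 0) < 0)
      continue;
    hwloc_bitmap_asprintf(&str, cpuset);
    printf("kind #%d: efficiency %d, cpuset %s\n", i, efficiency, str);
    free(str);
    for (j = 0; j < nr_infos; j++)
      printf("  %s = %s\n", infos[j].name, infos[j].value);
  }

  /* Register a custom kind; cpuset and infos are duplicated internally. */
  {
    struct hwloc_info_s info = { (char *) "CoreType", (char *) "Big" }; /* assumed values */
    hwloc_bitmap_set_range(cpuset, 0, 3); /* assumed hybrid layout: PUs 0..3 */
    hwloc_cpukinds_register(topology, cpuset, -1 /* unknown efficiency */, 1, &info, 0);
  }
  hwloc_bitmap_free(cpuset);
}
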
*/ @@ -110,7 +110,7 @@ union hwloc_topology_diff_obj_attr_u { */ typedef enum hwloc_topology_diff_type_e { /** \brief An object attribute was changed. - * The union is a hwloc_topology_diff_obj_attr_u::hwloc_topology_diff_obj_attr_s. + * The union is a hwloc_topology_diff_u::hwloc_topology_diff_obj_attr_s. */ HWLOC_TOPOLOGY_DIFF_OBJ_ATTR, @@ -119,7 +119,7 @@ typedef enum hwloc_topology_diff_type_e { * this object has not been checked. * hwloc_topology_diff_build() will return 1. * - * The union is a hwloc_topology_diff_obj_attr_u::hwloc_topology_diff_too_complex_s. + * The union is a hwloc_topology_diff_u::hwloc_topology_diff_too_complex_s. */ HWLOC_TOPOLOGY_DIFF_TOO_COMPLEX } hwloc_topology_diff_type_t; diff --git a/src/3rdparty/hwloc/include/hwloc/distances.h b/src/3rdparty/hwloc/include/hwloc/distances.h index b7baed8a..57e53cd5 100644 --- a/src/3rdparty/hwloc/include/hwloc/distances.h +++ b/src/3rdparty/hwloc/include/hwloc/distances.h @@ -1,5 +1,5 @@ /* - * Copyright © 2010-2019 Inria. All rights reserved. + * Copyright © 2010-2020 Inria. All rights reserved. * See COPYING in top-level directory. */ @@ -34,6 +34,7 @@ extern "C" { * It corresponds to the latency for accessing the memory of one node * from a core in another node. * The corresponding kind is ::HWLOC_DISTANCES_KIND_FROM_OS | ::HWLOC_DISTANCES_KIND_FROM_USER. + * The name of this distances structure is "NUMALatency". * * The matrix may also contain bandwidths between random sets of objects, * possibly provided by the user, as specified in the \p kind attribute. @@ -144,6 +145,8 @@ hwloc_distances_get_by_type(hwloc_topology_t topology, hwloc_obj_type_t type, /** \brief Retrieve a distance matrix with the given name. * * Usually only one distances structure may match a given name. + * + * The name of the most common structure is "NUMALatency". */ HWLOC_DECLSPEC int hwloc_distances_get_by_name(hwloc_topology_t topology, const char *name, diff --git a/src/3rdparty/hwloc/include/hwloc/glibc-sched.h b/src/3rdparty/hwloc/include/hwloc/glibc-sched.h index 99659e03..3c5368be 100644 --- a/src/3rdparty/hwloc/include/hwloc/glibc-sched.h +++ b/src/3rdparty/hwloc/include/hwloc/glibc-sched.h @@ -1,6 +1,6 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2013 inria. All rights reserved. + * Copyright © 2009-2020 Inria. All rights reserved. * Copyright © 2009-2011 Université Bordeaux * Copyright © 2011 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. @@ -22,7 +22,7 @@ #include -#if !defined _GNU_SOURCE || !defined _SCHED_H || (!defined CPU_SETSIZE && !defined sched_priority) +#if !defined _GNU_SOURCE || (!defined _SCHED_H && !defined _SCHED_H_) || (!defined CPU_SETSIZE && !defined sched_priority) #error Please make sure to include sched.h before including glibc-sched.h, and define _GNU_SOURCE before any inclusion of sched.h #endif diff --git a/src/3rdparty/hwloc/include/hwloc/helper.h b/src/3rdparty/hwloc/include/hwloc/helper.h index 3df64843..8e4d4532 100644 --- a/src/3rdparty/hwloc/include/hwloc/helper.h +++ b/src/3rdparty/hwloc/include/hwloc/helper.h @@ -872,8 +872,8 @@ hwloc_distrib(hwloc_topology_t topology, unsigned chunk, weight; hwloc_obj_t root = roots[flags & HWLOC_DISTRIB_FLAG_REVERSE ? 
n_roots-1-i : i]; hwloc_cpuset_t cpuset = root->cpuset; - if (root->type == HWLOC_OBJ_NUMANODE) - /* NUMANodes have same cpuset as their parent, but we need normal objects below */ + while (!hwloc_obj_type_is_normal(root->type)) + /* If memory/io/misc, walk up to normal parent */ root = root->parent; weight = (unsigned) hwloc_bitmap_weight(cpuset); if (!weight) @@ -919,7 +919,7 @@ hwloc_distrib(hwloc_topology_t topology, /** \brief Get complete CPU set * - * \return the complete CPU set of logical processors of the system. + * \return the complete CPU set of processors of the system. * * \note The returned cpuset is not newly allocated and should thus not be * changed or freed; hwloc_bitmap_dup() must be used to obtain a local copy. @@ -931,7 +931,7 @@ hwloc_topology_get_complete_cpuset(hwloc_topology_t topology) __hwloc_attribute_ /** \brief Get topology CPU set * - * \return the CPU set of logical processors of the system for which hwloc + * \return the CPU set of processors of the system for which hwloc * provides topology information. This is equivalent to the cpuset of the * system object. * @@ -945,7 +945,7 @@ hwloc_topology_get_topology_cpuset(hwloc_topology_t topology) __hwloc_attribute_ /** \brief Get allowed CPU set * - * \return the CPU set of allowed logical processors of the system. + * \return the CPU set of allowed processors of the system. * * \note If the topology flag ::HWLOC_TOPOLOGY_FLAG_INCLUDE_DISALLOWED was not set, * this is identical to hwloc_topology_get_topology_cpuset(), which means diff --git a/src/3rdparty/hwloc/include/hwloc/memattrs.h b/src/3rdparty/hwloc/include/hwloc/memattrs.h new file mode 100644 index 00000000..2494abb0 --- /dev/null +++ b/src/3rdparty/hwloc/include/hwloc/memattrs.h @@ -0,0 +1,455 @@ +/* + * Copyright © 2019-2020 Inria. All rights reserved. + * See COPYING in top-level directory. + */ + +/** \file + * \brief Memory node attributes. + */ + +#ifndef HWLOC_MEMATTR_H +#define HWLOC_MEMATTR_H + +#include "hwloc.h" + +#ifdef __cplusplus +extern "C" { +#elif 0 +} +#endif + +/** \defgroup hwlocality_memattrs Comparing memory node attributes for finding where to allocate on + * + * Platforms with heterogeneous memory require ways to decide whether + * a buffer should be allocated on "fast" memory (such as HBM), + * "normal" memory (DDR) or even "slow" but large-capacity memory + * (non-volatile memory). + * These memory nodes are called "Targets" while the CPU accessing them + * is called the "Initiator". Access performance depends on their + * locality (NUMA platforms) as well as the intrinsic performance + * of the targets (heterogeneous platforms). + * + * The following attributes describe the performance of memory accesses + * from an Initiator to a memory Target, for instance their latency + * or bandwidth. + * Initiators performing these memory accesses are usually some PUs or Cores + * (described as a CPU set). + * Hence a Core may choose where to allocate a memory buffer by comparing + * the attributes of different target memory nodes nearby. + * + * There are also some attributes that are system-wide. + * Their value does not depend on a specific initiator performing + * an access. + * The memory node Capacity is an example of such attribute without + * initiator. + * + * One way to use this API is to start with a cpuset describing the Cores where + * a program is bound. 
The best target NUMA node for allocating memory in this + * program on these Cores may be obtained by passing this cpuset as an initiator + * to hwloc_memattr_get_best_target() with the relevant memory attribute. + * For instance, if the code is latency limited, use the Latency attribute. + * + * A more flexible approach consists in getting the list of local NUMA nodes + * by passing this cpuset to hwloc_get_local_numanode_objs(). + * Attribute values for these nodes, if any, may then be obtained with + * hwloc_memattr_get_value() and manually compared with the desired criteria. + * + * \note The API also supports specific objects as initiator, + * but it is currently not used internally by hwloc. + * Users may for instance use it to provide custom performance + * values for host memory accesses performed by GPUs. + * + * \note The interface actually also accepts targets that are not NUMA nodes. + * @{ + */ + +/** \brief Memory node attributes. */ +enum hwloc_memattr_id_e { + /** \brief "Capacity". + * The capacity is returned in bytes + * (local_memory attribute in objects). + * + * Best capacity nodes are nodes with higher capacity. + * + * No initiator is involved when looking at this attribute. + * The corresponding attribute flags are ::HWLOC_MEMATTR_FLAG_HIGHER_FIRST. + */ + HWLOC_MEMATTR_ID_CAPACITY = 0, + + /** \brief "Locality". + * The locality is returned as the number of PUs in that locality + * (e.g. the weight of its cpuset). + * + * Best locality nodes are nodes with smaller locality + * (nodes that are local to very few PUs). + * Poor locality nodes are nodes with larger locality + * (nodes that are local to the entire machine). + * + * No initiator is involved when looking at this attribute. + * The corresponding attribute flags are ::HWLOC_MEMATTR_FLAG_HIGHER_FIRST. + */ + HWLOC_MEMATTR_ID_LOCALITY = 1, + + /** \brief "Bandwidth". + * The bandwidth is returned in MiB/s, as seen from the given initiator location. + * Best bandwidth nodes are nodes with higher bandwidth. + * The corresponding attribute flags are ::HWLOC_MEMATTR_FLAG_HIGHER_FIRST + * and ::HWLOC_MEMATTR_FLAG_NEED_INITIATOR. + */ + HWLOC_MEMATTR_ID_BANDWIDTH = 2, + + /** \brief "Latency". + * The latency is returned as nanoseconds, as seen from the given initiator location. + * Best latency nodes are nodes with smaller latency. + * The corresponding attribute flags are ::HWLOC_MEMATTR_FLAG_LOWER_FIRST + * and ::HWLOC_MEMATTR_FLAG_NEED_INITIATOR. + */ + HWLOC_MEMATTR_ID_LATENCY = 3 + + /* TODO read vs write, persistence? */ +}; + +/** \brief A memory attribute identifier. + * May be either one of ::hwloc_memattr_id_e or a new id returned by hwloc_memattr_register(). + */ +typedef unsigned hwloc_memattr_id_t; + +/** \brief Return the identifier of the memory attribute with the given name. + */ +HWLOC_DECLSPEC int +hwloc_memattr_get_by_name(hwloc_topology_t topology, + const char *name, + hwloc_memattr_id_t *id); + + +/** \brief Type of location. */ +enum hwloc_location_type_e { + /** \brief Location is given as a cpuset, in the location cpuset union field. \hideinitializer */ + HWLOC_LOCATION_TYPE_CPUSET = 1, + /** \brief Location is given as an object, in the location object union field. \hideinitializer */ + HWLOC_LOCATION_TYPE_OBJECT = 0 +}; + +/** \brief Where to measure attributes from. */ +struct hwloc_location { + /** \brief Type of location. */ + enum hwloc_location_type_e type; + /** \brief Actual location. 
*/ + union hwloc_location_u { + /** \brief Location as a cpuset, when the location type is ::HWLOC_LOCATION_TYPE_CPUSET. */ + hwloc_cpuset_t cpuset; + /** \brief Location as an object, when the location type is ::HWLOC_LOCATION_TYPE_OBJECT. */ + hwloc_obj_t object; + } location; +}; + + +/** \brief Flags for selecting target NUMA nodes. */ +enum hwloc_local_numanode_flag_e { + /** \brief Select NUMA nodes whose locality is larger than the given cpuset. + * For instance, if a single PU (or its cpuset) is given in \p initiator, + * select all nodes close to the package that contains this PU. + * \hideinitializer + */ + HWLOC_LOCAL_NUMANODE_FLAG_LARGER_LOCALITY = (1UL<<0), + + /** \brief Select NUMA nodes whose locality is smaller than the given cpuset. + * For instance, if a package (or its cpuset) is given in \p initiator, + * also select nodes that are attached to only a half of that package. + * \hideinitializer + */ + HWLOC_LOCAL_NUMANODE_FLAG_SMALLER_LOCALITY = (1UL<<1), + + /** \brief Select all NUMA nodes in the topology. + * The initiator \p initiator is ignored. + * \hideinitializer + */ + HWLOC_LOCAL_NUMANODE_FLAG_ALL = (1UL<<2) +}; + +/** \brief Return an array of local NUMA nodes. + * + * By default only select the NUMA nodes whose locality is exactly + * the given \p location. More nodes may be selected if additional flags + * are given as an OR'ed set of ::hwloc_local_numanode_flag_e. + * + * If \p location is given as an explicit object, its CPU set is used + * to find NUMA nodes with the corresponding locality. + * If the object does not have a CPU set (e.g. I/O object), the CPU + * parent (where the I/O object is attached) is used. + * + * On input, \p nr points to the number of nodes that may be stored + * in the \p nodes array. + * On output, \p nr will be changed to the number of stored nodes, + * or the number of nodes that would have been stored if there were + * enough room. + * + * \note Some of these NUMA nodes may not have any memory attribute + * values and hence not be reported as actual targets in other functions. + * + * \note The number of NUMA nodes in the topology (obtained by + * hwloc_bitmap_weight() on the root object nodeset) may be used + * to allocate the \p nodes array. + * + * \note When an object CPU set is given as locality, for instance a Package, + * and when flags contain both ::HWLOC_LOCAL_NUMANODE_FLAG_LARGER_LOCALITY + * and ::HWLOC_LOCAL_NUMANODE_FLAG_SMALLER_LOCALITY, + * the returned array corresponds to the nodeset of that object. + */ +HWLOC_DECLSPEC int +hwloc_get_local_numanode_objs(hwloc_topology_t topology, + struct hwloc_location *location, + unsigned *nr, + hwloc_obj_t *nodes, + unsigned long flags); + + + +/** \brief Return an attribute value for a specific target NUMA node. + * + * If the attribute does not relate to a specific initiator + * (it does not have the flag ::HWLOC_MEMATTR_FLAG_NEED_INITIATOR), + * location \p initiator is ignored and may be \c NULL. + * + * \p flags must be \c 0 for now. + * + * \note The initiator \p initiator should be of type ::HWLOC_LOCATION_TYPE_CPUSET + * when referring to accesses performed by CPU cores. + * ::HWLOC_LOCATION_TYPE_OBJECT is currently unused internally by hwloc, + * but users may for instance use it to provide custom information about + * host memory accesses performed by GPUs.
+ */ +HWLOC_DECLSPEC int +hwloc_memattr_get_value(hwloc_topology_t topology, + hwloc_memattr_id_t attribute, + hwloc_obj_t target_node, + struct hwloc_location *initiator, + unsigned long flags, + hwloc_uint64_t *value); + +/** \brief Return the best target NUMA node for the given attribute and initiator. + * + * If the attribute does not relate to a specific initiator + * (it does not have the flag ::HWLOC_MEMATTR_FLAG_NEED_INITIATOR), + * location \p initiator is ignored and may be \c NULL. + * + * If \p value is non \c NULL, the corresponding value is returned there. + * + * If multiple targets have the same attribute values, only one is + * returned (and there is no way to clarify how that one is chosen). + * Applications that want to detect targets with identical/similar + * values, or that want to look at values for multiple attributes, + * should rather get all values using hwloc_memattr_get_value() + * and manually select the target they consider the best. + * + * \p flags must be \c 0 for now. + * + * If there are no matching targets, \c -1 is returned with \p errno set to \c ENOENT. + * + * \note The initiator \p initiator should be of type ::HWLOC_LOCATION_TYPE_CPUSET + * when referring to accesses performed by CPU cores. + * ::HWLOC_LOCATION_TYPE_OBJECT is currently unused internally by hwloc, + * but users may for instance use it to provide custom information about + * host memory accesses performed by GPUs. + */ +HWLOC_DECLSPEC int +hwloc_memattr_get_best_target(hwloc_topology_t topology, + hwloc_memattr_id_t attribute, + struct hwloc_location *initiator, + unsigned long flags, + hwloc_obj_t *best_target, hwloc_uint64_t *value); + +/** \brief Return the best initiator for the given attribute and target NUMA node. + * + * If the attribute does not relate to a specific initiator + * (it does not have the flag ::HWLOC_MEMATTR_FLAG_NEED_INITIATOR), + * \c -1 is returned and \p errno is set to \c EINVAL. + * + * If \p value is non \c NULL, the corresponding value is returned there. + * + * If multiple initiators have the same attribute values, only one is + * returned (and there is no way to clarify how that one is chosen). + * Applications that want to detect initiators with identical/similar + * values, or that want to look at values for multiple attributes, + * should rather get all values using hwloc_memattr_get_value() + * and manually select the initiator they consider the best. + * + * The returned initiator should not be modified or freed; + * it belongs to the topology. + * + * \p flags must be \c 0 for now. + * + * If there are no matching initiators, \c -1 is returned with \p errno set to \c ENOENT. + */ +HWLOC_DECLSPEC int +hwloc_memattr_get_best_initiator(hwloc_topology_t topology, + hwloc_memattr_id_t attribute, + hwloc_obj_t target, + unsigned long flags, + struct hwloc_location *best_initiator, hwloc_uint64_t *value); + +/** @} */ + + +/** \defgroup hwlocality_memattrs_manage Managing memory attributes + * @{ + */ + +/** \brief Return the name of a memory attribute. + */ +HWLOC_DECLSPEC int +hwloc_memattr_get_name(hwloc_topology_t topology, + hwloc_memattr_id_t attribute, + const char **name); + +/** \brief Return the flags of the given attribute. + * + * Flags are an OR'ed set of ::hwloc_memattr_flag_e. + */ +HWLOC_DECLSPEC int +hwloc_memattr_get_flags(hwloc_topology_t topology, + hwloc_memattr_id_t attribute, + unsigned long *flags); + +/** \brief Memory attribute flags. + * Given to hwloc_memattr_register() and returned by hwloc_memattr_get_flags().
+ */ +enum hwloc_memattr_flag_e { + /** \brief The best nodes for this memory attribute are those with the higher values. + * For instance Bandwidth. + */ + HWLOC_MEMATTR_FLAG_HIGHER_FIRST = (1UL<<0), + /** \brief The best nodes for this memory attribute are those with the lower values. + * For instance Latency. + */ + HWLOC_MEMATTR_FLAG_LOWER_FIRST = (1UL<<1), + /** \brief The value returned for this memory attribute depends on the given initiator. + * For instance Bandwidth and Latency, but not Capacity. + */ + HWLOC_MEMATTR_FLAG_NEED_INITIATOR = (1UL<<2) +}; + +/** \brief Register a new memory attribute. + * + * Add a specific memory attribute that is not defined in ::hwloc_memattr_id_e. + * Flags are an OR'ed set of ::hwloc_memattr_flag_e. It must contain at least + * one of ::HWLOC_MEMATTR_FLAG_HIGHER_FIRST or ::HWLOC_MEMATTR_FLAG_LOWER_FIRST. + */ +HWLOC_DECLSPEC int +hwloc_memattr_register(hwloc_topology_t topology, + const char *name, + unsigned long flags, + hwloc_memattr_id_t *id); + +/** \brief Set an attribute value for a specific target NUMA node. + * + * If the attribute does not relate to a specific initiator + * (it does not have the flag ::HWLOC_MEMATTR_FLAG_NEED_INITIATOR), + * location \p initiator is ignored and may be \c NULL. + * + * The initiator will be copied into the topology; + * the caller should free anything allocated to store the initiator, + * for instance the cpuset. + * + * \p flags must be \c 0 for now. + * + * \note The initiator \p initiator should be of type ::HWLOC_LOCATION_TYPE_CPUSET + * when referring to accesses performed by CPU cores. + * ::HWLOC_LOCATION_TYPE_OBJECT is currently unused internally by hwloc, + * but users may for instance use it to provide custom information about + * host memory accesses performed by GPUs. + */ +HWLOC_DECLSPEC int +hwloc_memattr_set_value(hwloc_topology_t topology, + hwloc_memattr_id_t attribute, + hwloc_obj_t target_node, + struct hwloc_location *initiator, + unsigned long flags, + hwloc_uint64_t value); + +/** \brief Return the target NUMA nodes that have some values for a given attribute. + * + * Return targets for the given attribute in the \p targets array + * (for the given initiator if any). + * If \p values is not \c NULL, the corresponding attribute values + * are stored in the array it points to. + * + * On input, \p nr points to the number of targets that may be stored + * in the array \p targets (and \p values). + * On output, \p nr points to the number of targets (and values) that + * were actually found, even if some of them couldn't be stored in the array. + * Targets that couldn't be stored are ignored, but the function still + * returns success (\c 0). The caller may find out by comparing the value pointed to + * by \p nr before and after the function call. + * + * The returned targets should not be modified or freed; + * they belong to the topology. + * + * Argument \p initiator is ignored if the attribute does not relate to a specific + * initiator (it does not have the flag ::HWLOC_MEMATTR_FLAG_NEED_INITIATOR). + * Otherwise \p initiator may be non \c NULL to report only targets + * that have a value for that initiator. + * + * \p flags must be \c 0 for now. + * + * \note This function is meant for tools and debugging (listing internal information) + * rather than for application queries. Applications should rather select useful + * NUMA nodes with hwloc_get_local_numanode_objs() and then look at their attribute + * values.
+ * + * \note The initiator \p initiator should be of type ::HWLOC_LOCATION_TYPE_CPUSET + * when referring to accesses performed by CPU cores. + * ::HWLOC_LOCATION_TYPE_OBJECT is currently unused internally by hwloc, + * but users may for instance use it to provide custom information about + * host memory accesses performed by GPUs. + */ +HWLOC_DECLSPEC int +hwloc_memattr_get_targets(hwloc_topology_t topology, + hwloc_memattr_id_t attribute, + struct hwloc_location *initiator, + unsigned long flags, + unsigned *nrp, hwloc_obj_t *targets, hwloc_uint64_t *values); + +/** \brief Return the initiators that have values for a given attribute for a specific target NUMA node. + * + * Return initiators for the given attribute and target node in the + * \p initiators array. + * If \p values is not \c NULL, the corresponding attribute values + * are stored in the array it points to. + * + * On input, \p nr points to the number of initiators that may be stored + * in the array \p initiators (and \p values). + * On output, \p nr points to the number of initiators (and values) that + * were actually found, even if some of them couldn't be stored in the array. + * Initiators that couldn't be stored are ignored, but the function still + * returns success (\c 0). The caller may find out by comparing the value pointed to + * by \p nr before and after the function call. + * + * The returned initiators should not be modified or freed; + * they belong to the topology. + * + * \p flags must be \c 0 for now. + * + * If the attribute does not relate to a specific initiator + * (it does not have the flag ::HWLOC_MEMATTR_FLAG_NEED_INITIATOR), + * no initiator is returned. + * + * \note This function is meant for tools and debugging (listing internal information) + * rather than for application queries. Applications should rather select useful + * NUMA nodes with hwloc_get_local_numanode_objs() and then look at their attribute + * values for some relevant initiators. + */ +HWLOC_DECLSPEC int +hwloc_memattr_get_initiators(hwloc_topology_t topology, + hwloc_memattr_id_t attribute, + hwloc_obj_t target_node, + unsigned long flags, + unsigned *nr, struct hwloc_location *initiators, hwloc_uint64_t *values); +/** @} */ + +#ifdef __cplusplus +} /* extern "C" */ +#endif + + +#endif /* HWLOC_MEMATTR_H */ diff --git a/src/3rdparty/hwloc/include/hwloc/nvml.h b/src/3rdparty/hwloc/include/hwloc/nvml.h index 1bc2599f..9d578903 100644 --- a/src/3rdparty/hwloc/include/hwloc/nvml.h +++ b/src/3rdparty/hwloc/include/hwloc/nvml.h @@ -1,5 +1,5 @@ /* - * Copyright © 2012-2016 Inria. All rights reserved. + * Copyright © 2012-2020 Inria. All rights reserved. * See COPYING in top-level directory. */ @@ -36,7 +36,7 @@ extern "C" { * @{ */ -/** \brief Get the CPU set of logical processors that are physically +/** \brief Get the CPU set of processors that are physically * close to NVML device \p device. * * Return the CPU set describing the locality of the NVML device \p device. diff --git a/src/3rdparty/hwloc/include/hwloc/opencl.h b/src/3rdparty/hwloc/include/hwloc/opencl.h index 99dfb0c8..d498d606 100644 --- a/src/3rdparty/hwloc/include/hwloc/opencl.h +++ b/src/3rdparty/hwloc/include/hwloc/opencl.h @@ -1,5 +1,5 @@ /* - * Copyright © 2012-2019 Inria. All rights reserved. + * Copyright © 2012-2020 Inria. All rights reserved. * Copyright © 2013, 2018 Université Bordeaux. All right reserved. * See COPYING in top-level directory.
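
With memattrs.h now complete above, here is a minimal sketch (not part of the patch) tying its declarations together: list the NUMA nodes local to a cpuset, query their bandwidth, then ask for the best target. The fixed array of 16 nodes is an arbitrary assumption, as are "topology" and "cpuset".

/* Illustrative sketch only, not part of the patch.
 * Assumes "topology" is a loaded hwloc_topology_t and "cpuset"
 * describes where the program runs. Error handling is minimal. */
#include <stdio.h>
#include "hwloc.h"
#include "hwloc/memattrs.h"

static void demo_memattrs(hwloc_topology_t topology, hwloc_cpuset_t cpuset)
{
  struct hwloc_location initiator;
  hwloc_obj_t nodes[16], best;
  unsigned i, nr = 16;
  hwloc_uint64_t value;

  initiator.type = HWLOC_LOCATION_TYPE_CPUSET;
  initiator.location.cpuset = cpuset;

  /* List NUMA nodes whose locality matches the cpuset... */
  if (hwloc_get_local_numanode_objs(topology, &initiator, &nr, nodes, 0) < 0)
    return;
  if (nr > 16)
    nr = 16; /* only the first 16 were stored */

  /* ...and query their bandwidth as seen from that cpuset. */
  for (i = 0; i < nr; i++)
    if (!hwloc_memattr_get_value(topology, HWLOC_MEMATTR_ID_BANDWIDTH,
                                 nodes[i], &initiator, 0, &value))
      printf("NUMA node L#%u: %llu MiB/s\n", nodes[i]->logical_index,
             (unsigned long long) value);

  /* Or directly ask for the best target for this attribute. */
  if (!hwloc_memattr_get_best_target(topology, HWLOC_MEMATTR_ID_BANDWIDTH,
                                     &initiator, 0, &best, &value))
    printf("best node is L#%u (%llu MiB/s)\n", best->logical_index,
           (unsigned long long) value);
}
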
*/ @@ -109,7 +109,7 @@ hwloc_opencl_get_device_pci_busid(cl_device_id device, return -1; } -/** \brief Get the CPU set of logical processors that are physically +/** \brief Get the CPU set of processors that are physically * close to OpenCL device \p device. * * Return the CPU set describing the locality of the OpenCL device \p device. diff --git a/src/3rdparty/hwloc/include/hwloc/openfabrics-verbs.h b/src/3rdparty/hwloc/include/hwloc/openfabrics-verbs.h index d247a8b1..bbf25d0f 100644 --- a/src/3rdparty/hwloc/include/hwloc/openfabrics-verbs.h +++ b/src/3rdparty/hwloc/include/hwloc/openfabrics-verbs.h @@ -1,6 +1,6 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2016 Inria. All rights reserved. + * Copyright © 2009-2020 Inria. All rights reserved. * Copyright © 2009-2010 Université Bordeaux * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. @@ -41,7 +41,7 @@ extern "C" { * @{ */ -/** \brief Get the CPU set of logical processors that are physically +/** \brief Get the CPU set of processors that are physically * close to device \p ibdev. * * Return the CPU set describing the locality of the OpenFabrics diff --git a/src/3rdparty/hwloc/include/hwloc/plugins.h b/src/3rdparty/hwloc/include/hwloc/plugins.h index 88faf538..06e1c3e9 100644 --- a/src/3rdparty/hwloc/include/hwloc/plugins.h +++ b/src/3rdparty/hwloc/include/hwloc/plugins.h @@ -313,7 +313,13 @@ struct hwloc_component { * @{ */ +/** \brief Check whether insertion errors are hidden */ +HWLOC_DECLSPEC int hwloc_hide_errors(void); + /** \brief Add an object to the topology. + * + * Insert new object \p obj in the topology starting under existing object \p root + * (if \c NULL, the topology root object is used). * * It is sorted along the tree of other objects according to the inclusion of * cpusets, to eventually be added as a child of the smallest object including @@ -327,32 +333,20 @@ struct hwloc_component { * * This shall only be called before levels are built. * - * In case of error, hwloc_report_os_error() is called. - * * The caller should check whether the object type is filtered-out before calling this function. * * The topology cpuset/nodesets will be enlarged to include the object sets. * + * \p reason is a unique string identifying where and why this insertion call was performed + * (it will be displayed in case of internal insertion error). + * * Returns the object on success. * Returns NULL and frees obj on error. * Returns another object and frees obj if it was merged with an identical pre-existing object. */ -HWLOC_DECLSPEC struct hwloc_obj *hwloc_insert_object_by_cpuset(struct hwloc_topology *topology, hwloc_obj_t obj); - -/** \brief Type of error callbacks during object insertion */ -typedef void (*hwloc_report_error_t)(const char * msg, int line); -/** \brief Report an insertion error from a backend */ -HWLOC_DECLSPEC void hwloc_report_os_error(const char * msg, int line); -/** \brief Check whether insertion errors are hidden */ -HWLOC_DECLSPEC int hwloc_hide_errors(void); - -/** \brief Add an object to the topology and specify which error callback to use. - * - * This function is similar to hwloc_insert_object_by_cpuset() but it allows specifying - * where to start insertion from (if \p root is NULL, the topology root object is used), - * and specifying the error callback. 
- */ -HWLOC_DECLSPEC struct hwloc_obj *hwloc__insert_object_by_cpuset(struct hwloc_topology *topology, hwloc_obj_t root, hwloc_obj_t obj, hwloc_report_error_t report_error); +HWLOC_DECLSPEC hwloc_obj_t +hwloc__insert_object_by_cpuset(struct hwloc_topology *topology, hwloc_obj_t root, + hwloc_obj_t obj, const char *reason); /** \brief Insert an object somewhere in the topology. * diff --git a/src/3rdparty/hwloc/include/hwloc/rename.h b/src/3rdparty/hwloc/include/hwloc/rename.h index 224e2577..c2a30485 100644 --- a/src/3rdparty/hwloc/include/hwloc/rename.h +++ b/src/3rdparty/hwloc/include/hwloc/rename.h @@ -1,6 +1,6 @@ /* * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. - * Copyright © 2010-2019 Inria. All rights reserved. + * Copyright © 2010-2020 Inria. All rights reserved. * See COPYING in top-level directory. */ @@ -119,6 +119,7 @@ extern "C" { #define HWLOC_TOPOLOGY_FLAG_INCLUDE_DISALLOWED HWLOC_NAME_CAPS(TOPOLOGY_FLAG_WITH_DISALLOWED) #define HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM HWLOC_NAME_CAPS(TOPOLOGY_FLAG_IS_THISSYSTEM) #define HWLOC_TOPOLOGY_FLAG_THISSYSTEM_ALLOWED_RESOURCES HWLOC_NAME_CAPS(TOPOLOGY_FLAG_THISSYSTEM_ALLOWED_RESOURCES) +#define HWLOC_TOPOLOGY_FLAG_IMPORT_SUPPORT HWLOC_NAME_CAPS(TOPOLOGY_FLAG_IMPORT_SUPPORT) #define hwloc_topology_set_pid HWLOC_NAME(topology_set_pid) #define hwloc_topology_set_synthetic HWLOC_NAME(topology_set_synthetic) @@ -134,6 +135,7 @@ extern "C" { #define hwloc_topology_discovery_support HWLOC_NAME(topology_discovery_support) #define hwloc_topology_cpubind_support HWLOC_NAME(topology_cpubind_support) #define hwloc_topology_membind_support HWLOC_NAME(topology_membind_support) +#define hwloc_topology_misc_support HWLOC_NAME(topology_misc_support) #define hwloc_topology_support HWLOC_NAME(topology_support) #define hwloc_topology_get_support HWLOC_NAME(topology_get_support) @@ -170,6 +172,7 @@ extern "C" { #define hwloc_topology_alloc_group_object HWLOC_NAME(topology_alloc_group_object) #define hwloc_topology_insert_group_object HWLOC_NAME(topology_insert_group_object) #define hwloc_obj_add_other_obj_sets HWLOC_NAME(obj_add_other_obj_sets) +#define hwloc_topology_refresh HWLOC_NAME(topology_refresh) #define hwloc_topology_get_depth HWLOC_NAME(topology_get_depth) #define hwloc_get_type_depth HWLOC_NAME(get_type_depth) @@ -367,6 +370,51 @@ extern "C" { #define hwloc_cpuset_to_nodeset HWLOC_NAME(cpuset_to_nodeset) #define hwloc_cpuset_from_nodeset HWLOC_NAME(cpuset_from_nodeset) +/* memattrs.h */ + +#define hwloc_memattr_id_e HWLOC_NAME(memattr_id_e) +#define HWLOC_MEMATTR_ID_CAPACITY HWLOC_NAME_CAPS(MEMATTR_ID_CAPACITY) +#define HWLOC_MEMATTR_ID_LOCALITY HWLOC_NAME_CAPS(MEMATTR_ID_LOCALITY) +#define HWLOC_MEMATTR_ID_BANDWIDTH HWLOC_NAME_CAPS(MEMATTR_ID_BANDWIDTH) +#define HWLOC_MEMATTR_ID_LATENCY HWLOC_NAME_CAPS(MEMATTR_ID_LATENCY) + +#define hwloc_memattr_id_t HWLOC_NAME(memattr_id_t) +#define hwloc_memattr_get_by_name HWLOC_NAME(memattr_get_by_name) + +#define hwloc_location HWLOC_NAME(location) +#define hwloc_location_type_e HWLOC_NAME(location_type_e) +#define HWLOC_LOCATION_TYPE_OBJECT HWLOC_NAME_CAPS(LOCATION_TYPE_OBJECT) +#define HWLOC_LOCATION_TYPE_CPUSET HWLOC_NAME_CAPS(LOCATION_TYPE_CPUSET) +#define hwloc_location_u HWLOC_NAME(location_u) + +#define hwloc_memattr_get_value HWLOC_NAME(memattr_get_value) +#define hwloc_memattr_get_best_target HWLOC_NAME(memattr_get_best_target) +#define hwloc_memattr_get_best_initiator HWLOC_NAME(memattr_get_best_initiator) + +#define hwloc_local_numanode_flag_e HWLOC_NAME(local_numanode_flag_e) 
+#define HWLOC_LOCAL_NUMANODE_FLAG_LARGER_LOCALITY HWLOC_NAME_CAPS(LOCAL_NUMANODE_FLAG_LARGER_LOCALITY) +#define HWLOC_LOCAL_NUMANODE_FLAG_SMALLER_LOCALITY HWLOC_NAME_CAPS(LOCAL_NUMANODE_FLAG_SMALLER_LOCALITY) +#define HWLOC_LOCAL_NUMANODE_FLAG_ALL HWLOC_NAME_CAPS(LOCAL_NUMANODE_FLAG_ALL) +#define hwloc_get_local_numanode_objs HWLOC_NAME(get_local_numanode_objs) + +#define hwloc_memattr_get_name HWLOC_NAME(memattr_get_name) +#define hwloc_memattr_get_flags HWLOC_NAME(memattr_get_flags) +#define hwloc_memattr_flag_e HWLOC_NAME(memattr_flag_e) +#define HWLOC_MEMATTR_FLAG_HIGHER_FIRST HWLOC_NAME_CAPS(MEMATTR_FLAG_HIGHER_FIRST) +#define HWLOC_MEMATTR_FLAG_LOWER_FIRST HWLOC_NAME_CAPS(MEMATTR_FLAG_LOWER_FIRST) +#define HWLOC_MEMATTR_FLAG_NEED_INITIATOR HWLOC_NAME_CAPS(MEMATTR_FLAG_NEED_INITIATOR) +#define hwloc_memattr_register HWLOC_NAME(memattr_register) +#define hwloc_memattr_set_value HWLOC_NAME(memattr_set_value) +#define hwloc_memattr_get_targets HWLOC_NAME(memattr_get_targets) +#define hwloc_memattr_get_initiators HWLOC_NAME(memattr_get_initiators) + +/* cpukinds.h */ + +#define hwloc_cpukinds_get_nr HWLOC_NAME(cpukinds_get_nr) +#define hwloc_cpukinds_get_by_cpuset HWLOC_NAME(cpukinds_get_by_cpuset) +#define hwloc_cpukinds_get_info HWLOC_NAME(cpukinds_get_info) +#define hwloc_cpukinds_register HWLOC_NAME(cpukinds_register) + /* export.h */ #define hwloc_topology_export_xml_flags_e HWLOC_NAME(topology_export_xml_flags_e) @@ -510,6 +558,12 @@ extern "C" { #define hwloc_nvml_get_device_osdev HWLOC_NAME(nvml_get_device_osdev) #define hwloc_nvml_get_device_osdev_by_index HWLOC_NAME(nvml_get_device_osdev_by_index) +/* rsmi.h */ + +#define hwloc_rsmi_get_device_cpuset HWLOC_NAME(rsmi_get_device_cpuset) +#define hwloc_rsmi_get_device_osdev HWLOC_NAME(rsmi_get_device_osdev) +#define hwloc_rsmi_get_device_osdev_by_index HWLOC_NAME(rsmi_get_device_osdev_by_index) + /* gl.h */ #define hwloc_gl_get_display_osdev_by_port_device HWLOC_NAME(gl_get_display_osdev_by_port_device) @@ -547,9 +601,6 @@ extern "C" { #define hwloc_plugin_check_namespace HWLOC_NAME(plugin_check_namespace) -#define hwloc_insert_object_by_cpuset HWLOC_NAME(insert_object_by_cpuset) -#define hwloc_report_error_t HWLOC_NAME(report_error_t) -#define hwloc_report_os_error HWLOC_NAME(report_os_error) #define hwloc_hide_errors HWLOC_NAME(hide_errors) #define hwloc__insert_object_by_cpuset HWLOC_NAME(_insert_object_by_cpuset) #define hwloc_insert_object_by_parent HWLOC_NAME(insert_object_by_parent) @@ -683,6 +734,7 @@ extern "C" { #define hwloc_cuda_component HWLOC_NAME(cuda_component) #define hwloc_gl_component HWLOC_NAME(gl_component) #define hwloc_nvml_component HWLOC_NAME(nvml_component) +#define hwloc_rsmi_component HWLOC_NAME(rsmi_component) #define hwloc_opencl_component HWLOC_NAME(opencl_component) #define hwloc_pci_component HWLOC_NAME(pci_component) @@ -691,6 +743,8 @@ extern "C" { /* private/private.h */ +#define hwloc_internal_location_s HWLOC_NAME(internal_location_s) + #define hwloc_special_level_s HWLOC_NAME(special_level_s) #define hwloc_pci_forced_locality_s HWLOC_NAME(pci_forced_locality_s) @@ -713,6 +767,8 @@ extern "C" { #define hwloc__attach_memory_object HWLOC_NAME(insert_memory_object) +#define hwloc_get_obj_by_type_and_gp_index HWLOC_NAME(get_obj_by_type_and_gp_index) + #define hwloc_pci_discovery_init HWLOC_NAME(pci_discovery_init) #define hwloc_pci_discovery_prepare HWLOC_NAME(pci_discovery_prepare) #define hwloc_pci_discovery_exit HWLOC_NAME(pci_discovery_exit) @@ -723,6 +779,7 @@ extern "C" { #define 
hwloc__add_info_nodup HWLOC_NAME(_add_info_nodup) #define hwloc__move_infos HWLOC_NAME(_move_infos) #define hwloc__free_infos HWLOC_NAME(_free_infos) +#define hwloc__tma_dup_infos HWLOC_NAME(_tma_dup_infos) #define hwloc_binding_hooks HWLOC_NAME(binding_hooks) #define hwloc_set_native_binding_hooks HWLOC_NAME(set_native_binding_hooks) @@ -764,6 +821,24 @@ extern "C" { #define hwloc_internal_distances_add_by_index HWLOC_NAME(internal_distances_add_by_index) #define hwloc_internal_distances_invalidate_cached_objs HWLOC_NAME(hwloc_internal_distances_invalidate_cached_objs) +#define hwloc_internal_memattr_s HWLOC_NAME(internal_memattr_s) +#define hwloc_internal_memattr_target_s HWLOC_NAME(internal_memattr_target_s) +#define hwloc_internal_memattr_initiator_s HWLOC_NAME(internal_memattr_initiator_s) +#define hwloc_internal_memattrs_init HWLOC_NAME(internal_memattrs_init) +#define hwloc_internal_memattrs_prepare HWLOC_NAME(internal_memattrs_prepare) +#define hwloc_internal_memattrs_dup HWLOC_NAME(internal_memattrs_dup) +#define hwloc_internal_memattrs_destroy HWLOC_NAME(internal_memattrs_destroy) +#define hwloc_internal_memattrs_need_refresh HWLOC_NAME(internal_memattrs_need_refresh) +#define hwloc_internal_memattrs_refresh HWLOC_NAME(internal_memattrs_refresh) + +#define hwloc_internal_cpukind_s HWLOC_NAME(internal_cpukind_s) +#define hwloc_internal_cpukinds_init HWLOC_NAME(internal_cpukinds_init) +#define hwloc_internal_cpukinds_destroy HWLOC_NAME(internal_cpukinds_destroy) +#define hwloc_internal_cpukinds_dup HWLOC_NAME(internal_cpukinds_dup) +#define hwloc_internal_cpukinds_register HWLOC_NAME(internal_cpukinds_register) +#define hwloc_internal_cpukinds_rank HWLOC_NAME(internal_cpukinds_rank) +#define hwloc_internal_cpukinds_restrict HWLOC_NAME(internal_cpukinds_restrict) + #define hwloc_encode_to_base64 HWLOC_NAME(encode_to_base64) #define hwloc_decode_from_base64 HWLOC_NAME(decode_from_base64) diff --git a/src/3rdparty/hwloc/include/private/autogen/config.h b/src/3rdparty/hwloc/include/private/autogen/config.h index a97bdfea..687e82bc 100644 --- a/src/3rdparty/hwloc/include/private/autogen/config.h +++ b/src/3rdparty/hwloc/include/private/autogen/config.h @@ -1,8 +1,8 @@ /* * Copyright © 2009, 2011, 2012 CNRS. All rights reserved. - * Copyright © 2009-2018 Inria. All rights reserved. + * Copyright © 2009-2020 Inria. All rights reserved. * Copyright © 2009, 2011, 2012, 2015 Université Bordeaux. All rights reserved. - * Copyright © 2009 Cisco Systems, Inc. All rights reserved. + * Copyright © 2009-2020 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -575,7 +575,7 @@ #define PACKAGE "hwloc" /* Define to the address where bug reports for this package should be sent. */ -#define PACKAGE_BUGREPORT "http://www.open-mpi.org/projects/hwloc/" +#define PACKAGE_BUGREPORT "https://www.open-mpi.org/projects/hwloc/" /* Define to the full name of this package. */ #define PACKAGE_NAME "hwloc" @@ -668,5 +668,9 @@ /* Define this to the thread ID type */ #define hwloc_thread_t HANDLE +/* Define to 1 if you have the declaration of `GetModuleFileName', and to 0 if + you don't. */ +#define HAVE_DECL_GETMODULEFILENAME 1 + #endif /* HWLOC_CONFIGURE_H */ diff --git a/src/3rdparty/hwloc/include/private/debug.h b/src/3rdparty/hwloc/include/private/debug.h index 637e0141..29dca70b 100644 --- a/src/3rdparty/hwloc/include/private/debug.h +++ b/src/3rdparty/hwloc/include/private/debug.h @@ -1,6 +1,6 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2017 Inria. 
All rights reserved.
+ * Copyright © 2009-2020 Inria. All rights reserved.
  * Copyright © 2009, 2011 Université Bordeaux
  * Copyright © 2011 Cisco Systems, Inc. All rights reserved.
  * See COPYING in top-level directory.
@@ -19,6 +19,10 @@
 #include <stdio.h>
 #endif
 
+#ifdef ANDROID
+extern void JNIDebug(char *text);
+#endif
+
 /* Compile-time assertion */
 #define HWLOC_BUILD_ASSERT(condition) ((void)sizeof(char[1 - 2*!(condition)]))
 
@@ -44,9 +48,17 @@ static __hwloc_inline void hwloc_debug(const char *s __hwloc_attribute_unused, ...)
 {
 #ifdef HWLOC_DEBUG
   if (hwloc_debug_enabled()) {
+#ifdef ANDROID
+    char buffer[256];
+#endif
    va_list ap;
 
    va_start(ap, s);
+#ifdef ANDROID
+    vsprintf(buffer, s, ap);
+    JNIDebug(buffer);
+#else
    vfprintf(stderr, s, ap);
+#endif
    va_end(ap);
  }
 #endif
@@ -57,21 +69,21 @@ static __hwloc_inline void hwloc_debug(const char *s __hwloc_attribute_unused, ...)
   if (hwloc_debug_enabled()) { \
     char *s; \
     hwloc_bitmap_asprintf(&s, bitmap); \
-    fprintf(stderr, fmt, s); \
+    hwloc_debug(fmt, s); \
     free(s); \
   } } while (0)
 #define hwloc_debug_1arg_bitmap(fmt, arg1, bitmap) do { \
   if (hwloc_debug_enabled()) { \
     char *s; \
     hwloc_bitmap_asprintf(&s, bitmap); \
-    fprintf(stderr, fmt, arg1, s); \
+    hwloc_debug(fmt, arg1, s); \
     free(s); \
   } } while (0)
 #define hwloc_debug_2args_bitmap(fmt, arg1, arg2, bitmap) do { \
   if (hwloc_debug_enabled()) { \
     char *s; \
     hwloc_bitmap_asprintf(&s, bitmap); \
-    fprintf(stderr, fmt, arg1, arg2, s); \
+    hwloc_debug(fmt, arg1, arg2, s); \
     free(s); \
   } } while (0)
 #else
diff --git a/src/3rdparty/hwloc/include/private/internal-components.h b/src/3rdparty/hwloc/include/private/internal-components.h
index d3c89783..0b82a45c 100644
--- a/src/3rdparty/hwloc/include/private/internal-components.h
+++ b/src/3rdparty/hwloc/include/private/internal-components.h
@@ -30,6 +30,7 @@ HWLOC_DECLSPEC extern const struct hwloc_component hwloc_x86_component;
 HWLOC_DECLSPEC extern const struct hwloc_component hwloc_cuda_component;
 HWLOC_DECLSPEC extern const struct hwloc_component hwloc_gl_component;
 HWLOC_DECLSPEC extern const struct hwloc_component hwloc_nvml_component;
+HWLOC_DECLSPEC extern const struct hwloc_component hwloc_rsmi_component;
 HWLOC_DECLSPEC extern const struct hwloc_component hwloc_opencl_component;
 HWLOC_DECLSPEC extern const struct hwloc_component hwloc_pci_component;
 
diff --git a/src/3rdparty/hwloc/include/private/private.h b/src/3rdparty/hwloc/include/private/private.h
index 84d95bb3..e0782659 100644
--- a/src/3rdparty/hwloc/include/private/private.h
+++ b/src/3rdparty/hwloc/include/private/private.h
@@ -1,6 +1,6 @@
 /*
  * Copyright © 2009 CNRS
- * Copyright © 2009-2019 Inria. All rights reserved.
+ * Copyright © 2009-2020 Inria. All rights reserved.
  * Copyright © 2009-2012, 2020 Université Bordeaux
  * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved.
 *
@@ -40,7 +40,19 @@
 #endif
 #include <string.h>
 
-#define HWLOC_TOPOLOGY_ABI 0x20100 /* version of the layout of struct topology */
+#define HWLOC_TOPOLOGY_ABI 0x20400 /* version of the layout of struct topology */
+
+struct hwloc_internal_location_s {
+  enum hwloc_location_type_e type;
+  union {
+    struct {
+      hwloc_obj_t obj; /* cached between refreshes */
+      uint64_t gp_index;
+      hwloc_obj_type_t type;
+    } object; /* if type == HWLOC_LOCATION_TYPE_OBJECT */
+    hwloc_cpuset_t cpuset; /* if type == HWLOC_LOCATION_TYPE_CPUSET */
+  } location;
+};
 
 /*****************************************************
  * WARNING:
@@ -163,6 +175,50 @@ struct hwloc_topology {
   } *first_dist, *last_dist;
   unsigned next_dist_id;
 
+  /* memory attributes */
+  unsigned nr_memattrs;
+  struct hwloc_internal_memattr_s {
+    /* memattr info */
+    char *name; /* TODO unit is implicit, in the documentation of standard attributes, or in the name? */
+    unsigned long flags;
+#define HWLOC_IMATTR_FLAG_STATIC_NAME (1U<<0) /* no need to free name */
+#define HWLOC_IMATTR_FLAG_CACHE_VALID (1U<<1) /* target and initiator are valid */
+#define HWLOC_IMATTR_FLAG_CONVENIENCE (1U<<2) /* convenience attribute reporting values from non-memattr attributes (R/O and no actual targets stored) */
+    unsigned iflags;
+
+    /* array of values */
+    unsigned nr_targets;
+    struct hwloc_internal_memattr_target_s {
+      /* target object */
+      hwloc_obj_t obj; /* cached between refreshes */
+      hwloc_obj_type_t type;
+      unsigned os_index; /* only used temporarily during discovery when there's no obj/gp_index yet */
+      hwloc_uint64_t gp_index;
+
+      /* value if there is no initiator for this attr */
+      hwloc_uint64_t noinitiator_value;
+      /* initiators otherwise */
+      unsigned nr_initiators;
+      struct hwloc_internal_memattr_initiator_s {
+        struct hwloc_internal_location_s initiator;
+        hwloc_uint64_t value;
+      } *initiators;
+    } *targets;
+  } *memattrs;
+
+  /* hybrid CPUs */
+  unsigned nr_cpukinds;
+  unsigned nr_cpukinds_allocated;
+  struct hwloc_internal_cpukind_s {
+    hwloc_cpuset_t cpuset;
+#define HWLOC_CPUKIND_EFFICIENCY_UNKNOWN -1
+    int efficiency;
+    int forced_efficiency; /* returned by the hardware or OS if any */
+    hwloc_uint64_t ranking_value; /* internal value for ranking */
+    unsigned nr_infos;
+    struct hwloc_info_s *infos;
+  } *cpukinds;
+
   int grouping;
   int grouping_verbose;
   unsigned grouping_nbaccuracies;
@@ -240,8 +296,9 @@ extern void hwloc_topology_clear(struct hwloc_topology *topology);
 /* insert memory object as memory child of normal parent */
 extern struct hwloc_obj * hwloc__attach_memory_object(struct hwloc_topology *topology, hwloc_obj_t parent,
-                                                      hwloc_obj_t obj,
-                                                      hwloc_report_error_t report_error);
+                                                      hwloc_obj_t obj, const char *reason);
+
+extern hwloc_obj_t hwloc_get_obj_by_type_and_gp_index(hwloc_topology_t topology, hwloc_obj_type_t type, uint64_t gp_index);
 
 extern void hwloc_pci_discovery_init(struct hwloc_topology *topology);
 extern void hwloc_pci_discovery_prepare(struct hwloc_topology *topology);
@@ -261,6 +318,7 @@ extern hwloc_obj_t hwloc_find_insert_io_parent_by_complete_cpuset(struct hwloc_t
 extern int hwloc__add_info(struct hwloc_info_s **infosp, unsigned *countp, const char *name, const char *value);
 extern int hwloc__add_info_nodup(struct hwloc_info_s **infosp, unsigned *countp, const char *name, const char *value, int replace);
 extern int hwloc__move_infos(struct hwloc_info_s **dst_infosp, unsigned *dst_countp, struct hwloc_info_s **src_infosp, unsigned *src_countp);
+extern int hwloc__tma_dup_infos(struct hwloc_tma *tma, struct hwloc_info_s
**dst_infosp, unsigned *dst_countp, struct hwloc_info_s *src_infos, unsigned src_count); extern void hwloc__free_infos(struct hwloc_info_s *infos, unsigned count); /* set native OS binding hooks */ @@ -354,6 +412,22 @@ extern int hwloc_internal_distances_add(hwloc_topology_t topology, const char *n extern int hwloc_internal_distances_add_by_index(hwloc_topology_t topology, const char *name, hwloc_obj_type_t unique_type, hwloc_obj_type_t *different_types, unsigned nbobjs, uint64_t *indexes, uint64_t *values, unsigned long kind, unsigned long flags); extern void hwloc_internal_distances_invalidate_cached_objs(hwloc_topology_t topology); +extern void hwloc_internal_memattrs_init(hwloc_topology_t topology); +extern void hwloc_internal_memattrs_prepare(hwloc_topology_t topology); +extern void hwloc_internal_memattrs_destroy(hwloc_topology_t topology); +extern void hwloc_internal_memattrs_need_refresh(hwloc_topology_t topology); +extern void hwloc_internal_memattrs_refresh(hwloc_topology_t topology); +extern int hwloc_internal_memattrs_dup(hwloc_topology_t new, hwloc_topology_t old); +extern int hwloc_internal_memattr_set_value(hwloc_topology_t topology, hwloc_memattr_id_t id, hwloc_obj_type_t target_type, hwloc_uint64_t target_gp_index, unsigned target_os_index, struct hwloc_internal_location_s *initiator, hwloc_uint64_t value); + +extern void hwloc_internal_cpukinds_init(hwloc_topology_t topology); +extern int hwloc_internal_cpukinds_rank(hwloc_topology_t topology); +extern void hwloc_internal_cpukinds_destroy(hwloc_topology_t topology); +extern int hwloc_internal_cpukinds_dup(hwloc_topology_t new, hwloc_topology_t old); +#define HWLOC_CPUKINDS_REGISTER_FLAG_OVERWRITE_FORCED_EFFICIENCY (1<<0) +extern int hwloc_internal_cpukinds_register(hwloc_topology_t topology, hwloc_cpuset_t cpuset, int forced_efficiency, const struct hwloc_info_s *infos, unsigned nr_infos, unsigned long flags); +extern void hwloc_internal_cpukinds_restrict(hwloc_topology_t topology); + /* encode src buffer into target buffer. * targsize must be at least 4*((srclength+2)/3)+1. * target will be 0-terminated. diff --git a/src/3rdparty/hwloc/include/private/xml.h b/src/3rdparty/hwloc/include/private/xml.h index f59fca1f..3af5ba1e 100644 --- a/src/3rdparty/hwloc/include/private/xml.h +++ b/src/3rdparty/hwloc/include/private/xml.h @@ -46,7 +46,7 @@ struct hwloc_xml_backend_data_s { int (*find_child)(struct hwloc__xml_import_state_s * state, struct hwloc__xml_import_state_s * childstate, char **tagp); int (*close_tag)(struct hwloc__xml_import_state_s * state); /* look for an explicit closing tag */ void (*close_child)(struct hwloc__xml_import_state_s * state); - int (*get_content)(struct hwloc__xml_import_state_s * state, char **beginp, size_t expected_length); /* return 0 on empty content (and sets beginp to empty string), 1 on actual content, -1 on error or unexpected content length */ + int (*get_content)(struct hwloc__xml_import_state_s * state, const char **beginp, size_t expected_length); /* return 0 on empty content (and sets beginp to empty string), 1 on actual content, -1 on error or unexpected content length */ void (*close_content)(struct hwloc__xml_import_state_s * state); char * msgprefix; void *data; /* libxml2 doc, or nolibxml buffer */ diff --git a/src/3rdparty/hwloc/src/bind.c b/src/3rdparty/hwloc/src/bind.c index 0bd85e25..2b5d0994 100644 --- a/src/3rdparty/hwloc/src/bind.c +++ b/src/3rdparty/hwloc/src/bind.c @@ -1,6 +1,6 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2019 Inria. All rights reserved. 
+ * Copyright © 2009-2020 Inria. All rights reserved.
  * Copyright © 2009-2010, 2012 Université Bordeaux
  * Copyright © 2011-2015 Cisco Systems, Inc. All rights reserved.
  * See COPYING in top-level directory.
@@ -921,5 +921,6 @@ hwloc_set_binding_hooks(struct hwloc_topology *topology)
     DO(mem,get_area_membind);
     DO(mem,get_area_memlocation);
     DO(mem,alloc_membind);
+#undef DO
   }
 }
diff --git a/src/3rdparty/hwloc/src/bitmap.c b/src/3rdparty/hwloc/src/bitmap.c
index 4791a694..cf071edb 100644
--- a/src/3rdparty/hwloc/src/bitmap.c
+++ b/src/3rdparty/hwloc/src/bitmap.c
@@ -1,6 +1,6 @@
 /*
  * Copyright © 2009 CNRS
- * Copyright © 2009-2018 Inria. All rights reserved.
+ * Copyright © 2009-2020 Inria. All rights reserved.
  * Copyright © 2009-2011 Université Bordeaux
  * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved.
  * See COPYING in top-level directory.
@@ -818,7 +818,7 @@ int hwloc_bitmap_nr_ulongs(const struct hwloc_bitmap_s *set)
     return -1;
 
   last = hwloc_bitmap_last(set);
-  return (last + HWLOC_BITS_PER_LONG-1)/HWLOC_BITS_PER_LONG;
+  return (last + HWLOC_BITS_PER_LONG)/HWLOC_BITS_PER_LONG;
 }
 
 int hwloc_bitmap_only(struct hwloc_bitmap_s * set, unsigned cpu)
diff --git a/src/3rdparty/hwloc/src/cpukinds.c b/src/3rdparty/hwloc/src/cpukinds.c
new file mode 100644
index 00000000..5f2dd1aa
--- /dev/null
+++ b/src/3rdparty/hwloc/src/cpukinds.c
@@ -0,0 +1,649 @@
+/*
+ * Copyright © 2020 Inria. All rights reserved.
+ * See COPYING in top-level directory.
+ */
+
+#include "private/autogen/config.h"
+#include "hwloc.h"
+#include "private/private.h"
+#include "private/debug.h"
+
+
+/*****************
+ * Basics
+ */
+
+void
+hwloc_internal_cpukinds_init(struct hwloc_topology *topology)
+{
+  topology->cpukinds = NULL;
+  topology->nr_cpukinds = 0;
+  topology->nr_cpukinds_allocated = 0;
+}
+
+void
+hwloc_internal_cpukinds_destroy(struct hwloc_topology *topology)
+{
+  unsigned i;
+  for(i=0; i<topology->nr_cpukinds; i++) {
+    struct hwloc_internal_cpukind_s *kind = &topology->cpukinds[i];
+    hwloc_bitmap_free(kind->cpuset);
+    hwloc__free_infos(kind->infos, kind->nr_infos);
+  }
+  free(topology->cpukinds);
+  topology->cpukinds = NULL;
+  topology->nr_cpukinds = 0;
+}
+
+int
+hwloc_internal_cpukinds_dup(hwloc_topology_t new, hwloc_topology_t old)
+{
+  struct hwloc_tma *tma = new->tma;
+  struct hwloc_internal_cpukind_s *kinds;
+  unsigned i;
+
+  kinds = hwloc_tma_malloc(tma, old->nr_cpukinds * sizeof(*kinds));
+  if (!kinds)
+    return -1;
+  new->cpukinds = kinds;
+  new->nr_cpukinds = old->nr_cpukinds;
+  memcpy(kinds, old->cpukinds, old->nr_cpukinds * sizeof(*kinds));
+
+  for(i=0; i<old->nr_cpukinds; i++) {
+    kinds[i].cpuset = hwloc_bitmap_tma_dup(tma, old->cpukinds[i].cpuset);
+    if (!kinds[i].cpuset) {
+      new->nr_cpukinds = i;
+      goto failed;
+    }
+    if (hwloc__tma_dup_infos(tma,
+                             &kinds[i].infos, &kinds[i].nr_infos,
+                             old->cpukinds[i].infos, old->cpukinds[i].nr_infos) < 0) {
+      assert(!tma || !tma->dontfree); /* this tma cannot fail to allocate */
+      hwloc_bitmap_free(kinds[i].cpuset);
+      new->nr_cpukinds = i;
+      goto failed;
+    }
+  }
+
+  return 0;
+
+ failed:
+  hwloc_internal_cpukinds_destroy(new);
+  return -1;
+}
+
+void
+hwloc_internal_cpukinds_restrict(hwloc_topology_t topology)
+{
+  unsigned i;
+  int removed = 0;
+  for(i=0; i<topology->nr_cpukinds; i++) {
+    struct hwloc_internal_cpukind_s *kind = &topology->cpukinds[i];
+    hwloc_bitmap_and(kind->cpuset, kind->cpuset, hwloc_get_root_obj(topology)->cpuset);
+    if (hwloc_bitmap_iszero(kind->cpuset)) {
+      hwloc_bitmap_free(kind->cpuset);
+      hwloc__free_infos(kind->infos, kind->nr_infos);
+      memmove(kind, kind+1, (topology->nr_cpukinds - i - 1)*sizeof(*kind));
+      i--;
+      topology->nr_cpukinds--;
+      removed = 1;
+    }
+  }
+  if (removed)
+    hwloc_internal_cpukinds_rank(topology);
+}
+
+
+/********************
+ * Registering
+ */
+
+static __hwloc_inline int
+hwloc__cpukind_check_duplicate_info(struct hwloc_internal_cpukind_s *kind,
+                                    const char *name, const char *value)
+{
+  unsigned i;
+  for(i=0; i<kind->nr_infos; i++)
+    if (!strcmp(kind->infos[i].name, name)
+        && !strcmp(kind->infos[i].value, value))
+      return 1;
+  return 0;
+}
+
+static __hwloc_inline void
+hwloc__cpukind_add_infos(struct hwloc_internal_cpukind_s *kind,
+                         const struct hwloc_info_s *infos, unsigned nr_infos)
+{
+  unsigned i;
+  for(i=0; i<nr_infos; i++) {
+    if (hwloc__cpukind_check_duplicate_info(kind, infos[i].name, infos[i].value))
+      continue;
+    hwloc__add_info(&kind->infos, &kind->nr_infos, infos[i].name, infos[i].value);
+  }
+}
+
+int
+hwloc_internal_cpukinds_register(hwloc_topology_t topology, hwloc_cpuset_t cpuset,
+                                 int forced_efficiency,
+                                 const struct hwloc_info_s *infos, unsigned nr_infos,
+                                 unsigned long flags)
+{
+  struct hwloc_internal_cpukind_s *kinds;
+  unsigned i, max, bits, oldnr, newnr;
+
+  if (hwloc_bitmap_iszero(cpuset)) {
+    hwloc_bitmap_free(cpuset);
+    errno = EINVAL;
+    return -1;
+  }
+
+  if (flags & ~HWLOC_CPUKINDS_REGISTER_FLAG_OVERWRITE_FORCED_EFFICIENCY) {
+    errno = EINVAL;
+    return -1;
+  }
+
+  /* TODO: for now, only Windows provides a forced efficiency.
+   * if another backend ever provides a conflicting value, the first backend value will be kept.
+   * (user-provided values are not an issue, they are meant to overwrite)
+   */
+
+  /* If we have N kinds currently, we may need 2N+1 kinds after inserting the new one:
+   * - each existing kind may get split into which PUs are in the new kind and which aren't.
+   * - some PUs might not have been in any kind yet.
+   */
+  max = 2 * topology->nr_cpukinds + 1;
+  /* Allocate the power-of-two above 2N+1. */
+  bits = hwloc_flsl(max-1) + 1;
+  max = 1U<<bits;
+
+  kinds = topology->cpukinds;
+  if (max > topology->nr_cpukinds_allocated) {
+    kinds = realloc(kinds, max * sizeof(*kinds));
+    if (!kinds) {
+      hwloc_bitmap_free(cpuset);
+      return -1;
+    }
+    memset(&kinds[topology->nr_cpukinds_allocated], 0, (max - topology->nr_cpukinds_allocated) * sizeof(*kinds));
+    topology->nr_cpukinds_allocated = max;
+    topology->cpukinds = kinds;
+  }
+
+  newnr = oldnr = topology->nr_cpukinds;
+  for(i=0; inr_cpukinds = newnr;
+  return 0;
+}
+
+int
+hwloc_cpukinds_register(hwloc_topology_t topology, hwloc_cpuset_t _cpuset,
+                        int forced_efficiency,
+                        unsigned nr_infos, struct hwloc_info_s *infos,
+                        unsigned long flags)
+{
+  hwloc_bitmap_t cpuset;
+  int err;
+
+  if (flags) {
+    errno = EINVAL;
+    return -1;
+  }
+
+  if (!_cpuset || hwloc_bitmap_iszero(_cpuset)) {
+    errno = EINVAL;
+    return -1;
+  }
+
+  cpuset = hwloc_bitmap_dup(_cpuset);
+  if (!cpuset)
+    return -1;
+
+  if (forced_efficiency < 0)
+    forced_efficiency = HWLOC_CPUKIND_EFFICIENCY_UNKNOWN;
+
+  err = hwloc_internal_cpukinds_register(topology, cpuset, forced_efficiency, infos, nr_infos, HWLOC_CPUKINDS_REGISTER_FLAG_OVERWRITE_FORCED_EFFICIENCY);
+  if (err < 0)
+    return err;
+
+  hwloc_internal_cpukinds_rank(topology);
+  return 0;
+}
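A minimal sketch, not part of the upstream patch, of how a caller could use the public hwloc_cpukinds_register() defined above; the PU range and the CoreType value are hypothetical.

#include "hwloc.h"
#include "hwloc/cpukinds.h"

/* Tag PUs with os_index 0-3 as an Atom-like kind. */
static int tag_little_cores(hwloc_topology_t topology)
{
  struct hwloc_info_s info = { (char *) "CoreType", (char *) "IntelAtom" };
  hwloc_bitmap_t set = hwloc_bitmap_alloc();
  int err;
  hwloc_bitmap_set_range(set, 0, 3);
  /* forced_efficiency < 0 means unknown; ranking then falls back to infos */
  err = hwloc_cpukinds_register(topology, set, -1, 1, &info, 0);
  hwloc_bitmap_free(set); /* the topology keeps its own duplicate */
  return err;
}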
+
+
+/*********************
+ * Ranking
+ */
+
+static int
+hwloc__cpukinds_check_duplicate_rankings(struct hwloc_topology *topology)
+{
+  unsigned i,j;
+  for(i=0; i<topology->nr_cpukinds; i++)
+    for(j=i+1; j<topology->nr_cpukinds; j++)
+      if (topology->cpukinds[i].forced_efficiency == topology->cpukinds[j].forced_efficiency)
+        /* if any duplicate, fail */
+        return -1;
+  return 0;
+}
+
+static int
+hwloc__cpukinds_try_rank_by_forced_efficiency(struct hwloc_topology *topology)
+{
+  unsigned i;
+
+  hwloc_debug("Trying to rank cpukinds by forced efficiency...\n");
+  for(i=0; i<topology->nr_cpukinds; i++) {
+    if (topology->cpukinds[i].forced_efficiency == HWLOC_CPUKIND_EFFICIENCY_UNKNOWN)
+      /* if any unknown, fail */
+      return -1;
+    topology->cpukinds[i].ranking_value = topology->cpukinds[i].forced_efficiency;
+  }
+
+  return hwloc__cpukinds_check_duplicate_rankings(topology);
+}
+
+struct hwloc_cpukinds_info_summary {
+  int have_max_freq;
+  int have_base_freq;
+  int have_intel_core_type;
+  struct hwloc_cpukind_info_summary {
+    unsigned intel_core_type; /* 1 for atom, 2 for core */
+    unsigned max_freq, base_freq; /* MHz, hence < 100000 */
+  } * summaries;
+};
+
+static void
+hwloc__cpukinds_summarize_info(struct hwloc_topology *topology,
+                               struct hwloc_cpukinds_info_summary *summary)
+{
+  unsigned i, j;
+
+  summary->have_max_freq = 1;
+  summary->have_base_freq = 1;
+  summary->have_intel_core_type = 1;
+
+  for(i=0; i<topology->nr_cpukinds; i++) {
+    struct hwloc_internal_cpukind_s *kind = &topology->cpukinds[i];
+    for(j=0; j<kind->nr_infos; j++) {
+      struct hwloc_info_s *info = &kind->infos[j];
+      if (!strcmp(info->name, "FrequencyMaxMHz")) {
+        summary->summaries[i].max_freq = atoi(info->value);
+      } else if (!strcmp(info->name, "FrequencyBaseMHz")) {
+        summary->summaries[i].base_freq = atoi(info->value);
+      } else if (!strcmp(info->name, "CoreType")) {
+        if (!strcmp(info->value, "IntelAtom"))
+          summary->summaries[i].intel_core_type = 1;
+        else if (!strcmp(info->value, "IntelCore"))
+          summary->summaries[i].intel_core_type = 2;
+      }
+    }
+    hwloc_debug("cpukind #%u has intel_core_type %u max_freq %u base_freq %u\n",
+                i, summary->summaries[i].intel_core_type,
+                summary->summaries[i].max_freq, summary->summaries[i].base_freq);
+    if (!summary->summaries[i].base_freq)
+      summary->have_base_freq = 0;
+    if (!summary->summaries[i].max_freq)
+      summary->have_max_freq = 0;
+    if (!summary->summaries[i].intel_core_type)
+      summary->have_intel_core_type = 0;
+  }
+}
+
+enum hwloc_cpukinds_ranking {
+  HWLOC_CPUKINDS_RANKING_DEFAULT, /* forced + frequency on ARM, forced + coretype_frequency otherwise */
+  HWLOC_CPUKINDS_RANKING_NO_FORCED_EFFICIENCY, /* default without forced */
+  HWLOC_CPUKINDS_RANKING_FORCED_EFFICIENCY,
+  HWLOC_CPUKINDS_RANKING_CORETYPE_FREQUENCY,
+  HWLOC_CPUKINDS_RANKING_CORETYPE,
+  HWLOC_CPUKINDS_RANKING_FREQUENCY,
+  HWLOC_CPUKINDS_RANKING_FREQUENCY_MAX,
+  HWLOC_CPUKINDS_RANKING_FREQUENCY_BASE,
+  HWLOC_CPUKINDS_RANKING_NONE
+};
+
+static int
+hwloc__cpukinds_try_rank_by_info(struct hwloc_topology *topology,
+                                 enum hwloc_cpukinds_ranking heuristics,
+                                 struct hwloc_cpukinds_info_summary *summary)
+{
+  unsigned i;
+
+  if (HWLOC_CPUKINDS_RANKING_CORETYPE_FREQUENCY == heuristics) {
+    hwloc_debug("Trying to rank cpukinds by coretype+frequency...\n");
+    /* we need intel_core_type + (base or max freq) for all kinds */
+    if (!summary->have_intel_core_type
+        || (!summary->have_max_freq && !summary->have_base_freq))
+      return -1;
+    /* rank first by coretype (Core>>Atom) then by frequency, base if available, max otherwise */
+    for(i=0; i<topology->nr_cpukinds; i++) {
+      struct hwloc_internal_cpukind_s *kind = &topology->cpukinds[i];
+      if (summary->have_base_freq)
+        kind->ranking_value = (summary->summaries[i].intel_core_type << 20) + summary->summaries[i].base_freq;
+      else
+        kind->ranking_value = (summary->summaries[i].intel_core_type << 20) + summary->summaries[i].max_freq;
+    }
+
+  } else if (HWLOC_CPUKINDS_RANKING_CORETYPE == heuristics) {
+    hwloc_debug("Trying to rank cpukinds by coretype...\n");
+    /* we need intel_core_type */
+    if (!summary->have_intel_core_type)
+      return -1;
+    /* rank by coretype (Core>>Atom) */
+    for(i=0; i<topology->nr_cpukinds; i++) {
+      struct hwloc_internal_cpukind_s *kind = &topology->cpukinds[i];
+      kind->ranking_value = (summary->summaries[i].intel_core_type << 20);
+    }
+
+  } else if (HWLOC_CPUKINDS_RANKING_FREQUENCY == heuristics) {
+    hwloc_debug("Trying to rank cpukinds by frequency...\n");
+    /* we need base or max freq for all kinds */
+    if (!summary->have_max_freq && !summary->have_base_freq)
+      return -1;
+    /* rank by frequency, base if available, max otherwise */
+    for(i=0; i<topology->nr_cpukinds; i++) {
+      struct hwloc_internal_cpukind_s *kind = &topology->cpukinds[i];
+      if (summary->have_base_freq)
+        kind->ranking_value = summary->summaries[i].base_freq;
+      else
+        kind->ranking_value = summary->summaries[i].max_freq;
+    }
+
+  } else if (HWLOC_CPUKINDS_RANKING_FREQUENCY_MAX == heuristics) {
+    hwloc_debug("Trying to rank cpukinds by frequency max...\n");
+    /* we need max freq for all kinds */
+    if (!summary->have_max_freq)
+      return -1;
+    /* rank by max frequency */
+    for(i=0; i<topology->nr_cpukinds; i++) {
+      struct hwloc_internal_cpukind_s *kind = &topology->cpukinds[i];
+      kind->ranking_value = summary->summaries[i].max_freq;
+    }
+
+  } else if (HWLOC_CPUKINDS_RANKING_FREQUENCY_BASE == heuristics) {
+    hwloc_debug("Trying to rank cpukinds by frequency base...\n");
+    /* we need base freq for all kinds */
+    if (!summary->have_base_freq)
+      return -1;
+    /* rank by base frequency */
+    for(i=0; i<topology->nr_cpukinds; i++) {
+      struct hwloc_internal_cpukind_s *kind = &topology->cpukinds[i];
+      kind->ranking_value = summary->summaries[i].base_freq;
+    }
+
+  } else assert(0);
+
+  return hwloc__cpukinds_check_duplicate_rankings(topology);
+}
+
+static int
+hwloc__cpukinds_compare_ranking_values(const void *_a, const void *_b)
+{
+  const struct hwloc_internal_cpukind_s *a = _a;
+  const struct hwloc_internal_cpukind_s *b = _b;
+  return a->ranking_value - b->ranking_value;
+}
+
+/* this function requires ranking values to be unique */
+static void
+hwloc__cpukinds_finalize_ranking(struct hwloc_topology *topology)
+{
+  unsigned i;
+  /* sort */
+  qsort(topology->cpukinds, topology->nr_cpukinds, sizeof(*topology->cpukinds), hwloc__cpukinds_compare_ranking_values);
+  /* define our own efficiency between 0 and N-1 */
+  for(i=0; i<topology->nr_cpukinds; i++)
+    topology->cpukinds[i].efficiency = i;
+}
+
+int
+hwloc_internal_cpukinds_rank(struct hwloc_topology *topology)
+{
+  enum hwloc_cpukinds_ranking heuristics;
+  char *env;
+  unsigned i;
+  int err;
+
+  if (!topology->nr_cpukinds)
+    return 0;
+
+  if (topology->nr_cpukinds == 1) {
+    topology->cpukinds[0].efficiency = 0;
+    return 0;
+  }
+
+  heuristics = HWLOC_CPUKINDS_RANKING_DEFAULT;
+  env = getenv("HWLOC_CPUKINDS_RANKING");
+  if (env) {
+    if (!strcmp(env, "default"))
+      heuristics = HWLOC_CPUKINDS_RANKING_DEFAULT;
+    else if (!strcmp(env, "none"))
+      heuristics = HWLOC_CPUKINDS_RANKING_NONE;
+    else if (!strcmp(env, "coretype+frequency"))
+      heuristics = HWLOC_CPUKINDS_RANKING_CORETYPE_FREQUENCY;
+    else if (!strcmp(env, "coretype"))
+      heuristics = HWLOC_CPUKINDS_RANKING_CORETYPE;
+    else if (!strcmp(env, "frequency"))
+      heuristics = HWLOC_CPUKINDS_RANKING_FREQUENCY;
+    else if (!strcmp(env, "frequency_max"))
+      heuristics = HWLOC_CPUKINDS_RANKING_FREQUENCY_MAX;
+    else if (!strcmp(env, "frequency_base"))
+      heuristics = HWLOC_CPUKINDS_RANKING_FREQUENCY_BASE;
+    else if (!strcmp(env, "forced_efficiency"))
+      heuristics = HWLOC_CPUKINDS_RANKING_FORCED_EFFICIENCY;
+    else if (!strcmp(env, "no_forced_efficiency"))
+      heuristics = HWLOC_CPUKINDS_RANKING_NO_FORCED_EFFICIENCY;
+    else if (!hwloc_hide_errors())
+      fprintf(stderr, "Failed to recognize HWLOC_CPUKINDS_RANKING value %s\n", env);
+  }
+
+  if (heuristics == HWLOC_CPUKINDS_RANKING_DEFAULT
+      || heuristics == HWLOC_CPUKINDS_RANKING_NO_FORCED_EFFICIENCY) {
+    /* default is forced_efficiency first */
+    struct hwloc_cpukinds_info_summary summary;
+    enum hwloc_cpukinds_ranking subheuristics;
+    const char *arch;
+
+    if (heuristics == HWLOC_CPUKINDS_RANKING_DEFAULT)
+      hwloc_debug("Using default ranking strategy...\n");
+    else
+      hwloc_debug("Using custom ranking strategy from HWLOC_CPUKINDS_RANKING=%s\n", env);
+
+    if (heuristics != HWLOC_CPUKINDS_RANKING_NO_FORCED_EFFICIENCY) {
+      err = hwloc__cpukinds_try_rank_by_forced_efficiency(topology);
+      if (!err)
+        goto ready;
+    }
+
+    summary.summaries = calloc(topology->nr_cpukinds, sizeof(*summary.summaries));
+    if (!summary.summaries)
+      goto failed;
+    hwloc__cpukinds_summarize_info(topology, &summary);
+
+    arch = hwloc_obj_get_info_by_name(topology->levels[0][0], "Architecture");
+    /* TODO: rather coretype_frequency only on x86/Intel? */
+    if (arch && (!strncmp(arch, "arm", 3) || !strncmp(arch, "aarch", 5)))
+      /* then frequency on ARM */
+      subheuristics = HWLOC_CPUKINDS_RANKING_FREQUENCY;
+    else
+      /* or coretype+frequency otherwise */
+      subheuristics = HWLOC_CPUKINDS_RANKING_CORETYPE_FREQUENCY;
+
+    err = hwloc__cpukinds_try_rank_by_info(topology, subheuristics, &summary);
+    free(summary.summaries);
+    if (!err)
+      goto ready;
+
+  } else if (heuristics == HWLOC_CPUKINDS_RANKING_FORCED_EFFICIENCY) {
+    hwloc_debug("Using custom ranking strategy from HWLOC_CPUKINDS_RANKING=%s\n", env);
+
+    err = hwloc__cpukinds_try_rank_by_forced_efficiency(topology);
+    if (!err)
+      goto ready;
+
+  } else if (heuristics != HWLOC_CPUKINDS_RANKING_NONE) {
+    /* custom heuristics */
+    struct hwloc_cpukinds_info_summary summary;
+
+    hwloc_debug("Using custom ranking strategy from HWLOC_CPUKINDS_RANKING=%s\n", env);
+
+    summary.summaries = calloc(topology->nr_cpukinds, sizeof(*summary.summaries));
+    if (!summary.summaries)
+      goto failed;
+    hwloc__cpukinds_summarize_info(topology, &summary);
+
+    err = hwloc__cpukinds_try_rank_by_info(topology, heuristics, &summary);
+    free(summary.summaries);
+    if (!err)
+      goto ready;
+  }
+
+ failed:
+  /* failed to rank, clear efficiencies */
+  for(i=0; i<topology->nr_cpukinds; i++)
+    topology->cpukinds[i].efficiency = HWLOC_CPUKIND_EFFICIENCY_UNKNOWN;
+  hwloc_debug("Failed to rank cpukinds.\n\n");
+  return 0;
+
+ ready:
+  for(i=0; i<topology->nr_cpukinds; i++)
+    hwloc_debug("cpukind #%u got ranking value %llu\n", i, (unsigned long long) topology->cpukinds[i].ranking_value);
+  hwloc__cpukinds_finalize_ranking(topology);
+#ifdef HWLOC_DEBUG
+  for(i=0; i<topology->nr_cpukinds; i++)
+    assert(topology->cpukinds[i].efficiency == (int) i);
+#endif
+  hwloc_debug("\n");
+  return 0;
+}
+
+
+/*****************
+ * Consulting
+ */
+
+int
+hwloc_cpukinds_get_nr(hwloc_topology_t topology, unsigned long flags)
+{
+  if (flags) {
+    errno = EINVAL;
+    return -1;
+  }
+
+  return topology->nr_cpukinds;
+}
+
+int
+hwloc_cpukinds_get_info(hwloc_topology_t topology,
+                        unsigned id,
+                        hwloc_bitmap_t cpuset,
+                        int *efficiencyp,
+                        unsigned *nr_infosp, struct hwloc_info_s **infosp,
+                        unsigned long flags)
+{
+  struct hwloc_internal_cpukind_s *kind;
+
+  if (flags) {
+    errno = EINVAL;
+    return -1;
+  }
+
+  if (id >= topology->nr_cpukinds) {
+    errno = ENOENT;
+    return -1;
+  }
+
+  kind = &topology->cpukinds[id];
+
+  if (cpuset)
+    hwloc_bitmap_copy(cpuset, kind->cpuset);
+
+  if (efficiencyp)
+    *efficiencyp = kind->efficiency;
+
+  if (nr_infosp && infosp) {
+    *nr_infosp = kind->nr_infos;
+    *infosp = kind->infos;
+  }
+  return 0;
+}
+
+int
+hwloc_cpukinds_get_by_cpuset(hwloc_topology_t topology,
+                             hwloc_const_bitmap_t cpuset,
+                             unsigned long flags)
+{
+  unsigned id;
+
+  if (flags) {
+    errno = EINVAL;
+    return -1;
+  }
+
+  if (!cpuset || hwloc_bitmap_iszero(cpuset)) {
+    errno = EINVAL;
+    return -1;
+  }
+
+  for(id=0; id<topology->nr_cpukinds; id++) {
+    struct hwloc_internal_cpukind_s *kind = &topology->cpukinds[id];
+    int res = hwloc_bitmap_compare_inclusion(cpuset, kind->cpuset);
+    if (res == HWLOC_BITMAP_EQUAL || res == HWLOC_BITMAP_INCLUDED) {
+      return (int) id;
+    } else if (res == HWLOC_BITMAP_INTERSECTS || res == HWLOC_BITMAP_CONTAINS) {
+      errno = EXDEV;
+      return -1;
+    }
+  }
+
+  errno = ENOENT;
+  return -1;
+}
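A minimal sketch, not part of the upstream patch, of how the consulting API above can enumerate ranked kinds; the ranking itself may also be steered through the HWLOC_CPUKINDS_RANKING environment variable parsed by hwloc_internal_cpukinds_rank() before the topology is consulted.

#include "hwloc.h"
#include "hwloc/cpukinds.h"
#include <stdio.h>
#include <stdlib.h>

/* Print each kind's efficiency, cpuset and info pairs. */
static void print_cpukinds(hwloc_topology_t topology)
{
  int nr = hwloc_cpukinds_get_nr(topology, 0);
  int id;
  hwloc_bitmap_t set = hwloc_bitmap_alloc();
  for(id=0; id<nr; id++) {
    int efficiency;
    unsigned nr_infos, i;
    struct hwloc_info_s *infos;
    char *s;
    if (hwloc_cpukinds_get_info(topology, (unsigned) id, set, &efficiency,
                                &nr_infos, &infos, 0) < 0)
      continue;
    hwloc_bitmap_asprintf(&s, set);
    printf("kind #%d efficiency %d cpuset %s\n", id, efficiency, s);
    free(s);
    for(i=0; i<nr_infos; i++)
      printf("  %s = %s\n", infos[i].name, infos[i].value);
  }
  hwloc_bitmap_free(set);
}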
diff --git a/src/3rdparty/hwloc/src/diff.c b/src/3rdparty/hwloc/src/diff.c
index 7794358b..7449a858 100644
--- a/src/3rdparty/hwloc/src/diff.c
+++ b/src/3rdparty/hwloc/src/diff.c
@@ -1,5 +1,5 @@
 /*
- * Copyright © 2013-2019 Inria. All rights reserved.
+ * Copyright © 2013-2020 Inria. All rights reserved.
  * See COPYING in top-level directory.
  */
 
@@ -333,10 +333,8 @@ int hwloc_topology_diff_build(hwloc_topology_t topo1,
 
 	if (!err) {
 		if (SETS_DIFFERENT(allowed_cpuset, topo1, topo2)
-		    || SETS_DIFFERENT(allowed_nodeset, topo1, topo2)) {
-			hwloc_append_diff_too_complex(hwloc_get_root_obj(topo1), diffp, &lastdiff);
-			err = 1;
-		}
+		    || SETS_DIFFERENT(allowed_nodeset, topo1, topo2))
+			goto roottoocomplex;
 	}
 
 	if (!err) {
@@ -346,33 +344,78 @@ int hwloc_topology_diff_build(hwloc_topology_t topo1,
 		dist1 = topo1->first_dist;
 		dist2 = topo2->first_dist;
 		while (dist1 || dist2) {
-			if (!!dist1 != !!dist2) {
-				hwloc_append_diff_too_complex(hwloc_get_root_obj(topo1), diffp, &lastdiff);
-				err = 1;
-				break;
-			}
+			if (!!dist1 != !!dist2)
+				goto roottoocomplex;
 			if (dist1->unique_type != dist2->unique_type
 			    || dist1->different_types || dist2->different_types /* too lazy to support this case */
 			    || dist1->nbobjs != dist2->nbobjs
 			    || dist1->kind != dist2->kind
-			    || memcmp(dist1->values, dist2->values, dist1->nbobjs * dist1->nbobjs * sizeof(*dist1->values))) {
-				hwloc_append_diff_too_complex(hwloc_get_root_obj(topo1), diffp, &lastdiff);
-				err = 1;
-				break;
-			}
+			    || memcmp(dist1->values, dist2->values, dist1->nbobjs * dist1->nbobjs * sizeof(*dist1->values)))
+				goto roottoocomplex;
 			for(i=0; i<dist1->nbobjs; i++)
 				/* gp_index isn't enforced above. so compare logical_index instead, which is enforced. requires distances refresh() above */
-				if (dist1->objs[i]->logical_index != dist2->objs[i]->logical_index) {
-					hwloc_append_diff_too_complex(hwloc_get_root_obj(topo1), diffp, &lastdiff);
-					err = 1;
-					break;
-				}
+				if (dist1->objs[i]->logical_index != dist2->objs[i]->logical_index)
+					goto roottoocomplex;
 			dist1 = dist1->next;
 			dist2 = dist2->next;
 		}
 	}
 
+	if (!err) {
+		/* memattrs */
+		hwloc_internal_memattrs_refresh(topo1);
+		hwloc_internal_memattrs_refresh(topo2);
+		if (topo1->nr_memattrs != topo2->nr_memattrs)
+			goto roottoocomplex;
+		for(i=0; i<topo1->nr_memattrs; i++) {
+			struct hwloc_internal_memattr_s *imattr1 = &topo1->memattrs[i], *imattr2 = &topo2->memattrs[i];
+			unsigned j;
+			if (strcmp(imattr1->name, imattr2->name)
+			    || imattr1->flags != imattr2->flags
+			    || imattr1->nr_targets != imattr2->nr_targets)
+				goto roottoocomplex;
+			if (i == HWLOC_MEMATTR_ID_CAPACITY
+			    || i == HWLOC_MEMATTR_ID_LOCALITY)
+				/* no need to check virtual attributes, they were refreshed from other topology attributes, checked above */
+				continue;
+			for(j=0; j<imattr1->nr_targets; j++) {
+				struct hwloc_internal_memattr_target_s *imtg1 = &imattr1->targets[j], *imtg2 = &imattr2->targets[j];
+				if (imtg1->type != imtg2->type)
+					goto roottoocomplex;
+				if (imtg1->obj->logical_index != imtg2->obj->logical_index)
+					goto roottoocomplex;
+				if (imattr1->flags & HWLOC_MEMATTR_FLAG_NEED_INITIATOR) {
+					unsigned k;
+					for(k=0; k<imtg1->nr_initiators; k++) {
+						struct hwloc_internal_memattr_initiator_s *imi1 = &imtg1->initiators[k], *imi2 = &imtg2->initiators[k];
+						if (imi1->value != imi2->value
+						    || imi1->initiator.type != imi2->initiator.type)
+							goto roottoocomplex;
+						if (imi1->initiator.type == HWLOC_LOCATION_TYPE_CPUSET) {
+							if (!hwloc_bitmap_isequal(imi1->initiator.location.cpuset, imi2->initiator.location.cpuset))
+								goto roottoocomplex;
+						} else if (imi1->initiator.type == HWLOC_LOCATION_TYPE_OBJECT) {
+							if (imi1->initiator.location.object.type != imi2->initiator.location.object.type)
+								goto roottoocomplex;
+							if (imi1->initiator.location.object.obj->logical_index != imi2->initiator.location.object.obj->logical_index)
+								goto roottoocomplex;
+						} else {
+							assert(0);
+						}
+					}
+				}
else { + if (imtg1->noinitiator_value != imtg2->noinitiator_value) + goto roottoocomplex; + } + } + } + } + return err; + + roottoocomplex: + hwloc_append_diff_too_complex(hwloc_get_root_obj(topo1), diffp, &lastdiff); + return 1; } /******************** diff --git a/src/3rdparty/hwloc/src/distances.c b/src/3rdparty/hwloc/src/distances.c index 4f2897a0..c4854956 100644 --- a/src/3rdparty/hwloc/src/distances.c +++ b/src/3rdparty/hwloc/src/distances.c @@ -1,5 +1,5 @@ /* - * Copyright © 2010-2019 Inria. All rights reserved. + * Copyright © 2010-2020 Inria. All rights reserved. * Copyright © 2011-2012 Université Bordeaux * Copyright © 2011 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. @@ -526,36 +526,6 @@ int hwloc_distances_add(hwloc_topology_t topology, * Refresh objects in distances */ -static hwloc_obj_t hwloc_find_obj_by_depth_and_gp_index(hwloc_topology_t topology, unsigned depth, uint64_t gp_index) -{ - hwloc_obj_t obj = hwloc_get_obj_by_depth(topology, depth, 0); - while (obj) { - if (obj->gp_index == gp_index) - return obj; - obj = obj->next_cousin; - } - return NULL; -} - -static hwloc_obj_t hwloc_find_obj_by_type_and_gp_index(hwloc_topology_t topology, hwloc_obj_type_t type, uint64_t gp_index) -{ - int depth = hwloc_get_type_depth(topology, type); - if (depth == HWLOC_TYPE_DEPTH_UNKNOWN) - return NULL; - if (depth == HWLOC_TYPE_DEPTH_MULTIPLE) { - int topodepth = hwloc_topology_get_depth(topology); - for(depth=0; depthcpuset); res_obj = hwloc__insert_object_by_cpuset(topology, NULL, group_obj, - (kind & HWLOC_DISTANCES_KIND_FROM_USER) ? hwloc_report_user_distance_error : hwloc_report_os_error); + (kind & HWLOC_DISTANCES_KIND_FROM_USER) ? "distances:fromuser:group" : "distances:group"); /* res_obj may be NULL on failure to insert. */ if (!res_obj) failed++; diff --git a/src/3rdparty/hwloc/src/memattrs.c b/src/3rdparty/hwloc/src/memattrs.c new file mode 100644 index 00000000..16e9896e --- /dev/null +++ b/src/3rdparty/hwloc/src/memattrs.c @@ -0,0 +1,1197 @@ +/* + * Copyright © 2020 Inria. All rights reserved. + * See COPYING in top-level directory. 
+ */ + +#include "private/autogen/config.h" +#include "hwloc.h" +#include "private/private.h" + + +/***************************** + * Attributes + */ + +static __hwloc_inline +hwloc_uint64_t hwloc__memattr_get_convenience_value(hwloc_memattr_id_t id, + hwloc_obj_t node) +{ + if (id == HWLOC_MEMATTR_ID_CAPACITY) + return node->attr->numanode.local_memory; + else if (id == HWLOC_MEMATTR_ID_LOCALITY) + return hwloc_bitmap_weight(node->cpuset); + else + assert(0); + return 0; /* shut up the compiler */ +} + +void +hwloc_internal_memattrs_init(struct hwloc_topology *topology) +{ + topology->nr_memattrs = 0; + topology->memattrs = NULL; +} + +static void +hwloc__setup_memattr(struct hwloc_internal_memattr_s *imattr, + char *name, + unsigned long flags, + unsigned long iflags) +{ + imattr->name = name; + imattr->flags = flags; + imattr->iflags = iflags; + + imattr->nr_targets = 0; + imattr->targets = NULL; +} + +void +hwloc_internal_memattrs_prepare(struct hwloc_topology *topology) +{ +#define NR_DEFAULT_MEMATTRS 4 + topology->memattrs = malloc(NR_DEFAULT_MEMATTRS * sizeof(*topology->memattrs)); + if (!topology->memattrs) + return; + + assert(HWLOC_MEMATTR_ID_CAPACITY < NR_DEFAULT_MEMATTRS); + hwloc__setup_memattr(&topology->memattrs[HWLOC_MEMATTR_ID_CAPACITY], + (char *) "Capacity", + HWLOC_MEMATTR_FLAG_HIGHER_FIRST, + HWLOC_IMATTR_FLAG_STATIC_NAME|HWLOC_IMATTR_FLAG_CONVENIENCE); + + assert(HWLOC_MEMATTR_ID_LOCALITY < NR_DEFAULT_MEMATTRS); + hwloc__setup_memattr(&topology->memattrs[HWLOC_MEMATTR_ID_LOCALITY], + (char *) "Locality", + HWLOC_MEMATTR_FLAG_LOWER_FIRST, + HWLOC_IMATTR_FLAG_STATIC_NAME|HWLOC_IMATTR_FLAG_CONVENIENCE); + + assert(HWLOC_MEMATTR_ID_BANDWIDTH < NR_DEFAULT_MEMATTRS); + hwloc__setup_memattr(&topology->memattrs[HWLOC_MEMATTR_ID_BANDWIDTH], + (char *) "Bandwidth", + HWLOC_MEMATTR_FLAG_HIGHER_FIRST|HWLOC_MEMATTR_FLAG_NEED_INITIATOR, + HWLOC_IMATTR_FLAG_STATIC_NAME); + + assert(HWLOC_MEMATTR_ID_LATENCY < NR_DEFAULT_MEMATTRS); + hwloc__setup_memattr(&topology->memattrs[HWLOC_MEMATTR_ID_LATENCY], + (char *) "Latency", + HWLOC_MEMATTR_FLAG_LOWER_FIRST|HWLOC_MEMATTR_FLAG_NEED_INITIATOR, + HWLOC_IMATTR_FLAG_STATIC_NAME); + + topology->nr_memattrs = NR_DEFAULT_MEMATTRS; +} + +static void +hwloc__imi_destroy(struct hwloc_internal_memattr_initiator_s *imi) +{ + if (imi->initiator.type == HWLOC_LOCATION_TYPE_CPUSET) + hwloc_bitmap_free(imi->initiator.location.cpuset); +} + +static void +hwloc__imtg_destroy(struct hwloc_internal_memattr_s *imattr, + struct hwloc_internal_memattr_target_s *imtg) +{ + if (imattr->flags & HWLOC_MEMATTR_FLAG_NEED_INITIATOR) { + /* only attributes with initiators may have something to free() in the array */ + unsigned k; + for(k=0; knr_initiators; k++) + hwloc__imi_destroy(&imtg->initiators[k]); + } + free(imtg->initiators); +} + +void +hwloc_internal_memattrs_destroy(struct hwloc_topology *topology) +{ + unsigned id; + for(id=0; idnr_memattrs; id++) { + struct hwloc_internal_memattr_s *imattr = &topology->memattrs[id]; + unsigned j; + for(j=0; jnr_targets; j++) + hwloc__imtg_destroy(imattr, &imattr->targets[j]); + free(imattr->targets); + if (!(imattr->iflags & HWLOC_IMATTR_FLAG_STATIC_NAME)) + free(imattr->name); + } + free(topology->memattrs); + + topology->memattrs = NULL; + topology->nr_memattrs = 0; +} + +int +hwloc_internal_memattrs_dup(struct hwloc_topology *new, struct hwloc_topology *old) +{ + struct hwloc_tma *tma = new->tma; + struct hwloc_internal_memattr_s *imattrs; + hwloc_memattr_id_t id; + + imattrs = hwloc_tma_malloc(tma, old->nr_memattrs * 
sizeof(*imattrs)); + if (!imattrs) + return -1; + new->memattrs = imattrs; + new->nr_memattrs = old->nr_memattrs; + memcpy(imattrs, old->memattrs, old->nr_memattrs * sizeof(*imattrs)); + + for(id=0; idnr_memattrs; id++) { + struct hwloc_internal_memattr_s *oimattr = &old->memattrs[id]; + struct hwloc_internal_memattr_s *nimattr = &imattrs[id]; + unsigned j; + + assert(oimattr->name); + nimattr->name = hwloc_tma_strdup(tma, oimattr->name); + if (!nimattr->name) { + assert(!tma || !tma->dontfree); /* this tma cannot fail to allocate */ + new->nr_memattrs = id; + goto failed; + } + nimattr->iflags &= ~HWLOC_IMATTR_FLAG_STATIC_NAME; + nimattr->iflags &= ~HWLOC_IMATTR_FLAG_CACHE_VALID; /* cache will need refresh */ + + if (!oimattr->nr_targets) + continue; + + nimattr->targets = hwloc_tma_malloc(tma, oimattr->nr_targets * sizeof(*nimattr->targets)); + if (!nimattr->targets) { + free(nimattr->name); + new->nr_memattrs = id; + goto failed; + } + memcpy(nimattr->targets, oimattr->targets, oimattr->nr_targets * sizeof(*nimattr->targets)); + + for(j=0; jnr_targets; j++) { + struct hwloc_internal_memattr_target_s *oimtg = &oimattr->targets[j]; + struct hwloc_internal_memattr_target_s *nimtg = &nimattr->targets[j]; + unsigned k; + + nimtg->obj = NULL; /* cache will need refresh */ + + if (!oimtg->nr_initiators) + continue; + + nimtg->initiators = hwloc_tma_malloc(tma, oimtg->nr_initiators * sizeof(*nimtg->initiators)); + if (!nimtg->initiators) { + nimattr->nr_targets = j; + new->nr_memattrs = id+1; + goto failed; + } + memcpy(nimtg->initiators, oimtg->initiators, oimtg->nr_initiators * sizeof(*nimtg->initiators)); + + for(k=0; knr_initiators; k++) { + struct hwloc_internal_memattr_initiator_s *oimi = &oimtg->initiators[k]; + struct hwloc_internal_memattr_initiator_s *nimi = &nimtg->initiators[k]; + if (oimi->initiator.type == HWLOC_LOCATION_TYPE_CPUSET) { + nimi->initiator.location.cpuset = hwloc_bitmap_tma_dup(tma, oimi->initiator.location.cpuset); + if (!nimi->initiator.location.cpuset) { + nimtg->nr_initiators = k; + nimattr->nr_targets = j+1; + new->nr_memattrs = id+1; + goto failed; + } + } else if (oimi->initiator.type == HWLOC_LOCATION_TYPE_OBJECT) { + nimi->initiator.location.object.obj = NULL; /* cache will need refresh */ + } + } + } + } + return 0; + + failed: + hwloc_internal_memattrs_destroy(new); + return -1; +} + +int +hwloc_memattr_get_by_name(hwloc_topology_t topology, + const char *name, + hwloc_memattr_id_t *idp) +{ + unsigned id; + for(id=0; idnr_memattrs; id++) { + if (!strcmp(topology->memattrs[id].name, name)) { + *idp = id; + return 0; + } + } + errno = EINVAL; + return -1; +} + +int +hwloc_memattr_get_name(hwloc_topology_t topology, + hwloc_memattr_id_t id, + const char **namep) +{ + if (id >= topology->nr_memattrs) { + errno = EINVAL; + return -1; + } + *namep = topology->memattrs[id].name; + return 0; +} + +int +hwloc_memattr_get_flags(hwloc_topology_t topology, + hwloc_memattr_id_t id, + unsigned long *flagsp) +{ + if (id >= topology->nr_memattrs) { + errno = EINVAL; + return -1; + } + *flagsp = topology->memattrs[id].flags; + return 0; +} + +int +hwloc_memattr_register(hwloc_topology_t topology, + const char *_name, + unsigned long flags, + hwloc_memattr_id_t *id) +{ + struct hwloc_internal_memattr_s *newattrs; + char *name; + unsigned i; + + /* check flags */ + if (flags & ~(HWLOC_MEMATTR_FLAG_NEED_INITIATOR|HWLOC_MEMATTR_FLAG_LOWER_FIRST|HWLOC_MEMATTR_FLAG_HIGHER_FIRST)) { + errno = EINVAL; + return -1; + } + if (!(flags & 
(HWLOC_MEMATTR_FLAG_LOWER_FIRST|HWLOC_MEMATTR_FLAG_HIGHER_FIRST))) { + errno = EINVAL; + return -1; + } + if ((flags & (HWLOC_MEMATTR_FLAG_LOWER_FIRST|HWLOC_MEMATTR_FLAG_HIGHER_FIRST)) + == (HWLOC_MEMATTR_FLAG_LOWER_FIRST|HWLOC_MEMATTR_FLAG_HIGHER_FIRST)) { + errno = EINVAL; + return -1; + } + + if (!_name) { + errno = EINVAL; + return -1; + } + + /* check name isn't already used */ + for(i=0; inr_memattrs; i++) { + if (!strcmp(_name, topology->memattrs[i].name)) { + errno = EBUSY; + return -1; + } + } + + name = strdup(_name); + if (!name) + return -1; + + newattrs = realloc(topology->memattrs, (topology->nr_memattrs + 1) * sizeof(*topology->memattrs)); + if (!newattrs) { + free(name); + return -1; + } + + hwloc__setup_memattr(&newattrs[topology->nr_memattrs], + name, flags, 0); + + /* memattr valid when just created */ + newattrs[topology->nr_memattrs].iflags |= HWLOC_IMATTR_FLAG_CACHE_VALID; + + *id = topology->nr_memattrs; + topology->nr_memattrs++; + topology->memattrs = newattrs; + return 0; +} + + +/*************************** + * Internal Locations + */ + +/* return 1 if cpuset/obj matchs the existing initiator location, + * for instance if the cpuset of query is included in the cpuset of existing + */ +static int +match_internal_location(struct hwloc_internal_location_s *iloc, + struct hwloc_internal_memattr_initiator_s *imi) +{ + if (iloc->type != imi->initiator.type) + return 0; + switch (iloc->type) { + case HWLOC_LOCATION_TYPE_CPUSET: + return hwloc_bitmap_isincluded(iloc->location.cpuset, imi->initiator.location.cpuset); + case HWLOC_LOCATION_TYPE_OBJECT: + return iloc->location.object.type == imi->initiator.location.object.type + && iloc->location.object.gp_index == imi->initiator.location.object.gp_index; + default: + return 0; + } +} + +static int +to_internal_location(struct hwloc_internal_location_s *iloc, + struct hwloc_location *location) +{ + iloc->type = location->type; + + switch (location->type) { + case HWLOC_LOCATION_TYPE_CPUSET: + if (!location->location.cpuset || hwloc_bitmap_iszero(location->location.cpuset)) { + errno = EINVAL; + return -1; + } + iloc->location.cpuset = location->location.cpuset; + return 0; + case HWLOC_LOCATION_TYPE_OBJECT: + if (!location->location.object) { + errno = EINVAL; + return -1; + } + iloc->location.object.gp_index = location->location.object->gp_index; + iloc->location.object.type = location->location.object->type; + return 0; + default: + errno = EINVAL; + return -1; + } +} + +static int +from_internal_location(struct hwloc_internal_location_s *iloc, + struct hwloc_location *location) +{ + location->type = iloc->type; + + switch (iloc->type) { + case HWLOC_LOCATION_TYPE_CPUSET: + location->location.cpuset = iloc->location.cpuset; + return 0; + case HWLOC_LOCATION_TYPE_OBJECT: + /* requires the cache to be refreshed */ + location->location.object = iloc->location.object.obj; + if (!location->location.object) + return -1; + return 0; + default: + errno = EINVAL; + return -1; + } +} + + +/************************ + * Refreshing + */ + +static int +hwloc__imi_refresh(struct hwloc_topology *topology, + struct hwloc_internal_memattr_initiator_s *imi) +{ + switch (imi->initiator.type) { + case HWLOC_LOCATION_TYPE_CPUSET: { + hwloc_bitmap_and(imi->initiator.location.cpuset, imi->initiator.location.cpuset, topology->levels[0][0]->cpuset); + if (hwloc_bitmap_iszero(imi->initiator.location.cpuset)) { + hwloc__imi_destroy(imi); + return -1; + } + return 0; + } + case HWLOC_LOCATION_TYPE_OBJECT: { + hwloc_obj_t obj = 
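A minimal sketch, not part of the upstream patch, of registering a custom attribute with the hwloc_memattr_register() defined above and storing one value; the hwloc_memattr_set_value() prototype is assumed from hwloc/memattrs.h, and the attribute name and value are made up for illustration.

#include "hwloc.h"
#include "hwloc/memattrs.h"

/* Register "MyBandwidth" and record a value for one NUMA node,
 * using the node's local cores as the initiator. */
static int record_custom_bandwidth(hwloc_topology_t topology, hwloc_obj_t node)
{
  hwloc_memattr_id_t id;
  struct hwloc_location initiator;
  if (hwloc_memattr_register(topology, "MyBandwidth",
                             HWLOC_MEMATTR_FLAG_HIGHER_FIRST|HWLOC_MEMATTR_FLAG_NEED_INITIATOR,
                             &id) < 0)
    return -1;
  initiator.type = HWLOC_LOCATION_TYPE_CPUSET;
  initiator.location.cpuset = node->cpuset;
  /* 10000 is a hypothetical bandwidth value */
  return hwloc_memattr_set_value(topology, id, node, &initiator, 0, 10000);
}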
hwloc_get_obj_by_type_and_gp_index(topology, + imi->initiator.location.object.type, + imi->initiator.location.object.gp_index); + if (!obj) { + hwloc__imi_destroy(imi); + return -1; + } + imi->initiator.location.object.obj = obj; + return 0; + } + default: + assert(0); + } + return -1; +} + +static int +hwloc__imtg_refresh(struct hwloc_topology *topology, + struct hwloc_internal_memattr_s *imattr, + struct hwloc_internal_memattr_target_s *imtg) +{ + hwloc_obj_t node; + + /* no need to refresh convenience memattrs */ + assert(!(imattr->iflags & HWLOC_IMATTR_FLAG_CONVENIENCE)); + + /* check the target object */ + if (imtg->gp_index == (hwloc_uint64_t) -1) { + /* only NUMA and PU may work with os_index, and only NUMA is currently used internally */ + if (imtg->type == HWLOC_OBJ_NUMANODE) + node = hwloc_get_numanode_obj_by_os_index(topology, imtg->os_index); + else if (imtg->type == HWLOC_OBJ_PU) + node = hwloc_get_pu_obj_by_os_index(topology, imtg->os_index); + else + node = NULL; + } else { + node = hwloc_get_obj_by_type_and_gp_index(topology, imtg->type, imtg->gp_index); + } + if (!node) { + hwloc__imtg_destroy(imattr, imtg); + return -1; + } + + /* save the gp_index in case it wasn't initialized yet */ + imtg->gp_index = node->gp_index; + /* cache the object */ + imtg->obj = node; + + if (imattr->flags & HWLOC_MEMATTR_FLAG_NEED_INITIATOR) { + /* check the initiators */ + unsigned k, l; + for(k=0, l=0; knr_initiators; k++) { + int err = hwloc__imi_refresh(topology, &imtg->initiators[k]); + if (err < 0) + continue; + if (k != l) + memcpy(&imtg->initiators[l], &imtg->initiators[k], sizeof(*imtg->initiators)); + l++; + } + imtg->nr_initiators = l; + if (!imtg->nr_initiators) { + hwloc__imtg_destroy(imattr, imtg); + return -1; + } + } + return 0; +} + +static void +hwloc__imattr_refresh(struct hwloc_topology *topology, + struct hwloc_internal_memattr_s *imattr) +{ + unsigned j, k; + for(j=0, k=0; jnr_targets; j++) { + int ret = hwloc__imtg_refresh(topology, imattr, &imattr->targets[j]); + if (!ret) { + /* target still valid, move it if some former targets were removed */ + if (j != k) + memcpy(&imattr->targets[k], &imattr->targets[j], sizeof(*imattr->targets)); + k++; + } + } + imattr->nr_targets = k; + imattr->iflags |= HWLOC_IMATTR_FLAG_CACHE_VALID; +} + +void +hwloc_internal_memattrs_refresh(struct hwloc_topology *topology) +{ + unsigned id; + for(id=0; idnr_memattrs; id++) { + struct hwloc_internal_memattr_s *imattr = &topology->memattrs[id]; + if (imattr->iflags & HWLOC_IMATTR_FLAG_CACHE_VALID) + /* nothing to refresh */ + continue; + hwloc__imattr_refresh(topology, imattr); + } +} + +void +hwloc_internal_memattrs_need_refresh(struct hwloc_topology *topology) +{ + unsigned id; + for(id=0; idnr_memattrs; id++) { + struct hwloc_internal_memattr_s *imattr = &topology->memattrs[id]; + if (imattr->iflags & HWLOC_IMATTR_FLAG_CONVENIENCE) + /* no need to refresh convenience memattrs */ + continue; + imattr->iflags &= ~HWLOC_IMATTR_FLAG_CACHE_VALID; + } +} + + +/******************************** + * Targets + */ + +static struct hwloc_internal_memattr_target_s * +hwloc__memattr_get_target(struct hwloc_internal_memattr_s *imattr, + hwloc_obj_type_t target_type, + hwloc_uint64_t target_gp_index, + unsigned target_os_index, + int create) +{ + struct hwloc_internal_memattr_target_s *news, *new; + unsigned j; + + for(j=0; jnr_targets; j++) { + if (target_type == imattr->targets[j].type) + if ((target_gp_index != (hwloc_uint64_t)-1 && target_gp_index == imattr->targets[j].gp_index) + || (target_os_index 
+
+/********************************
+ * Targets
+ */
+
+static struct hwloc_internal_memattr_target_s *
+hwloc__memattr_get_target(struct hwloc_internal_memattr_s *imattr,
+                          hwloc_obj_type_t target_type,
+                          hwloc_uint64_t target_gp_index,
+                          unsigned target_os_index,
+                          int create)
+{
+  struct hwloc_internal_memattr_target_s *news, *new;
+  unsigned j;
+
+  for(j=0; j<imattr->nr_targets; j++) {
+    if (target_type == imattr->targets[j].type)
+      if ((target_gp_index != (hwloc_uint64_t)-1 && target_gp_index == imattr->targets[j].gp_index)
+          || (target_os_index != (unsigned)-1 && target_os_index == imattr->targets[j].os_index))
+        return &imattr->targets[j];
+  }
+  if (!create)
+    return NULL;
+
+  news = realloc(imattr->targets, (imattr->nr_targets+1)*sizeof(*imattr->targets));
+  if (!news)
+    return NULL;
+  imattr->targets = news;
+
+  /* FIXME sort targets? by logical index at the end of load? */
+
+  new = &news[imattr->nr_targets];
+  new->type = target_type;
+  new->gp_index = target_gp_index;
+  new->os_index = target_os_index;
+
+  /* cached object will be refreshed later on actual access */
+  new->obj = NULL;
+  imattr->iflags &= ~HWLOC_IMATTR_FLAG_CACHE_VALID;
+  /* When setting a value after load(), the caller has the target object
+   * (and initiator object, if not CPU set). Hence, we could avoid invalidating
+   * the cache here.
+   * The overhead of the imattr-wide refresh isn't high enough so far
+   * to justify making the cache management more complex.
+   */
+
+  new->nr_initiators = 0;
+  new->initiators = NULL;
+  new->noinitiator_value = 0;
+  imattr->nr_targets++;
+  return new;
+}
+
+static struct hwloc_internal_memattr_initiator_s *
+hwloc__memattr_get_initiator_from_location(struct hwloc_internal_memattr_s *imattr,
+                                           struct hwloc_internal_memattr_target_s *imtg,
+                                           struct hwloc_location *location);
+
+int
+hwloc_memattr_get_targets(hwloc_topology_t topology,
+                          hwloc_memattr_id_t id,
+                          struct hwloc_location *initiator,
+                          unsigned long flags,
+                          unsigned *nrp, hwloc_obj_t *targets, hwloc_uint64_t *values)
+{
+  struct hwloc_internal_memattr_s *imattr;
+  unsigned i, found = 0, max;
+
+  if (flags) {
+    errno = EINVAL;
+    return -1;
+  }
+
+  if (!nrp || (*nrp && !targets)) {
+    errno = EINVAL;
+    return -1;
+  }
+  max = *nrp;
+
+  if (id >= topology->nr_memattrs) {
+    errno = EINVAL;
+    return -1;
+  }
+  imattr = &topology->memattrs[id];
+
+  if (imattr->iflags & HWLOC_IMATTR_FLAG_CONVENIENCE) {
+    /* convenience attributes */
+    for(i=0; ; i++) {
+      hwloc_obj_t node = hwloc_get_obj_by_type(topology, HWLOC_OBJ_NUMANODE, i);
+      if (!node)
+        break;
+      if (found<max) {
+        targets[found] = node;
+        if (values)
+          values[found] = hwloc__memattr_get_convenience_value(id, node);
+      }
+      found++;
+    }
+    goto done;
+  }
+
+  /* normal attributes */
+
+  if (!(imattr->iflags & HWLOC_IMATTR_FLAG_CACHE_VALID))
+    hwloc__imattr_refresh(topology, imattr);
+
+  for(i=0; i<imattr->nr_targets; i++) {
+    struct hwloc_internal_memattr_target_s *imtg = &imattr->targets[i];
+    hwloc_uint64_t value = 0;
+
+    if (imattr->flags & HWLOC_MEMATTR_FLAG_NEED_INITIATOR) {
+      if (initiator) {
+        /* find a matching initiator */
+        struct hwloc_internal_memattr_initiator_s *imi = hwloc__memattr_get_initiator_from_location(imattr, imtg, initiator);
+        if (!imi)
+          continue;
+        value = imi->value;
+      }
+    } else {
+      value = imtg->noinitiator_value;
+    }
+
+    if (found<max) {
+      targets[found] = imtg->obj;
+      if (values)
+        values[found] = value;
+    }
+    found++;
+  }
+
+ done:
+  *nrp = found;
+  return 0;
+}
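For context, a hedged sketch of the consumer side of the query above; the two-pass *nrp convention (size in, total found out) follows directly from the found/max logic. The buffer size of 16 and the choice of the built-in bandwidth attribute are illustrative; on machines without ACPI HMAT data this simply prints nothing. Not part of this patch.

#include <stdio.h>
#include <hwloc.h>
#include <hwloc/memattrs.h>

/* Sketch: list NUMA targets of the built-in bandwidth attribute,
 * using the whole-machine cpuset as the initiator for simplicity. */
static void list_bandwidth_targets(hwloc_topology_t topo)
{
  struct hwloc_location initiator;
  hwloc_obj_t targets[16];
  hwloc_uint64_t values[16];
  unsigned i, nr = 16;

  initiator.type = HWLOC_LOCATION_TYPE_CPUSET;
  /* cast: the location union stores a non-const cpuset */
  initiator.location.cpuset = (hwloc_cpuset_t) hwloc_topology_get_topology_cpuset(topo);

  if (hwloc_memattr_get_targets(topo, HWLOC_MEMATTR_ID_BANDWIDTH, &initiator, 0,
                                &nr, targets, values) < 0)
    return;
  for(i=0; i<nr && i<16; i++)
    printf("node L#%u bandwidth %llu\n",
           targets[i]->logical_index, (unsigned long long) values[i]);
}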
+
+
+/************************
+ * Initiators
+ */
+
+static struct hwloc_internal_memattr_initiator_s *
+hwloc__memattr_target_get_initiator(struct hwloc_internal_memattr_target_s *imtg,
+                                    struct hwloc_internal_location_s *iloc,
+                                    int create)
+{
+  struct hwloc_internal_memattr_initiator_s *news, *new;
+  unsigned k;
+
+  for(k=0; k<imtg->nr_initiators; k++) {
+    struct hwloc_internal_memattr_initiator_s *imi = &imtg->initiators[k];
+    if (match_internal_location(iloc, imi)) {
+      return imi;
+    }
+  }
+
+  if (!create)
+    return NULL;
+
+  news = realloc(imtg->initiators, (imtg->nr_initiators+1)*sizeof(*imtg->initiators));
+  if (!news)
+    return NULL;
+  new = &news[imtg->nr_initiators];
+
+  new->initiator = *iloc;
+  if (iloc->type == HWLOC_LOCATION_TYPE_CPUSET) {
+    new->initiator.location.cpuset = hwloc_bitmap_dup(iloc->location.cpuset);
+    if (!new->initiator.location.cpuset)
+      goto out_with_realloc;
+  }
+
+  imtg->nr_initiators++;
+  imtg->initiators = news;
+  return new;
+
+ out_with_realloc:
+  imtg->initiators = news;
+  return NULL;
+}
+
+static struct hwloc_internal_memattr_initiator_s *
+hwloc__memattr_get_initiator_from_location(struct hwloc_internal_memattr_s *imattr,
+                                           struct hwloc_internal_memattr_target_s *imtg,
+                                           struct hwloc_location *location)
+{
+  struct hwloc_internal_memattr_initiator_s *imi;
+  struct hwloc_internal_location_s iloc;
+
+  assert(imattr->flags & HWLOC_MEMATTR_FLAG_NEED_INITIATOR);
+
+  /* use the initiator value */
+  if (!location) {
+    errno = EINVAL;
+    return NULL;
+  }
+
+  if (to_internal_location(&iloc, location) < 0) {
+    errno = EINVAL;
+    return NULL;
+  }
+
+  imi = hwloc__memattr_target_get_initiator(imtg, &iloc, 0);
+  if (!imi) {
+    errno = EINVAL;
+    return NULL;
+  }
+
+  return imi;
+}
+
+int
+hwloc_memattr_get_initiators(hwloc_topology_t topology,
+                             hwloc_memattr_id_t id,
+                             hwloc_obj_t target_node,
+                             unsigned long flags,
+                             unsigned *nrp, struct hwloc_location *initiators, hwloc_uint64_t *values)
+{
+  struct hwloc_internal_memattr_s *imattr;
+  struct hwloc_internal_memattr_target_s *imtg;
+  unsigned i, max;
+
+  if (flags) {
+    errno = EINVAL;
+    return -1;
+  }
+
+  if (!nrp || (*nrp && !initiators)) {
+    errno = EINVAL;
+    return -1;
+  }
+  max = *nrp;
+
+  if (id >= topology->nr_memattrs) {
+    errno = EINVAL;
+    return -1;
+  }
+  imattr = &topology->memattrs[id];
+  if (!(imattr->flags & HWLOC_MEMATTR_FLAG_NEED_INITIATOR)) {
+    *nrp = 0;
+    return 0;
+  }
+
+  /* all convenience attributes have no initiators */
+  assert(!(imattr->iflags & HWLOC_IMATTR_FLAG_CONVENIENCE));
+
+  if (!(imattr->iflags & HWLOC_IMATTR_FLAG_CACHE_VALID))
+    hwloc__imattr_refresh(topology, imattr);
+
+  imtg = hwloc__memattr_get_target(imattr, target_node->type, target_node->gp_index, target_node->os_index, 0);
+  if (!imtg) {
+    errno = EINVAL;
+    return -1;
+  }
+
+  for(i=0; i<imtg->nr_initiators && i<max; i++) {
+    struct hwloc_internal_memattr_initiator_s *imi = &imtg->initiators[i];
+    int err = from_internal_location(&imi->initiator, &initiators[i]);
+    assert(!err);
+    if (values)
+      /* no need to handle capacity/locality special cases here, those are initiator-less attributes */
+      values[i] = imi->value;
+  }
+
+  *nrp = imtg->nr_initiators;
+  return 0;
+}
+
+
+/**************************
+ * Values
+ */
+
+int
+hwloc_memattr_get_value(hwloc_topology_t topology,
+                        hwloc_memattr_id_t id,
+                        hwloc_obj_t target_node,
+                        struct hwloc_location *initiator,
+                        unsigned long flags,
+                        hwloc_uint64_t *valuep)
+{
+  struct hwloc_internal_memattr_s *imattr;
+  struct hwloc_internal_memattr_target_s *imtg;
+
+  if (flags) {
+    errno = EINVAL;
+    return -1;
+  }
+
+  if (id >= topology->nr_memattrs) {
+    errno = EINVAL;
+    return -1;
+  }
+  imattr = &topology->memattrs[id];
+
+  if (imattr->iflags & HWLOC_IMATTR_FLAG_CONVENIENCE) {
+    /* convenience attributes */
+    *valuep = hwloc__memattr_get_convenience_value(id, target_node);
+    return 0;
+  }
+
+  /* normal attributes */
+
+  if (!(imattr->iflags & HWLOC_IMATTR_FLAG_CACHE_VALID))
+    hwloc__imattr_refresh(topology, imattr);
+
+  imtg = hwloc__memattr_get_target(imattr, target_node->type, target_node->gp_index, target_node->os_index, 0);
+  if (!imtg) {
+    errno = EINVAL;
+    return -1;
+  }
+
+  if (imattr->flags & HWLOC_MEMATTR_FLAG_NEED_INITIATOR) {
+    /* find the initiator and get its value */
+    struct hwloc_internal_memattr_initiator_s *imi = hwloc__memattr_get_initiator_from_location(imattr, imtg, initiator);
+    if (!imi)
+      return -1;
+    *valuep = imi->value;
+  } else {
+    /* get the no-initiator value */
+    *valuep = imtg->noinitiator_value;
+ } + return 0; +} + +static int +hwloc__internal_memattr_set_value(hwloc_topology_t topology, + hwloc_memattr_id_t id, + hwloc_obj_type_t target_type, + hwloc_uint64_t target_gp_index, + unsigned target_os_index, + struct hwloc_internal_location_s *initiator, + hwloc_uint64_t value) +{ + struct hwloc_internal_memattr_s *imattr; + struct hwloc_internal_memattr_target_s *imtg; + + if (id >= topology->nr_memattrs) { + /* something bad happened during init */ + errno = EINVAL; + return -1; + } + imattr = &topology->memattrs[id]; + + if (imattr->flags & HWLOC_MEMATTR_FLAG_NEED_INITIATOR) { + /* check given initiator */ + if (!initiator) { + errno = EINVAL; + return -1; + } + } + + if (imattr->iflags & HWLOC_IMATTR_FLAG_CONVENIENCE) { + /* convenience attributes are read-only */ + errno = EINVAL; + return -1; + } + + if (topology->is_loaded && !(imattr->iflags & HWLOC_IMATTR_FLAG_CACHE_VALID)) + /* don't refresh when adding values during load (some nodes might not be ready yet), + * we'll refresh later + */ + hwloc__imattr_refresh(topology, imattr); + + imtg = hwloc__memattr_get_target(imattr, target_type, target_gp_index, target_os_index, 1); + if (!imtg) + return -1; + + if (imattr->flags & HWLOC_MEMATTR_FLAG_NEED_INITIATOR) { + /* find/add the initiator and set its value */ + // FIXME what if cpuset is larger than an existing one ? + struct hwloc_internal_memattr_initiator_s *imi = hwloc__memattr_target_get_initiator(imtg, initiator, 1); + if (!imi) + return -1; + imi->value = value; + + } else { + /* set the no-initiator value */ + imtg->noinitiator_value = value; + } + + return 0; + +} + +int +hwloc_internal_memattr_set_value(hwloc_topology_t topology, + hwloc_memattr_id_t id, + hwloc_obj_type_t target_type, + hwloc_uint64_t target_gp_index, + unsigned target_os_index, + struct hwloc_internal_location_s *initiator, + hwloc_uint64_t value) +{ + assert(id != HWLOC_MEMATTR_ID_CAPACITY); + assert(id != HWLOC_MEMATTR_ID_LOCALITY); + + return hwloc__internal_memattr_set_value(topology, id, target_type, target_gp_index, target_os_index, initiator, value); +} + +int +hwloc_memattr_set_value(hwloc_topology_t topology, + hwloc_memattr_id_t id, + hwloc_obj_t target_node, + struct hwloc_location *initiator, + unsigned long flags, + hwloc_uint64_t value) +{ + struct hwloc_internal_location_s iloc, *ilocp; + + if (flags) { + errno = EINVAL; + return -1; + } + + if (initiator) { + if (to_internal_location(&iloc, initiator) < 0) { + errno = EINVAL; + return -1; + } + ilocp = &iloc; + } else { + ilocp = NULL; + } + + return hwloc__internal_memattr_set_value(topology, id, target_node->type, target_node->gp_index, target_node->os_index, ilocp, value); +} + + +/********************** + * Best target + */ + +static void +hwloc__update_best_target(hwloc_obj_t *best_obj, hwloc_uint64_t *best_value, int *found, + hwloc_obj_t new_obj, hwloc_uint64_t new_value, + int keep_highest) +{ + if (*found) { + if (keep_highest) { + if (new_value <= *best_value) + return; + } else { + if (new_value >= *best_value) + return; + } + } + + *best_obj = new_obj; + *best_value = new_value; + *found = 1; +} + +int +hwloc_memattr_get_best_target(hwloc_topology_t topology, + hwloc_memattr_id_t id, + struct hwloc_location *initiator, + unsigned long flags, + hwloc_obj_t *bestp, hwloc_uint64_t *valuep) +{ + struct hwloc_internal_memattr_s *imattr; + hwloc_uint64_t best_value = 0; /* shutup the compiler */ + hwloc_obj_t best = NULL; + int found = 0; + unsigned j; + + if (flags) { + errno = EINVAL; + return -1; + } + + if (id >= 
topology->nr_memattrs) {
+    errno = EINVAL;
+    return -1;
+  }
+  imattr = &topology->memattrs[id];
+
+  if (imattr->iflags & HWLOC_IMATTR_FLAG_CONVENIENCE) {
+    /* convenience attributes */
+    for(j=0; ; j++) {
+      hwloc_obj_t node = hwloc_get_obj_by_type(topology, HWLOC_OBJ_NUMANODE, j);
+      hwloc_uint64_t value;
+      if (!node)
+        break;
+      value = hwloc__memattr_get_convenience_value(id, node);
+      hwloc__update_best_target(&best, &best_value, &found,
+                                node, value,
+                                imattr->flags & HWLOC_MEMATTR_FLAG_HIGHER_FIRST);
+    }
+    goto done;
+  }
+
+  /* normal attributes */
+
+  if (!(imattr->iflags & HWLOC_IMATTR_FLAG_CACHE_VALID))
+    /* not strictly needed */
+    hwloc__imattr_refresh(topology, imattr);
+
+  for(j=0; j<imattr->nr_targets; j++) {
+    struct hwloc_internal_memattr_target_s *imtg = &imattr->targets[j];
+    hwloc_uint64_t value;
+    if (imattr->flags & HWLOC_MEMATTR_FLAG_NEED_INITIATOR) {
+      /* find the initiator and get its value */
+      struct hwloc_internal_memattr_initiator_s *imi = hwloc__memattr_get_initiator_from_location(imattr, imtg, initiator);
+      if (!imi)
+        continue;
+      value = imi->value;
+    } else {
+      /* get the no-initiator value */
+      value = imtg->noinitiator_value;
+    }
+    hwloc__update_best_target(&best, &best_value, &found,
+                              imtg->obj, value,
+                              imattr->flags & HWLOC_MEMATTR_FLAG_HIGHER_FIRST);
+  }
+
+ done:
+  if (found) {
+    assert(best);
+    *bestp = best;
+    if (valuep)
+      *valuep = best_value;
+    return 0;
+  } else {
+    errno = ENOENT;
+    return -1;
+  }
+}
+
+/**********************
+ * Best initiators
+ */
+
+static void
+hwloc__update_best_initiator(struct hwloc_internal_location_s *best_initiator, hwloc_uint64_t *best_value, int *found,
+                             struct hwloc_internal_location_s *new_initiator, hwloc_uint64_t new_value,
+                             int keep_highest)
+{
+  if (*found) {
+    if (keep_highest) {
+      if (new_value <= *best_value)
+        return;
+    } else {
+      if (new_value >= *best_value)
+        return;
+    }
+  }
+
+  *best_initiator = *new_initiator;
+  *best_value = new_value;
+  *found = 1;
+}
+
+int
+hwloc_memattr_get_best_initiator(hwloc_topology_t topology,
+                                 hwloc_memattr_id_t id,
+                                 hwloc_obj_t target_node,
+                                 unsigned long flags,
+                                 struct hwloc_location *bestp, hwloc_uint64_t *valuep)
+{
+  struct hwloc_internal_memattr_s *imattr;
+  struct hwloc_internal_memattr_target_s *imtg;
+  struct hwloc_internal_location_s best_initiator;
+  hwloc_uint64_t best_value;
+  int found;
+  unsigned i;
+
+  if (flags) {
+    errno = EINVAL;
+    return -1;
+  }
+
+  if (id >= topology->nr_memattrs) {
+    errno = EINVAL;
+    return -1;
+  }
+  imattr = &topology->memattrs[id];
+
+  if (!(imattr->flags & HWLOC_MEMATTR_FLAG_NEED_INITIATOR)) {
+    errno = EINVAL;
+    return -1;
+  }
+
+  if (!(imattr->iflags & HWLOC_IMATTR_FLAG_CACHE_VALID))
+    /* not strictly needed */
+    hwloc__imattr_refresh(topology, imattr);
+
+  imtg = hwloc__memattr_get_target(imattr, target_node->type, target_node->gp_index, target_node->os_index, 0);
+  if (!imtg) {
+    errno = EINVAL;
+    return -1;
+  }
+
+  found = 0;
+  for(i=0; i<imtg->nr_initiators; i++) {
+    struct hwloc_internal_memattr_initiator_s *imi = &imtg->initiators[i];
+    hwloc__update_best_initiator(&best_initiator, &best_value, &found,
+                                 &imi->initiator, imi->value,
+                                 imattr->flags & HWLOC_MEMATTR_FLAG_HIGHER_FIRST);
+  }
+
+  if (found) {
+    if (valuep)
+      *valuep = best_value;
+    return from_internal_location(&best_initiator, bestp);
+  } else {
+    errno = ENOENT;
+    return -1;
+  }
+}
+
+/****************************
+ * Listing local nodes
+ */
+
+static __hwloc_inline int
+match_local_obj_cpuset(hwloc_obj_t node, hwloc_cpuset_t cpuset, unsigned long flags)
+{
+  if (flags & 
HWLOC_LOCAL_NUMANODE_FLAG_ALL) + return 1; + if ((flags & HWLOC_LOCAL_NUMANODE_FLAG_LARGER_LOCALITY) + && hwloc_bitmap_isincluded(cpuset, node->cpuset)) + return 1; + if ((flags & HWLOC_LOCAL_NUMANODE_FLAG_SMALLER_LOCALITY) + && hwloc_bitmap_isincluded(node->cpuset, cpuset)) + return 1; + return hwloc_bitmap_isequal(node->cpuset, cpuset); +} + +int +hwloc_get_local_numanode_objs(hwloc_topology_t topology, + struct hwloc_location *location, + unsigned *nrp, + hwloc_obj_t *nodes, + unsigned long flags) +{ + hwloc_cpuset_t cpuset; + hwloc_obj_t node; + unsigned i; + + if (flags & ~(HWLOC_LOCAL_NUMANODE_FLAG_SMALLER_LOCALITY + |HWLOC_LOCAL_NUMANODE_FLAG_LARGER_LOCALITY + | HWLOC_LOCAL_NUMANODE_FLAG_ALL)) { + errno = EINVAL; + return -1; + } + + if (!nrp || (*nrp && !nodes)) { + errno = EINVAL; + return -1; + } + + if (!location) { + if (!(flags & HWLOC_LOCAL_NUMANODE_FLAG_ALL)) { + errno = EINVAL; + return -1; + } + cpuset = NULL; /* unused */ + + } else { + if (location->type == HWLOC_LOCATION_TYPE_CPUSET) { + cpuset = location->location.cpuset; + } else if (location->type == HWLOC_LOCATION_TYPE_OBJECT) { + hwloc_obj_t obj = location->location.object; + while (!obj->cpuset) + obj = obj->parent; + cpuset = obj->cpuset; + } else { + errno = EINVAL; + return -1; + } + } + + i = 0; + for(node = hwloc_get_obj_by_type(topology, HWLOC_OBJ_NUMANODE, 0); + node; + node = node->next_cousin) { + if (!match_local_obj_cpuset(node, cpuset, flags)) + continue; + if (i < *nrp) + nodes[i] = node; + i++; + } + + *nrp = i; + return 0; +} diff --git a/src/3rdparty/hwloc/src/misc.c b/src/3rdparty/hwloc/src/misc.c index a7b9a5eb..6f0b4a5a 100644 --- a/src/3rdparty/hwloc/src/misc.c +++ b/src/3rdparty/hwloc/src/misc.c @@ -1,6 +1,6 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2018 Inria. All rights reserved. + * Copyright © 2009-2020 Inria. All rights reserved. * Copyright © 2009-2010 Université Bordeaux * Copyright © 2009-2018 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. @@ -114,7 +114,7 @@ void hwloc_add_uname_info(struct hwloc_topology *topology __hwloc_attribute_unus char * hwloc_progname(struct hwloc_topology *topology __hwloc_attribute_unused) { -#if HAVE_DECL_GETMODULEFILENAME +#if (defined HAVE_DECL_GETMODULEFILENAME) && HAVE_DECL_GETMODULEFILENAME char name[256], *local_basename; unsigned res = GetModuleFileName(NULL, name, sizeof(name)); if (res == sizeof(name) || !res) diff --git a/src/3rdparty/hwloc/src/pci-common.c b/src/3rdparty/hwloc/src/pci-common.c index a817c8da..1149113b 100644 --- a/src/3rdparty/hwloc/src/pci-common.c +++ b/src/3rdparty/hwloc/src/pci-common.c @@ -232,7 +232,8 @@ enum hwloc_pci_busid_comparison_e { HWLOC_PCI_BUSID_LOWER, HWLOC_PCI_BUSID_HIGHER, HWLOC_PCI_BUSID_INCLUDED, - HWLOC_PCI_BUSID_SUPERSET + HWLOC_PCI_BUSID_SUPERSET, + HWLOC_PCI_BUSID_EQUAL }; static enum hwloc_pci_busid_comparison_e @@ -274,11 +275,8 @@ hwloc_pci_compare_busids(struct hwloc_obj *a, struct hwloc_obj *b) if (a->attr->pcidev.func > b->attr->pcidev.func) return HWLOC_PCI_BUSID_HIGHER; - /* Should never reach here. Abort on both debug builds and - non-debug builds */ - assert(0); - fprintf(stderr, "Bad assertion in hwloc %s:%d (aborting)\n", __FILE__, __LINE__); - exit(1); + /* Should never reach here. 
*/ + return HWLOC_PCI_BUSID_EQUAL; } static void @@ -329,6 +327,23 @@ hwloc_pci_add_object(struct hwloc_obj *parent, struct hwloc_obj **parent_io_firs } return; } + case HWLOC_PCI_BUSID_EQUAL: { + static int reported = 0; + if (!reported && !hwloc_hide_errors()) { + fprintf(stderr, "*********************************************************\n"); + fprintf(stderr, "* hwloc %s received invalid PCI information.\n", HWLOC_VERSION); + fprintf(stderr, "*\n"); + fprintf(stderr, "* Trying to insert PCI object %04x:%02x:%02x.%01x at %04x:%02x:%02x.%01x\n", + new->attr->pcidev.domain, new->attr->pcidev.bus, new->attr->pcidev.dev, new->attr->pcidev.func, + (*curp)->attr->pcidev.domain, (*curp)->attr->pcidev.bus, (*curp)->attr->pcidev.dev, (*curp)->attr->pcidev.func); + fprintf(stderr, "*\n"); + fprintf(stderr, "* hwloc will now ignore this object and continue.\n"); + fprintf(stderr, "*********************************************************\n"); + reported = 1; + } + hwloc_free_unlinked_object(new); + return; + } } } /* add to the end of the list if higher than everybody */ @@ -425,39 +440,10 @@ hwloc_pcidisc_add_hostbridges(struct hwloc_topology *topology, static struct hwloc_obj * hwloc_pci_fixup_busid_parent(struct hwloc_topology *topology __hwloc_attribute_unused, - struct hwloc_pcidev_attr_s *busid, - struct hwloc_obj *parent) + struct hwloc_pcidev_attr_s *busid __hwloc_attribute_unused, + struct hwloc_obj *parent __hwloc_attribute_unused) { - /* Xeon E5v3 in cluster-on-die mode only have PCI on the first NUMA node of each package. - * but many dual-processor host report the second PCI hierarchy on 2nd NUMA of first package. - */ - if (parent->depth >= 2 - && parent->type == HWLOC_OBJ_NUMANODE - && parent->sibling_rank == 1 && parent->parent->arity == 2 - && parent->parent->type == HWLOC_OBJ_PACKAGE - && parent->parent->sibling_rank == 0 && parent->parent->parent->arity == 2) { - const char *cpumodel = hwloc_obj_get_info_by_name(parent->parent, "CPUModel"); - if (cpumodel && strstr(cpumodel, "Xeon")) { - if (!hwloc_hide_errors()) { - fprintf(stderr, "****************************************************************************\n"); - fprintf(stderr, "* hwloc %s has encountered an incorrect PCI locality information.\n", HWLOC_VERSION); - fprintf(stderr, "* PCI bus %04x:%02x is supposedly close to 2nd NUMA node of 1st package,\n", - busid->domain, busid->bus); - fprintf(stderr, "* however hwloc believes this is impossible on this architecture.\n"); - fprintf(stderr, "* Therefore the PCI bus will be moved to 1st NUMA node of 2nd package.\n"); - fprintf(stderr, "*\n"); - fprintf(stderr, "* If you feel this fixup is wrong, disable it by setting in your environment\n"); - fprintf(stderr, "* HWLOC_PCI_%04x_%02x_LOCALCPUS= (empty value), and report the problem\n", - busid->domain, busid->bus); - fprintf(stderr, "* to the hwloc's user mailing list together with the XML output of lstopo.\n"); - fprintf(stderr, "*\n"); - fprintf(stderr, "* You may silence this message by setting HWLOC_HIDE_ERRORS=1 in your environment.\n"); - fprintf(stderr, "****************************************************************************\n"); - } - return parent->parent->next_sibling->first_child; - } - } - + /* no quirk for now */ return parent; } diff --git a/src/3rdparty/hwloc/src/shmem.c b/src/3rdparty/hwloc/src/shmem.c index 94d55eef..c73c6d92 100644 --- a/src/3rdparty/hwloc/src/shmem.c +++ b/src/3rdparty/hwloc/src/shmem.c @@ -1,5 +1,5 @@ /* - * Copyright © 2017-2019 Inria. All rights reserved. 
+ * Copyright © 2017-2020 Inria.  All rights reserved.
  * See COPYING in top-level directory.
  */
@@ -97,6 +97,7 @@ hwloc_shmem_topology_write(hwloc_topology_t topology,
    * without being able to free() them.
    */
   hwloc_internal_distances_refresh(topology);
+  hwloc_internal_memattrs_refresh(topology);
 
   header.header_version = HWLOC_SHMEM_HEADER_VERSION;
   header.header_length = sizeof(header);
@@ -134,8 +135,9 @@ hwloc_shmem_topology_write(hwloc_topology_t topology,
 
   assert((char *)mmap_res <= (char *)mmap_address + length);
 
-  /* now refresh the new distances so that adopters can use them without refreshing the R/O shmem mapping */
+  /* now refresh the new distances/memattrs so that adopters can use them without refreshing the R/O shmem mapping */
   hwloc_internal_distances_refresh(new);
+  hwloc_internal_memattrs_refresh(topology);
 
   /* topology is saved, release resources now */
   munmap(mmap_address, length);
@@ -214,11 +216,13 @@ hwloc_shmem_topology_adopt(hwloc_topology_t *topologyp,
   new->support.discovery = malloc(sizeof(*new->support.discovery));
   new->support.cpubind = malloc(sizeof(*new->support.cpubind));
   new->support.membind = malloc(sizeof(*new->support.membind));
-  if (!new->support.discovery || !new->support.cpubind || !new->support.membind)
+  new->support.misc = malloc(sizeof(*new->support.misc));
+  if (!new->support.discovery || !new->support.cpubind || !new->support.membind || !new->support.misc)
     goto out_with_support;
   memcpy(new->support.discovery, old->support.discovery, sizeof(*new->support.discovery));
   memcpy(new->support.cpubind, old->support.cpubind, sizeof(*new->support.cpubind));
   memcpy(new->support.membind, old->support.membind, sizeof(*new->support.membind));
+  memcpy(new->support.misc, old->support.misc, sizeof(*new->support.misc));
   hwloc_set_binding_hooks(new);
   /* clear userdata callbacks pointing to the writer process' functions */
   new->userdata_export_cb = NULL;
@@ -236,6 +240,7 @@ hwloc_shmem_topology_adopt(hwloc_topology_t *topologyp,
   free(new->support.discovery);
   free(new->support.cpubind);
   free(new->support.membind);
+  free(new->support.misc);
   free(new);
  out_with_components:
   hwloc_components_fini();
@@ -252,6 +257,7 @@ hwloc__topology_disadopt(hwloc_topology_t topology)
   free(topology->support.discovery);
   free(topology->support.cpubind);
   free(topology->support.membind);
+  free(topology->support.misc);
   free(topology);
 }
diff --git a/src/3rdparty/hwloc/src/static-components.h b/src/3rdparty/hwloc/src/static-components.h
index dac227a6..f2cb254a 100644
--- a/src/3rdparty/hwloc/src/static-components.h
+++ b/src/3rdparty/hwloc/src/static-components.h
@@ -1,9 +1,4 @@
-HWLOC_DECLSPEC extern const struct hwloc_component hwloc_noos_component;
-HWLOC_DECLSPEC extern const struct hwloc_component hwloc_xml_component;
-HWLOC_DECLSPEC extern const struct hwloc_component hwloc_synthetic_component;
-HWLOC_DECLSPEC extern const struct hwloc_component hwloc_xml_nolibxml_component;
-HWLOC_DECLSPEC extern const struct hwloc_component hwloc_windows_component;
-HWLOC_DECLSPEC extern const struct hwloc_component hwloc_x86_component;
+#include <private/internal-components.h>
 static const struct hwloc_component * hwloc_static_components[] = {
   &hwloc_noos_component,
   &hwloc_xml_component,
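The adopt path above now duplicates the fourth ("misc") support array alongside the other three, so adopters see a private copy of the writer's support bits. For illustration only (not part of this patch), a hedged sketch of the consumer side; error handling is elided:

#include <stdio.h>
#include <hwloc.h>
#include <hwloc/shmem.h>

/* Sketch: after hwloc_shmem_topology_adopt() succeeds, the support arrays
 * (including the new "misc" one) are plain heap copies owned by the adopter. */
static void show_support(hwloc_topology_t adopted)
{
  const struct hwloc_topology_support *s = hwloc_topology_get_support(adopted);
  printf("get_thisthread_cpubind: %u\n", (unsigned) s->cpubind->get_thisthread_cpubind);
  printf("imported_support:       %u\n", (unsigned) s->misc->imported_support);
}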
diff --git a/src/3rdparty/hwloc/src/topology-synthetic.c b/src/3rdparty/hwloc/src/topology-synthetic.c
index 50092e47..5dd4baaa 100644
--- a/src/3rdparty/hwloc/src/topology-synthetic.c
+++ b/src/3rdparty/hwloc/src/topology-synthetic.c
@@ -1,6 +1,6 @@
 /*
  * Copyright © 2009 CNRS
- * Copyright © 2009-2019 Inria.  All rights reserved.
+ * Copyright © 2009-2020 Inria.  All rights reserved.
  * Copyright © 2009-2010 Université Bordeaux
  * Copyright © 2009-2011 Cisco Systems, Inc.  All rights reserved.
  * See COPYING in top-level directory.
@@ -471,7 +471,7 @@ hwloc_backend_synthetic_init(struct hwloc_synthetic_backend_data_s *data,
   /* initialize parent arity to 0 so that the levels are not infinite */
   data->level[count-1].arity = 0;
 
-  while (*pos == ' ')
+  while (*pos == ' ' || *pos == '\n')
     pos++;
 
   if (!*pos)
@@ -912,7 +912,7 @@ hwloc_synthetic_insert_attached(struct hwloc_topology *topology,
 
   hwloc_synthetic_set_attr(&attached->attr, child);
 
-  hwloc_insert_object_by_cpuset(topology, child);
+  hwloc__insert_object_by_cpuset(topology, NULL, child, "synthetic:attached");
 
   hwloc_synthetic_insert_attached(topology, data, attached->next, set);
 }
@@ -964,7 +964,7 @@ hwloc__look_synthetic(struct hwloc_topology *topology,
 
     hwloc_synthetic_set_attr(&curlevel->attr, obj);
 
-    hwloc_insert_object_by_cpuset(topology, obj);
+    hwloc__insert_object_by_cpuset(topology, NULL, obj, "synthetic");
   }
 
   hwloc_synthetic_insert_attached(topology, data, curlevel->attached, set);
diff --git a/src/3rdparty/hwloc/src/topology-windows.c b/src/3rdparty/hwloc/src/topology-windows.c
index 195e5e22..b6458b6f 100644
--- a/src/3rdparty/hwloc/src/topology-windows.c
+++ b/src/3rdparty/hwloc/src/topology-windows.c
@@ -93,9 +93,10 @@ typedef struct _GROUP_AFFINITY {
 #endif
 
 #ifndef HAVE_PROCESSOR_RELATIONSHIP
-typedef struct _PROCESSOR_RELATIONSHIP {
+typedef struct HWLOC_PROCESSOR_RELATIONSHIP {
   BYTE Flags;
-  BYTE Reserved[21];
+  BYTE EfficiencyClass; /* for RelationProcessorCore, higher means greater performance but less efficiency, only available in Win10+ */
+  BYTE Reserved[20];
   WORD GroupCount;
   GROUP_AFFINITY GroupMask[ANYSIZE_ARRAY];
 } PROCESSOR_RELATIONSHIP, *PPROCESSOR_RELATIONSHIP;
@@ -228,9 +229,12 @@ static PFN_VIRTUALFREEEX VirtualFreeExProc;
 typedef BOOL (WINAPI *PFN_QUERYWORKINGSETEX)(HANDLE hProcess, PVOID pv, DWORD cb);
 static PFN_QUERYWORKINGSETEX QueryWorkingSetExProc;
 
+typedef NTSTATUS (WINAPI *PFN_RTLGETVERSION)(OSVERSIONINFOEX*);
+PFN_RTLGETVERSION RtlGetVersionProc;
+
 static void hwloc_win_get_function_ptrs(void)
 {
-  HMODULE kernel32;
+  HMODULE kernel32, ntdll;
 
 #if HWLOC_HAVE_GCC_W_CAST_FUNCTION_TYPE
 #pragma GCC diagnostic ignored "-Wcast-function-type"
@@ -275,6 +279,9 @@ static void hwloc_win_get_function_ptrs(void)
     QueryWorkingSetExProc = (PFN_QUERYWORKINGSETEX) GetProcAddress(psapi, "QueryWorkingSetEx");
   }
 
+  ntdll = GetModuleHandle("ntdll");
+  RtlGetVersionProc = (PFN_RTLGETVERSION) GetProcAddress(ntdll, "RtlGetVersion");
+
 #if HWLOC_HAVE_GCC_W_CAST_FUNCTION_TYPE
 #pragma GCC diagnostic warning "-Wcast-function-type"
 #endif
@@ -734,6 +741,88 @@ hwloc_win_get_area_memlocation(hwloc_topology_t topology __hwloc_attribute_unuse
 }
 
+
+/*************************
+ * Efficiency classes
+ */
+
+struct hwloc_win_efficiency_classes {
+  unsigned nr_classes;
+  unsigned nr_classes_allocated;
+  struct hwloc_win_efficiency_class {
+    unsigned value;
+    hwloc_bitmap_t cpuset;
+  } *classes;
+};
+
+static void
+hwloc_win_efficiency_classes_init(struct hwloc_win_efficiency_classes *classes)
+{
+  classes->classes = NULL;
+  classes->nr_classes_allocated = 0;
+  classes->nr_classes = 0;
+}
+
+static int
+hwloc_win_efficiency_classes_add(struct hwloc_win_efficiency_classes *classes,
+                                 hwloc_const_bitmap_t cpuset,
+                                 unsigned value)
+{
+  unsigned i;
+
+  /* look for existing class with that efficiency value */
+  for(i=0; i<classes->nr_classes; i++) {
+    if (classes->classes[i].value == 
value) {
+      hwloc_bitmap_or(classes->classes[i].cpuset, classes->classes[i].cpuset, cpuset);
+      return 0;
+    }
+  }
+
+  /* extend the array if needed */
+  if (classes->nr_classes == classes->nr_classes_allocated) {
+    struct hwloc_win_efficiency_class *tmp;
+    unsigned new_nr_allocated = 2*classes->nr_classes_allocated;
+    if (!new_nr_allocated) {
+#define HWLOC_WIN_EFFICIENCY_CLASSES_DEFAULT_MAX 4 /* 2 should be enough in most cases */
+      new_nr_allocated = HWLOC_WIN_EFFICIENCY_CLASSES_DEFAULT_MAX;
+    }
+    tmp = realloc(classes->classes, new_nr_allocated * sizeof(*classes->classes));
+    if (!tmp)
+      return -1;
+    classes->classes = tmp;
+    classes->nr_classes_allocated = new_nr_allocated;
+  }
+
+  /* add new class */
+  classes->classes[classes->nr_classes].cpuset = hwloc_bitmap_alloc();
+  if (!classes->classes[classes->nr_classes].cpuset)
+    return -1;
+  classes->classes[classes->nr_classes].value = value;
+  hwloc_bitmap_copy(classes->classes[classes->nr_classes].cpuset, cpuset);
+  classes->nr_classes++;
+  return 0;
+}
+
+static void
+hwloc_win_efficiency_classes_register(hwloc_topology_t topology,
+                                      struct hwloc_win_efficiency_classes *classes)
+{
+  unsigned i;
+  for(i=0; i<classes->nr_classes; i++) {
+    hwloc_internal_cpukinds_register(topology, classes->classes[i].cpuset, classes->classes[i].value, NULL, 0, 0);
+    classes->classes[i].cpuset = NULL; /* given to cpukinds */
+  }
+}
+
+static void
+hwloc_win_efficiency_classes_destroy(struct hwloc_win_efficiency_classes *classes)
+{
+  unsigned i;
+  for(i=0; i<classes->nr_classes; i++)
+    hwloc_bitmap_free(classes->classes[i].cpuset);
+  free(classes->classes);
+}
+
 /*************************
  * discovery
  */
@@ -753,6 +842,12 @@ hwloc_look_windows(struct hwloc_backend *backend, struct hwloc_disc_status *dsta
   DWORD length;
   int gotnuma = 0;
   int gotnumamemory = 0;
+  OSVERSIONINFOEX osvi;
+  char versionstr[20];
+  char hostname[122] = "";
+  unsigned hostname_size = sizeof(hostname);
+  int has_efficiencyclass = 0;
+  struct hwloc_win_efficiency_classes eclasses;
 
   assert(dstatus->phase == HWLOC_DISC_PHASE_CPU);
 
@@ -760,6 +855,25 @@ hwloc_look_windows(struct hwloc_backend *backend, struct hwloc_disc_status *dsta
     /* somebody discovered things */
     return -1;
 
+  ZeroMemory(&osvi, sizeof(OSVERSIONINFOEX));
+  osvi.dwOSVersionInfoSize = sizeof(OSVERSIONINFOEX);
+
+  if (RtlGetVersionProc) {
+    /* RtlGetVersion() returns the currently-running Windows version */
+    RtlGetVersionProc(&osvi);
+  } else {
+    /* GetVersionEx() and isWindows10OrGreater() depend on what the manifest says
+     * (manifest of the program, not of libhwloc.dll), they may return old versions
+     * if the currently-running Windows is not listed in the manifest.
+     */
+    GetVersionEx((LPOSVERSIONINFO)&osvi);
+  }
+
+  if (osvi.dwMajorVersion >= 10) {
+    has_efficiencyclass = 1;
+    hwloc_win_efficiency_classes_init(&eclasses);
+  }
+
   hwloc_alloc_root_sets(topology->levels[0][0]);
 
   GetSystemInfo(&SystemInfo);
@@ -887,7 +1001,7 @@ hwloc_look_windows(struct hwloc_backend *backend, struct hwloc_disc_status *dsta
       default:
         break;
       }
-      hwloc_insert_object_by_cpuset(topology, obj);
+      hwloc__insert_object_by_cpuset(topology, NULL, obj, "windows:GetLogicalProcessorInformation");
     }
     free(procInfo);
@@ -919,6 +1033,7 @@ hwloc_look_windows(struct hwloc_backend *backend, struct hwloc_disc_status *dsta
          (void*) procInfo < (void*) ((uintptr_t) procInfoTotal + length);
          procInfo = (void*) ((uintptr_t) procInfo + procInfo->Size)) {
       unsigned num, i;
+      unsigned efficiency_class = 0;
       GROUP_AFFINITY *GroupMask;
 
       /* Ignore unknown caches */
@@ -953,6 +1068,11 @@ hwloc_look_windows(struct hwloc_backend *backend, struct hwloc_disc_status *dsta
         type = HWLOC_OBJ_CORE;
         num = procInfo->Processor.GroupCount;
         GroupMask = procInfo->Processor.GroupMask;
+        if (has_efficiencyclass)
+          /* the EfficiencyClass field didn't exist before Windows10 and recent MSVC headers,
+           * so just access it manually instead of trying to detect it.
+           */
+          efficiency_class = * ((&procInfo->Processor.Flags) + 1);
         break;
       case RelationGroup:
         /* So strange an interface... */
@@ -981,7 +1101,7 @@ hwloc_look_windows(struct hwloc_backend *backend, struct hwloc_disc_status *dsta
           obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_GROUP, id);
           obj->cpuset = set;
           obj->attr->group.kind = HWLOC_GROUP_KIND_WINDOWS_PROCESSOR_GROUP;
-          hwloc_insert_object_by_cpuset(topology, obj);
+          hwloc__insert_object_by_cpuset(topology, NULL, obj, "windows:GetLogicalProcessorInformation:ProcessorGroup");
         } else
           hwloc_bitmap_free(set);
       }
@@ -1005,6 +1125,11 @@ hwloc_look_windows(struct hwloc_backend *backend, struct hwloc_disc_status *dsta
       }
       hwloc_debug_2args_bitmap("%s#%u bitmap %s\n", hwloc_obj_type_string(type), id, obj->cpuset);
       switch (type) {
+        case HWLOC_OBJ_CORE: {
+          if (has_efficiencyclass)
+            hwloc_win_efficiency_classes_add(&eclasses, obj->cpuset, efficiency_class);
+          break;
+        }
         case HWLOC_OBJ_NUMANODE:
         {
           ULONGLONG avail;
@@ -1055,7 +1180,7 @@ hwloc_look_windows(struct hwloc_backend *backend, struct hwloc_disc_status *dsta
         default:
           break;
       }
-      hwloc_insert_object_by_cpuset(topology, obj);
+      hwloc__insert_object_by_cpuset(topology, NULL, obj, "windows:GetLogicalProcessorInformationEx");
     }
     free(procInfoTotal);
   }
@@ -1076,29 +1201,88 @@ hwloc_look_windows(struct hwloc_backend *backend, struct hwloc_disc_status *dsta
       hwloc_bitmap_only(obj->cpuset, idx);
       hwloc_debug_1arg_bitmap("cpu %u has cpuset %s\n",
                               idx, obj->cpuset);
-      hwloc_insert_object_by_cpuset(topology, obj);
+      hwloc__insert_object_by_cpuset(topology, NULL, obj, "windows:ProcessorGroup:pu");
     } hwloc_bitmap_foreach_end();
     hwloc_bitmap_free(groups_pu_set);
   } else {
     /* no processor groups */
-    SYSTEM_INFO sysinfo;
     hwloc_obj_t obj;
     unsigned idx;
-    GetSystemInfo(&sysinfo);
     for(idx=0; idx<32; idx++)
-      if (sysinfo.dwActiveProcessorMask & (((DWORD_PTR)1)<<idx)) {
+      if (SystemInfo.dwActiveProcessorMask & (((DWORD_PTR)1)<<idx)) {
        obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_PU, idx);
        obj->cpuset = hwloc_bitmap_alloc();
        hwloc_bitmap_only(obj->cpuset, idx);
        hwloc_debug_1arg_bitmap("cpu %u has cpuset %s\n",
                                idx, obj->cpuset);
-       hwloc_insert_object_by_cpuset(topology, obj);
+       hwloc__insert_object_by_cpuset(topology, NULL, obj, "windows:pu");
       }
   }
 
+  if (has_efficiencyclass) {
+    topology->support.discovery->cpukind_efficiency = 1;
+    hwloc_win_efficiency_classes_register(topology, &eclasses);
+  }
+
+ out:
+  if (has_efficiencyclass)
hwloc_win_efficiency_classes_destroy(&eclasses); + + /* emulate uname instead of calling hwloc_add_uname_info() */ hwloc_obj_add_info(topology->levels[0][0], "Backend", "Windows"); - hwloc_add_uname_info(topology, NULL); + hwloc_obj_add_info(topology->levels[0][0], "OSName", "Windows"); + +#if defined(__CYGWIN__) + hwloc_obj_add_info(topology->levels[0][0], "WindowsBuildEnvironment", "Cygwin"); +#elif defined(__MINGW32__) + hwloc_obj_add_info(topology->levels[0][0], "WindowsBuildEnvironment", "MinGW"); +#endif + + /* see https://docs.microsoft.com/en-us/windows/win32/api/winnt/ns-winnt-osversioninfoexa */ + if (osvi.dwMajorVersion == 10) { + if (osvi.dwMinorVersion == 0) + hwloc_obj_add_info(topology->levels[0][0], "OSRelease", "10"); + } else if (osvi.dwMajorVersion == 6) { + if (osvi.dwMinorVersion == 3) + hwloc_obj_add_info(topology->levels[0][0], "OSRelease", "8.1"); /* or "Server 2012 R2" */ + else if (osvi.dwMinorVersion == 2) + hwloc_obj_add_info(topology->levels[0][0], "OSRelease", "8"); /* or "Server 2012" */ + else if (osvi.dwMinorVersion == 1) + hwloc_obj_add_info(topology->levels[0][0], "OSRelease", "7"); /* or "Server 2008 R2" */ + else if (osvi.dwMinorVersion == 0) + hwloc_obj_add_info(topology->levels[0][0], "OSRelease", "Vista"); /* or "Server 2008" */ + } /* earlier versions are ignored */ + + snprintf(versionstr, sizeof(versionstr), "%u.%u.%u", osvi.dwMajorVersion, osvi.dwMinorVersion, osvi.dwBuildNumber); + hwloc_obj_add_info(topology->levels[0][0], "OSVersion", versionstr); + +#if !defined(__CYGWIN__) + GetComputerName(hostname, &hostname_size); +#else + gethostname(hostname, hostname_size); +#endif + if (*hostname) + hwloc_obj_add_info(topology->levels[0][0], "Hostname", hostname); + + /* convert to unix-like architecture strings */ + switch (SystemInfo.wProcessorArchitecture) { + case 0: + hwloc_obj_add_info(topology->levels[0][0], "Architecture", "i686"); + break; + case 9: + hwloc_obj_add_info(topology->levels[0][0], "Architecture", "x86_64"); + break; + case 5: + hwloc_obj_add_info(topology->levels[0][0], "Architecture", "arm"); + break; + case 12: + hwloc_obj_add_info(topology->levels[0][0], "Architecture", "arm64"); + break; + case 6: + hwloc_obj_add_info(topology->levels[0][0], "Architecture", "ia64"); + break; + } + return 0; } diff --git a/src/3rdparty/hwloc/src/topology-x86.c b/src/3rdparty/hwloc/src/topology-x86.c index 1060157d..267384ee 100644 --- a/src/3rdparty/hwloc/src/topology-x86.c +++ b/src/3rdparty/hwloc/src/topology-x86.c @@ -1,5 +1,5 @@ /* - * Copyright © 2010-2019 Inria. All rights reserved. + * Copyright © 2010-2020 Inria. All rights reserved. * Copyright © 2010-2013 Université Bordeaux * Copyright © 2010-2011 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. 
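Both the Windows EfficiencyClass path above and the x86 hybrid leaf below feed hwloc_internal_cpukinds_register(); applications read the result through the public hwloc/cpukinds.h API. For illustration only (not part of this patch), a hedged sketch of the consumer side; hwloc orders kinds from lower to higher efficiency value when efficiencies are known, and the info names (e.g. "CoreType") vary by platform:

#include <stdio.h>
#include <stdlib.h>
#include <hwloc.h>
#include <hwloc/cpukinds.h>

/* Sketch: enumerate CPU kinds and their cpusets/efficiencies. */
static void list_cpukinds(hwloc_topology_t topo)
{
  int i, nr = hwloc_cpukinds_get_nr(topo, 0);
  for(i=0; i<nr; i++) {
    hwloc_bitmap_t cpuset = hwloc_bitmap_alloc();
    int efficiency;
    unsigned nr_infos;
    struct hwloc_info_s *infos;
    char *str;
    if (hwloc_cpukinds_get_info(topo, i, cpuset, &efficiency, &nr_infos, &infos, 0) < 0)
      break;
    hwloc_bitmap_asprintf(&str, cpuset);
    printf("kind #%d efficiency %d cpuset %s\n", i, efficiency, str);
    if (nr_infos)
      printf("  %s = %s\n", infos[0].name, infos[0].value);
    free(str);
    hwloc_bitmap_free(cpuset);
  }
}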
@@ -181,6 +181,7 @@ enum hwloc_x86_disc_flags {
 
 #define has_topoext(features) ((features)[6] & (1 << 22))
 #define has_x2apic(features) ((features)[4] & (1 << 21))
+#define has_hybrid(features) ((features)[18] & (1 << 15))
 
 struct cacheinfo {
   hwloc_obj_cache_type_t type;
@@ -217,6 +218,9 @@ struct procinfo {
   unsigned cpustepping;
   unsigned cpumodelnumber;
   unsigned cpufamilynumber;
+
+  unsigned hybridcoretype;
+  unsigned hybridnativemodel;
 };
 
 enum cpuid_type {
@@ -681,6 +685,15 @@ static void look_proc(struct hwloc_backend *backend, struct procinfo *infos, uns
     }
   }
 
+  if (highest_cpuid >= 0x1a && has_hybrid(features)) {
+    /* Get hybrid cpu information from cpuid 0x1a */
+    eax = 0x1a;
+    ecx = 0;
+    cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump);
+    infos->hybridcoretype = eax >> 24;
+    infos->hybridnativemodel = eax & 0xffffff;
+  }
+
   /*********************************************************************************
    * Get the hierarchy of thread, core, die, package, etc. from CPU-specific leaves
    */
@@ -751,7 +764,13 @@ static void look_proc(struct hwloc_backend *backend, struct procinfo *infos, uns
       /* default cacheid value */
       cache->cacheid = infos->apicid / cache->nbthreads_sharing;
 
-      if (cpuid_type == amd) {
+      if (cpuid_type == intel) {
+        /* round nbthreads_sharing to nearest power of two to build a mask (for clearing lower bits) */
+        unsigned bits = hwloc_flsl(cache->nbthreads_sharing-1);
+        unsigned mask = ~((1U<<bits) - 1);
+        cache->cacheid = infos->apicid & mask;
+
+      } else if (cpuid_type == amd) {
        /* AMD quirks */
        if (infos->cpufamilynumber == 0x17
            && cache->level == 3 && cache->nbthreads_sharing == 6) {
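The Intel cacheid computation just above clears the low APIC-ID bits after rounding the sharing count up to a power of two; hwloc_flsl() returns the 1-based index of the highest set bit. A self-contained sketch of the same arithmetic, with a standalone helper standing in for hwloc's internal hwloc_flsl() (an assumption, for illustration only):

#include <assert.h>
#include <stdio.h>

/* Sketch: find-last-set, mirroring hwloc_flsl(); returns 0 for x==0,
 * otherwise the 1-based index of the most significant set bit. */
static unsigned flsl_sketch(unsigned long x)
{
  unsigned i = 0;
  while (x) { x >>= 1; i++; }
  return i;
}

int main(void)
{
  unsigned nbthreads_sharing = 6;                   /* e.g. an L3 shared by 6 threads */
  unsigned bits = flsl_sketch(nbthreads_sharing-1); /* flsl(5) == 3 bits */
  unsigned mask = ~((1U<<bits) - 1);                /* clears the 3 low bits */
  unsigned apicid = 0x2b;
  printf("cacheid base = 0x%x\n", apicid & mask);   /* prints 0x28 */
  assert((apicid & mask) == 0x28);
  return 0;
}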
@@ -872,7 +891,7 @@ hwloc_x86_add_groups(hwloc_topology_t topology,
       obj->attr->group.dont_merge = dont_merge;
       hwloc_debug_2args_bitmap("os %s %u has cpuset %s\n",
                                subtype, id, obj_cpuset);
-      hwloc_insert_object_by_cpuset(topology, obj);
+      hwloc__insert_object_by_cpuset(topology, NULL, obj, "x86:group");
     }
   }
 
@@ -930,7 +949,7 @@ static void summarize(struct hwloc_backend *backend, struct procinfo *infos, uns
 
       hwloc_debug_1arg_bitmap("os package %u has cpuset %s\n",
                               packageid, package_cpuset);
-      hwloc_insert_object_by_cpuset(topology, package);
+      hwloc__insert_object_by_cpuset(topology, NULL, package, "x86:package");
     } else {
       /* Annotate packages previously-existing packages */
@@ -986,7 +1005,7 @@ static void summarize(struct hwloc_backend *backend, struct procinfo *infos, uns
           hwloc_bitmap_set(node->nodeset, nodeid);
           hwloc_debug_1arg_bitmap("os node %u has cpuset %s\n",
                                   nodeid, node_cpuset);
-          hwloc_insert_object_by_cpuset(topology, node);
+          hwloc__insert_object_by_cpuset(topology, NULL, node, "x86:numa");
           gotnuma++;
         }
       }
@@ -1033,7 +1052,7 @@ static void summarize(struct hwloc_backend *backend, struct procinfo *infos, uns
             unknown_obj->attr->group.subkind = level;
             hwloc_debug_2args_bitmap("os unknown%u %u has cpuset %s\n",
                                      level, unknownid, unknown_cpuset);
-            hwloc_insert_object_by_cpuset(topology, unknown_obj);
+            hwloc__insert_object_by_cpuset(topology, NULL, unknown_obj, "x86:group:unknown");
           }
         }
       }
@@ -1073,7 +1092,7 @@ static void summarize(struct hwloc_backend *backend, struct procinfo *infos, uns
             die->cpuset = die_cpuset;
             hwloc_debug_1arg_bitmap("os die %u has cpuset %s\n",
                                     dieid, die_cpuset);
-            hwloc_insert_object_by_cpuset(topology, die);
+            hwloc__insert_object_by_cpuset(topology, NULL, die, "x86:die");
           }
         }
       }
@@ -1111,7 +1130,7 @@ static void summarize(struct hwloc_backend *backend, struct procinfo *infos, uns
           core->cpuset = core_cpuset;
           hwloc_debug_1arg_bitmap("os core %u has cpuset %s\n",
                                   coreid, core_cpuset);
-          hwloc_insert_object_by_cpuset(topology, core);
+          hwloc__insert_object_by_cpuset(topology, NULL, core, "x86:core");
         }
       }
     }
@@ -1125,7 +1144,7 @@ static void summarize(struct hwloc_backend *backend, struct procinfo *infos, uns
       obj->cpuset = hwloc_bitmap_alloc();
       hwloc_bitmap_only(obj->cpuset, i);
       hwloc_debug_1arg_bitmap("PU %u has cpuset %s\n", i, obj->cpuset);
-      hwloc_insert_object_by_cpuset(topology, obj);
+      hwloc__insert_object_by_cpuset(topology, NULL, obj, "x86:pu");
     }
   }
 
@@ -1208,7 +1227,7 @@ static void summarize(struct hwloc_backend *backend, struct procinfo *infos, uns
           hwloc_obj_add_info(cache, "Inclusive", infos[i].cache[l].inclusive ? "1" : "0");
           hwloc_debug_2args_bitmap("os L%u cache %u has cpuset %s\n",
                                    level, cacheid, cache_cpuset);
-          hwloc_insert_object_by_cpuset(topology, cache);
+          hwloc__insert_object_by_cpuset(topology, NULL, cache, "x86:cache");
         }
       }
     }
@@ -1274,8 +1293,41 @@ look_procs(struct hwloc_backend *backend, struct procinfo *infos, unsigned long
     hwloc_bitmap_free(orig_cpuset);
   }
 
-  if (data->apicid_unique)
+  if (data->apicid_unique) {
     summarize(backend, infos, flags);
+
+    if (has_hybrid(features)) {
+      /* use hybrid info for cpukinds */
+      hwloc_bitmap_t atomset = hwloc_bitmap_alloc();
+      hwloc_bitmap_t coreset = hwloc_bitmap_alloc();
+      for(i=0; i<nbprocs; i++) {
+        if (infos[i].hybridcoretype == 0x20)
+          hwloc_bitmap_set(atomset, i);
+        else if (infos[i].hybridcoretype == 0x40)
+          hwloc_bitmap_set(coreset, i);
+      }
+      /* register IntelAtom set if any */
+      if (!hwloc_bitmap_iszero(atomset)) {
+        struct hwloc_info_s infoattr;
+        infoattr.name = (char *) "CoreType";
+        infoattr.value = (char *) "IntelAtom";
+        hwloc_internal_cpukinds_register(topology, atomset, HWLOC_CPUKIND_EFFICIENCY_UNKNOWN, &infoattr, 1, 0);
+        /* the cpuset is given to the callee */
+      } else {
+        hwloc_bitmap_free(atomset);
+      }
+      /* register IntelCore set if any */
+      if (!hwloc_bitmap_iszero(coreset)) {
+        struct hwloc_info_s infoattr;
+        infoattr.name = (char *) "CoreType";
+        infoattr.value = (char *) "IntelCore";
+        hwloc_internal_cpukinds_register(topology, coreset, HWLOC_CPUKIND_EFFICIENCY_UNKNOWN, &infoattr, 1, 0);
+        /* the cpuset is given to the callee */
+      } else {
+        hwloc_bitmap_free(coreset);
+      }
+    }
+  }
+  /* if !data->apicid_unique, do nothing and return success, so that the caller does nothing either */
   return 0;
@@ -1354,7 +1406,7 @@ int hwloc_look_x86(struct hwloc_backend *backend, unsigned long flags)
   unsigned highest_cpuid;
   unsigned highest_ext_cpuid;
   /* This stores cpuid features with the same indexing as Linux */
-  unsigned features[10] = { 0 };
+  unsigned features[19] = { 0 };
   struct procinfo *infos = NULL;
   enum cpuid_type cpuid_type = unknown;
   hwloc_x86_os_state_t os_state;
@@ -1381,6 +1433,9 @@ int hwloc_look_x86(struct hwloc_backend *backend, unsigned long flags)
   /* check if binding works */
   memset(&hooks, 0, sizeof(hooks));
   support.membind = &memsupport;
+  /* We could just copy the main hooks (except in some corner cases),
+   * but the current overhead is negligible, so just always reget them.
+ */ hwloc_set_native_binding_hooks(&hooks, &support); if (hooks.get_thisthread_cpubind && hooks.set_thisthread_cpubind) { get_cpubind = hooks.get_thisthread_cpubind; @@ -1451,6 +1506,7 @@ int hwloc_look_x86(struct hwloc_backend *backend, unsigned long flags) ecx = 0; cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump); features[9] = ebx; + features[18] = edx; } if (cpuid_type != intel && highest_ext_cpuid >= 0x80000001) { diff --git a/src/3rdparty/hwloc/src/topology-xml-nolibxml.c b/src/3rdparty/hwloc/src/topology-xml-nolibxml.c index 5a0006a0..c0691962 100644 --- a/src/3rdparty/hwloc/src/topology-xml-nolibxml.c +++ b/src/3rdparty/hwloc/src/topology-xml-nolibxml.c @@ -213,7 +213,7 @@ hwloc__nolibxml_import_close_child(hwloc__xml_import_state_t state) static int hwloc__nolibxml_import_get_content(hwloc__xml_import_state_t state, - char **beginp, size_t expected_length) + const char **beginp, size_t expected_length) { hwloc__nolibxml_import_state_data_t nstate = (void*) state->data; char *buffer = nstate->tagbuffer; @@ -224,7 +224,7 @@ hwloc__nolibxml_import_get_content(hwloc__xml_import_state_t state, if (nstate->closed) { if (expected_length) return -1; - *beginp = (char *) ""; + *beginp = ""; return 0; } diff --git a/src/3rdparty/hwloc/src/topology-xml.c b/src/3rdparty/hwloc/src/topology-xml.c index ba242853..fe04dd94 100644 --- a/src/3rdparty/hwloc/src/topology-xml.c +++ b/src/3rdparty/hwloc/src/topology-xml.c @@ -1,7 +1,7 @@ /* * Copyright © 2009 CNRS * Copyright © 2009-2020 Inria. All rights reserved. - * Copyright © 2009-2011 Université Bordeaux + * Copyright © 2009-2011, 2020 Université Bordeaux * Copyright © 2009-2018 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. */ @@ -481,11 +481,9 @@ hwloc__xml_import_object_attr(struct hwloc_topology *topology, } } - static int -hwloc__xml_import_info(struct hwloc_xml_backend_data_s *data, - hwloc_obj_t obj, - hwloc__xml_import_state_t state) +hwloc___xml_import_info(char **infonamep, char **infovaluep, + hwloc__xml_import_state_t state) { char *infoname = NULL; char *infovalue = NULL; @@ -502,6 +500,25 @@ hwloc__xml_import_info(struct hwloc_xml_backend_data_s *data, return -1; } + *infonamep = infoname; + *infovaluep = infovalue; + + return state->global->close_tag(state); +} + +static int +hwloc__xml_import_obj_info(struct hwloc_xml_backend_data_s *data, + hwloc_obj_t obj, + hwloc__xml_import_state_t state) +{ + char *infoname = NULL; + char *infovalue = NULL; + int err; + + err = hwloc___xml_import_info(&infoname, &infovalue, state); + if (err < 0) + return err; + if (infoname) { /* empty strings are ignored by libxml */ if (data->version_major < 2 && @@ -518,7 +535,7 @@ hwloc__xml_import_info(struct hwloc_xml_backend_data_s *data, } } - return state->global->close_tag(state); + return err; } static int @@ -694,14 +711,15 @@ hwloc__xml_import_userdata(hwloc_topology_t topology __hwloc_attribute_unused, h } if (!topology->userdata_import_cb) { - char *buffer; + const char *buffer; size_t reallength = encoded ? BASE64_ENCODED_LENGTH(length) : length; ret = state->global->get_content(state, &buffer, reallength); if (ret < 0) return -1; } else if (topology->userdata_not_decoded) { - char *buffer, *fakename; + const char *buffer; + char *fakename; size_t reallength = encoded ? 
BASE64_ENCODED_LENGTH(length) : length; ret = state->global->get_content(state, &buffer, reallength); if (ret < 0) @@ -714,7 +732,7 @@ hwloc__xml_import_userdata(hwloc_topology_t topology __hwloc_attribute_unused, h free(fakename); } else if (encoded && length) { - char *encoded_buffer; + const char *encoded_buffer; size_t encoded_length = BASE64_ENCODED_LENGTH(length); ret = state->global->get_content(state, &encoded_buffer, encoded_length); if (ret < 0) @@ -734,7 +752,7 @@ hwloc__xml_import_userdata(hwloc_topology_t topology __hwloc_attribute_unused, h } } else { /* always handle length==0 in the non-encoded case */ - char *buffer = (char *) ""; + const char *buffer = ""; if (length) { ret = state->global->get_content(state, &buffer, length); if (ret < 0) @@ -888,7 +906,7 @@ hwloc__xml_import_object(hwloc_topology_t topology, } } else if (!strcmp(tag, "info")) { - ret = hwloc__xml_import_info(data, obj, &childstate); + ret = hwloc__xml_import_obj_info(data, obj, &childstate); } else if (data->version_major < 2 && !strcmp(tag, "distances")) { ret = hwloc__xml_v1import_distances(data, obj, &childstate); } else if (!strcmp(tag, "userdata")) { @@ -1238,6 +1256,80 @@ hwloc__xml_import_object(hwloc_topology_t topology, return -1; } +static int +hwloc__xml_v2import_support(hwloc_topology_t topology, + hwloc__xml_import_state_t state) +{ + char *name = NULL; + int value = 1; /* value is optional */ + while (1) { + char *attrname, *attrvalue; + if (state->global->next_attr(state, &attrname, &attrvalue) < 0) + break; + if (!strcmp(attrname, "name")) + name = attrvalue; + else if (!strcmp(attrname, "value")) + value = atoi(attrvalue); + else { + if (hwloc__xml_verbose()) + fprintf(stderr, "%s: ignoring unknown support attribute %s\n", + state->global->msgprefix, attrname); + } + } + + if (name && topology->flags & HWLOC_TOPOLOGY_FLAG_IMPORT_SUPPORT) { +#ifdef HWLOC_DEBUG + HWLOC_BUILD_ASSERT(sizeof(struct hwloc_topology_support) == 4*sizeof(void*)); + HWLOC_BUILD_ASSERT(sizeof(struct hwloc_topology_discovery_support) == 6); + HWLOC_BUILD_ASSERT(sizeof(struct hwloc_topology_cpubind_support) == 11); + HWLOC_BUILD_ASSERT(sizeof(struct hwloc_topology_membind_support) == 15); + HWLOC_BUILD_ASSERT(sizeof(struct hwloc_topology_misc_support) == 1); +#endif + +#define DO(_cat,_name) if (!strcmp(#_cat "." 
#_name, name)) topology->support._cat->_name = value + DO(discovery,pu); + else DO(discovery,numa); + else DO(discovery,numa_memory); + else DO(discovery,disallowed_pu); + else DO(discovery,disallowed_numa); + else DO(discovery,cpukind_efficiency); + else DO(cpubind,set_thisproc_cpubind); + else DO(cpubind,get_thisproc_cpubind); + else DO(cpubind,set_proc_cpubind); + else DO(cpubind,get_proc_cpubind); + else DO(cpubind,set_thisthread_cpubind); + else DO(cpubind,get_thisthread_cpubind); + else DO(cpubind,set_thread_cpubind); + else DO(cpubind,get_thread_cpubind); + else DO(cpubind,get_thisproc_last_cpu_location); + else DO(cpubind,get_proc_last_cpu_location); + else DO(cpubind,get_thisthread_last_cpu_location); + else DO(membind,set_thisproc_membind); + else DO(membind,get_thisproc_membind); + else DO(membind,set_proc_membind); + else DO(membind,get_proc_membind); + else DO(membind,set_thisthread_membind); + else DO(membind,get_thisthread_membind); + else DO(membind,set_area_membind); + else DO(membind,get_area_membind); + else DO(membind,alloc_membind); + else DO(membind,firsttouch_membind); + else DO(membind,bind_membind); + else DO(membind,interleave_membind); + else DO(membind,nexttouch_membind); + else DO(membind,migrate_membind); + else DO(membind,get_area_memlocation); + + else if (!strcmp("custom.exported_support", name)) + /* support was exported in a custom/fake field, mark it as imported here */ + topology->support.misc->imported_support = 1; + +#undef DO + } + + return 0; +} + static int hwloc__xml_v2import_distances(hwloc_topology_t topology, hwloc__xml_import_state_t state, @@ -1317,7 +1409,8 @@ hwloc__xml_v2import_distances(hwloc_topology_t topology, nr_u64values = 0; while (1) { struct hwloc__xml_import_state_s childstate; - char *attrname, *attrvalue, *tag, *buffer; + char *attrname, *attrvalue, *tag; + const char *buffer; int length; int is_index = 0; int is_u64values = 0; @@ -1356,7 +1449,7 @@ hwloc__xml_v2import_distances(hwloc_topology_t topology, if (is_index) { /* get indexes */ - char *tmp, *tmp2; + const char *tmp, *tmp2; if (nr_indexes >= nbobjs) { if (hwloc__xml_verbose()) fprintf(stderr, "%s: %s with more than %u indexes\n", @@ -1398,7 +1491,7 @@ hwloc__xml_v2import_distances(hwloc_topology_t topology, } else if (is_u64values) { /* get uint64_t values */ - char *tmp; + const char *tmp; if (nr_u64values >= nbobjs*nbobjs) { if (hwloc__xml_verbose()) fprintf(stderr, "%s: %s with more than %u u64values\n", @@ -1491,6 +1584,259 @@ hwloc__xml_v2import_distances(hwloc_topology_t topology, #undef _TAG_NAME } +static int +hwloc__xml_import_memattr_value(hwloc_topology_t topology, + hwloc_memattr_id_t id, + unsigned long flags, + hwloc__xml_import_state_t state) +{ + char *target_obj_gp_index_s = NULL; + char *target_obj_type_s = NULL; + hwloc_uint64_t target_obj_gp_index; + char *value_s = NULL; + hwloc_uint64_t value; + char *initiator_cpuset_s = NULL; + char *initiator_obj_gp_index_s = NULL; + char *initiator_obj_type_s = NULL; + hwloc_obj_type_t target_obj_type = HWLOC_OBJ_TYPE_NONE; + + while (1) { + char *attrname, *attrvalue; + if (state->global->next_attr(state, &attrname, &attrvalue) < 0) + break; + if (!strcmp(attrname, "target_obj_gp_index")) + target_obj_gp_index_s = attrvalue; + else if (!strcmp(attrname, "target_obj_type")) + target_obj_type_s = attrvalue; + else if (!strcmp(attrname, "value")) + value_s = attrvalue; + else if (!strcmp(attrname, "initiator_cpuset")) + initiator_cpuset_s = attrvalue; + else if (!strcmp(attrname, "initiator_obj_gp_index")) + 
initiator_obj_gp_index_s = attrvalue; + else if (!strcmp(attrname, "initiator_obj_type")) + initiator_obj_type_s = attrvalue; + else { + if (hwloc__xml_verbose()) + fprintf(stderr, "%s: ignoring unknown memattr_value attribute %s\n", + state->global->msgprefix, attrname); + return -1; + } + } + + if (!target_obj_type_s) { + if (hwloc__xml_verbose()) + fprintf(stderr, "%s: ignoring memattr_value without target_obj_type.\n", + state->global->msgprefix); + return -1; + } + if (hwloc_type_sscanf(target_obj_type_s, &target_obj_type, NULL, 0) < 0) { + if (hwloc__xml_verbose()) + fprintf(stderr, "%s: failed to identify memattr_value target object type %s\n", + state->global->msgprefix, target_obj_type_s); + return -1; + } + + if (!value_s || !target_obj_gp_index_s) { + if (hwloc__xml_verbose()) + fprintf(stderr, "%s: ignoring memattr_value without value and target_obj_gp_index\n", + state->global->msgprefix); + return -1; + } + target_obj_gp_index = strtoull(target_obj_gp_index_s, NULL, 10); + value = strtoull(value_s, NULL, 10); + + if (flags & HWLOC_MEMATTR_FLAG_NEED_INITIATOR) { + /* add a value with initiator */ + struct hwloc_internal_location_s loc; + if (!initiator_cpuset_s && (!initiator_obj_gp_index_s || !initiator_obj_type_s)) { + if (hwloc__xml_verbose()) + fprintf(stderr, "%s: ignoring memattr_value without initiator attributes\n", + state->global->msgprefix); + return -1; + } + + /* setup the initiator */ + if (initiator_cpuset_s) { + loc.type = HWLOC_LOCATION_TYPE_CPUSET; + loc.location.cpuset = hwloc_bitmap_alloc(); + if (!loc.location.cpuset) { + if (hwloc__xml_verbose()) + fprintf(stderr, "%s: failed to allocated memattr_value initiator cpuset\n", + state->global->msgprefix); + return -1; + } + hwloc_bitmap_sscanf(loc.location.cpuset, initiator_cpuset_s); + } else { + loc.type = HWLOC_LOCATION_TYPE_OBJECT; + loc.location.object.gp_index = strtoull(initiator_obj_gp_index_s, NULL, 10); + if (hwloc_type_sscanf(initiator_obj_type_s, &loc.location.object.type, NULL, 0) < 0) { + if (hwloc__xml_verbose()) + fprintf(stderr, "%s: failed to identify memattr_value initiator object type %s\n", + state->global->msgprefix, initiator_obj_type_s); + return -1; + } + } + + hwloc_internal_memattr_set_value(topology, id, target_obj_type, target_obj_gp_index, (unsigned)-1, &loc, value); + + if (loc.type == HWLOC_LOCATION_TYPE_CPUSET) + hwloc_bitmap_free(loc.location.cpuset); + + } else { + /* add a value without initiator */ + hwloc_internal_memattr_set_value(topology, id, target_obj_type, target_obj_gp_index, (unsigned)-1, NULL, value); + } + + return 0; +} + +static int +hwloc__xml_import_memattr(hwloc_topology_t topology, + hwloc__xml_import_state_t state) +{ + char *name = NULL; + unsigned long flags = (unsigned long) -1; + hwloc_memattr_id_t id = (hwloc_memattr_id_t) -1; + int ret; + + while (1) { + char *attrname, *attrvalue; + if (state->global->next_attr(state, &attrname, &attrvalue) < 0) + break; + if (!strcmp(attrname, "name")) + name = attrvalue; + else if (!strcmp(attrname, "flags")) + flags = strtoul(attrvalue, NULL, 10); + else { + if (hwloc__xml_verbose()) + fprintf(stderr, "%s: ignoring unknown memattr attribute %s\n", + state->global->msgprefix, attrname); + return -1; + } + } + + if (name && flags != (unsigned long) -1) { + hwloc_memattr_id_t _id; + + ret = hwloc_memattr_get_by_name(topology, name, &_id); + if (ret < 0) { + /* register a new attribute */ + ret = hwloc_memattr_register(topology, name, flags, &_id); + if (!ret) + id = _id; + } else { + /* check the flags of the 
existing attribute */ + unsigned long mflags; + ret = hwloc_memattr_get_flags(topology, _id, &mflags); + if (!ret && mflags == flags) + id = _id; + } + /* if there's no matching attribute, id is -1 and values will be ignored below */ + } + + while (1) { + struct hwloc__xml_import_state_s childstate; + char *tag; + + ret = state->global->find_child(state, &childstate, &tag); + if (ret <= 0) + break; + + if (!strcmp(tag, "memattr_value")) { + ret = hwloc__xml_import_memattr_value(topology, id, flags, &childstate); + } else { + if (hwloc__xml_verbose()) + fprintf(stderr, "%s: memattr with unrecognized child %s\n", + state->global->msgprefix, tag); + ret = -1; + } + + if (ret < 0) + goto error; + + state->global->close_child(&childstate); + } + + return state->global->close_tag(state); + + error: + return -1; +} + +static int +hwloc__xml_import_cpukind(hwloc_topology_t topology, + hwloc__xml_import_state_t state) +{ + hwloc_bitmap_t cpuset = NULL; + int forced_efficiency = HWLOC_CPUKIND_EFFICIENCY_UNKNOWN; + unsigned nr_infos = 0; + struct hwloc_info_s *infos = NULL; + int ret; + + while (1) { + char *attrname, *attrvalue; + if (state->global->next_attr(state, &attrname, &attrvalue) < 0) + break; + if (!strcmp(attrname, "cpuset")) { + if (!cpuset) + cpuset = hwloc_bitmap_alloc(); + hwloc_bitmap_sscanf(cpuset, attrvalue); + } else if (!strcmp(attrname, "forced_efficiency")) { + forced_efficiency = atoi(attrvalue); + } else { + if (hwloc__xml_verbose()) + fprintf(stderr, "%s: ignoring unknown cpukind attribute %s\n", + state->global->msgprefix, attrname); + hwloc_bitmap_free(cpuset); + return -1; + } + } + + while (1) { + struct hwloc__xml_import_state_s childstate; + char *tag; + + ret = state->global->find_child(state, &childstate, &tag); + if (ret <= 0) + break; + + if (!strcmp(tag, "info")) { + char *infoname = NULL; + char *infovalue = NULL; + ret = hwloc___xml_import_info(&infoname, &infovalue, &childstate); + if (!ret && infoname && infovalue) + hwloc__add_info(&infos, &nr_infos, infoname, infovalue); + } else { + if (hwloc__xml_verbose()) + fprintf(stderr, "%s: cpukind with unrecognized child %s\n", + state->global->msgprefix, tag); + ret = -1; + } + + if (ret < 0) + goto error; + + state->global->close_child(&childstate); + } + + if (!cpuset) { + if (hwloc__xml_verbose()) + fprintf(stderr, "%s: ignoring cpukind without cpuset\n", + state->global->msgprefix); + goto error; + } + + hwloc_internal_cpukinds_register(topology, cpuset, forced_efficiency, infos, nr_infos, HWLOC_CPUKINDS_REGISTER_FLAG_OVERWRITE_FORCED_EFFICIENCY); + + return state->global->close_tag(state); + + error: + hwloc__free_infos(infos, nr_infos); + hwloc_bitmap_free(cpuset); + return -1; +} + static int hwloc__xml_import_diff_one(hwloc__xml_import_state_t state, hwloc_topology_diff_t *firstdiffp, @@ -1759,6 +2105,18 @@ hwloc_look_xml(struct hwloc_backend *backend, struct hwloc_disc_status *dstatus) ret = hwloc__xml_v2import_distances(topology, &childstate, 1); if (ret < 0) goto failed; + } else if (!strcmp(tag, "support")) { + ret = hwloc__xml_v2import_support(topology, &childstate); + if (ret < 0) + goto failed; + } else if (!strcmp(tag, "memattr")) { + ret = hwloc__xml_import_memattr(topology, &childstate); + if (ret < 0) + goto failed; + } else if (!strcmp(tag, "cpukind")) { + ret = hwloc__xml_import_cpukind(topology, &childstate); + if (ret < 0) + goto failed; } else { if (hwloc__xml_verbose()) fprintf(stderr, "%s: ignoring unknown tag `%s' after root object.\n", @@ -1864,12 +2222,14 @@ done: /* keep the "Backend" 
information intact */ /* we could add "BackendSource=XML" to notify that XML was used between the actual backend and here */ - topology->support.discovery->pu = 1; - topology->support.discovery->disallowed_pu = 1; - if (data->nbnumanodes) { - topology->support.discovery->numa = 1; - topology->support.discovery->numa_memory = 1; // FIXME - topology->support.discovery->disallowed_numa = 1; + if (!(topology->flags & HWLOC_TOPOLOGY_FLAG_IMPORT_SUPPORT)) { + topology->support.discovery->pu = 1; + topology->support.discovery->disallowed_pu = 1; + if (data->nbnumanodes) { + topology->support.discovery->numa = 1; + topology->support.discovery->numa_memory = 1; // FIXME + topology->support.discovery->disallowed_numa = 1; + } } if (data->look_done) @@ -2620,9 +2980,199 @@ hwloc__xml_v2export_distances(hwloc__xml_export_state_t parentstate, hwloc_topol hwloc___xml_v2export_distances(parentstate, dist); } +static void +hwloc__xml_v2export_support(hwloc__xml_export_state_t parentstate, hwloc_topology_t topology) +{ + struct hwloc__xml_export_state_s state; + char tmp[11]; + +#ifdef HWLOC_DEBUG + HWLOC_BUILD_ASSERT(sizeof(struct hwloc_topology_support) == 4*sizeof(void*)); + HWLOC_BUILD_ASSERT(sizeof(struct hwloc_topology_discovery_support) == 6); + HWLOC_BUILD_ASSERT(sizeof(struct hwloc_topology_cpubind_support) == 11); + HWLOC_BUILD_ASSERT(sizeof(struct hwloc_topology_membind_support) == 15); + HWLOC_BUILD_ASSERT(sizeof(struct hwloc_topology_misc_support) == 1); +#endif + +#define DO(_cat,_name) do { \ + if (topology->support._cat->_name) { \ + parentstate->new_child(parentstate, &state, "support"); \ + state.new_prop(&state, "name", #_cat "." #_name); \ + if (topology->support._cat->_name != 1) { \ + sprintf(tmp, "%u", topology->support._cat->_name); \ + state.new_prop(&state, "value", tmp); \ + } \ + state.end_object(&state, "support"); \ + } \ + } while (0) + + DO(discovery,pu); + DO(discovery,numa); + DO(discovery,numa_memory); + DO(discovery,disallowed_pu); + DO(discovery,disallowed_numa); + DO(discovery,cpukind_efficiency); + DO(cpubind,set_thisproc_cpubind); + DO(cpubind,get_thisproc_cpubind); + DO(cpubind,set_proc_cpubind); + DO(cpubind,get_proc_cpubind); + DO(cpubind,set_thisthread_cpubind); + DO(cpubind,get_thisthread_cpubind); + DO(cpubind,set_thread_cpubind); + DO(cpubind,get_thread_cpubind); + DO(cpubind,get_thisproc_last_cpu_location); + DO(cpubind,get_proc_last_cpu_location); + DO(cpubind,get_thisthread_last_cpu_location); + DO(membind,set_thisproc_membind); + DO(membind,get_thisproc_membind); + DO(membind,set_proc_membind); + DO(membind,get_proc_membind); + DO(membind,set_thisthread_membind); + DO(membind,get_thisthread_membind); + DO(membind,set_area_membind); + DO(membind,get_area_membind); + DO(membind,alloc_membind); + DO(membind,firsttouch_membind); + DO(membind,bind_membind); + DO(membind,interleave_membind); + DO(membind,nexttouch_membind); + DO(membind,migrate_membind); + DO(membind,get_area_memlocation); + + /* misc.imported_support would be meaningless in the remote importer, + * but the importer needs to know whether we exported support or not + * (in case there are no support bit set at all), + * use a custom/fake field to do so. 
+ */ + parentstate->new_child(parentstate, &state, "support"); + state.new_prop(&state, "name", "custom.exported_support"); + state.end_object(&state, "support"); + +#undef DO +} + +static void +hwloc__xml_export_memattr_target(hwloc__xml_export_state_t state, + struct hwloc_internal_memattr_s *imattr, + struct hwloc_internal_memattr_target_s *imtg) +{ + struct hwloc__xml_export_state_s vstate; + char tmp[255]; + + if (imattr->flags & HWLOC_MEMATTR_FLAG_NEED_INITIATOR) { + /* export all initiators */ + unsigned k; + for(k=0; k<imtg->nr_initiators; k++) { + struct hwloc_internal_memattr_initiator_s *imi = &imtg->initiators[k]; + state->new_child(state, &vstate, "memattr_value"); + vstate.new_prop(&vstate, "target_obj_type", hwloc_obj_type_string(imtg->type)); + snprintf(tmp, sizeof(tmp), "%llu", (unsigned long long) imtg->gp_index); + vstate.new_prop(&vstate, "target_obj_gp_index", tmp); + snprintf(tmp, sizeof(tmp), "%llu", (unsigned long long) imi->value); + vstate.new_prop(&vstate, "value", tmp); + switch (imi->initiator.type) { + case HWLOC_LOCATION_TYPE_OBJECT: + snprintf(tmp, sizeof(tmp), "%llu", (unsigned long long) imi->initiator.location.object.gp_index); + vstate.new_prop(&vstate, "initiator_obj_gp_index", tmp); + vstate.new_prop(&vstate, "initiator_obj_type", hwloc_obj_type_string(imi->initiator.location.object.type)); + break; + case HWLOC_LOCATION_TYPE_CPUSET: { + char *setstring; + hwloc_bitmap_asprintf(&setstring, imi->initiator.location.cpuset); + if (setstring) + vstate.new_prop(&vstate, "initiator_cpuset", setstring); + free(setstring); + break; + } + default: + assert(0); + } + vstate.end_object(&vstate, "memattr_value"); + } + } else { + /* just export the global value */ + state->new_child(state, &vstate, "memattr_value"); + vstate.new_prop(&vstate, "target_obj_type", hwloc_obj_type_string(imtg->type)); + snprintf(tmp, sizeof(tmp), "%llu", (unsigned long long) imtg->gp_index); + vstate.new_prop(&vstate, "target_obj_gp_index", tmp); + snprintf(tmp, sizeof(tmp), "%llu", (unsigned long long) imtg->noinitiator_value); + vstate.new_prop(&vstate, "value", tmp); + vstate.end_object(&vstate, "memattr_value"); + } +} + +static void +hwloc__xml_export_memattrs(hwloc__xml_export_state_t state, hwloc_topology_t topology) +{ + unsigned id; + for(id=0; id<topology->nr_memattrs; id++) { + struct hwloc_internal_memattr_s *imattr; + struct hwloc__xml_export_state_s mstate; + char tmp[255]; + unsigned j; + + if (id == HWLOC_MEMATTR_ID_CAPACITY || id == HWLOC_MEMATTR_ID_LOCALITY) + /* no need to export virtual memattrs */ + continue; + + imattr = &topology->memattrs[id]; + if ((id == HWLOC_MEMATTR_ID_LATENCY || id == HWLOC_MEMATTR_ID_BANDWIDTH) + && !imattr->nr_targets) + /* no need to export target-less attributes for initial attributes, no release support attributes without those definitions */ + continue; + + state->new_child(state, &mstate, "memattr"); + mstate.new_prop(&mstate, "name", imattr->name); + snprintf(tmp, sizeof(tmp), "%lu", imattr->flags); + mstate.new_prop(&mstate, "flags", tmp); + + for(j=0; j<imattr->nr_targets; j++) + hwloc__xml_export_memattr_target(&mstate, imattr, &imattr->targets[j]); + + mstate.end_object(&mstate, "memattr"); + } +} + +static void +hwloc__xml_export_cpukinds(hwloc__xml_export_state_t state, hwloc_topology_t topology) +{ + unsigned i; + for(i=0; i<topology->nr_cpukinds; i++) { + struct hwloc_internal_cpukind_s *kind = &topology->cpukinds[i]; + struct hwloc__xml_export_state_s cstate; + char *setstring; + unsigned j; + + state->new_child(state, &cstate, "cpukind");
+ hwloc_bitmap_asprintf(&setstring, kind->cpuset); + cstate.new_prop(&cstate, "cpuset", setstring); + free(setstring); + if (kind->forced_efficiency != HWLOC_CPUKIND_EFFICIENCY_UNKNOWN) { + char tmp[11]; + snprintf(tmp, sizeof(tmp), "%d", kind->forced_efficiency); + cstate.new_prop(&cstate, "forced_efficiency", tmp); + } + + for(j=0; j<kind->nr_infos; j++) { + char *name = hwloc__xml_export_safestrdup(kind->infos[j].name); + char *value = hwloc__xml_export_safestrdup(kind->infos[j].value); + struct hwloc__xml_export_state_s istate; + cstate.new_child(&cstate, &istate, "info"); + istate.new_prop(&istate, "name", name); + istate.new_prop(&istate, "value", value); + istate.end_object(&istate, "info"); + free(name); + free(value); + } + + cstate.end_object(&cstate, "cpukind"); + } +} + void hwloc__xml_export_topology(hwloc__xml_export_state_t state, hwloc_topology_t topology, unsigned long flags) { + char *env; hwloc_obj_t root = hwloc_get_root_obj(topology); if (flags & HWLOC_TOPOLOGY_EXPORT_XML_FLAG_V1) { @@ -2665,6 +3215,11 @@ hwloc__xml_export_topology(hwloc__xml_export_state_t state, hwloc_topology_t top } else { hwloc__xml_v2export_object (state, topology, root, flags); hwloc__xml_v2export_distances (state, topology); + env = getenv("HWLOC_XML_EXPORT_SUPPORT"); + if (!env || atoi(env)) + hwloc__xml_v2export_support(state, topology); + hwloc__xml_export_memattrs(state, topology); + hwloc__xml_export_cpukinds(state, topology); } } diff --git a/src/3rdparty/hwloc/src/topology.c b/src/3rdparty/hwloc/src/topology.c index 34692517..94387ece 100644 --- a/src/3rdparty/hwloc/src/topology.c +++ b/src/3rdparty/hwloc/src/topology.c @@ -1,6 +1,6 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2019 Inria. All rights reserved. + * Copyright © 2009-2020 Inria. All rights reserved. * Copyright © 2009-2012, 2020 Université Bordeaux * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. */ @@ -75,16 +75,49 @@ int hwloc_hide_errors(void) return hide; } -void hwloc_report_os_error(const char *msg, int line) + +/* format the obj info to print in error messages */ +static void +report_insert_error_format_obj(char *buf, size_t buflen, hwloc_obj_t obj) +{ + char typestr[64]; + char *cpusetstr; + char *nodesetstr = NULL; + + hwloc_obj_type_snprintf(typestr, sizeof(typestr), obj, 0); + hwloc_bitmap_asprintf(&cpusetstr, obj->cpuset); + if (obj->nodeset) /* may be missing during insert */ + hwloc_bitmap_asprintf(&nodesetstr, obj->nodeset); + if (obj->os_index != HWLOC_UNKNOWN_INDEX) + snprintf(buf, buflen, "%s (P#%u cpuset %s%s%s)", + typestr, obj->os_index, cpusetstr, + nodesetstr ? " nodeset " : "", + nodesetstr ? nodesetstr : ""); + else + snprintf(buf, buflen, "%s (cpuset %s%s%s)", + typestr, cpusetstr, + nodesetstr ? " nodeset " : "", + nodesetstr ?
nodesetstr : ""); + free(cpusetstr); + free(nodesetstr); +} + +static void report_insert_error(hwloc_obj_t new, hwloc_obj_t old, const char *msg, const char *reason) { static int reported = 0; - if (!reported && !hwloc_hide_errors()) { + if (reason && !reported && !hwloc_hide_errors()) { + char newstr[512]; + char oldstr[512]; + report_insert_error_format_obj(newstr, sizeof(newstr), new); + report_insert_error_format_obj(oldstr, sizeof(oldstr), old); + fprintf(stderr, "****************************************************************************\n"); fprintf(stderr, "* hwloc %s received invalid information from the operating system.\n", HWLOC_VERSION); fprintf(stderr, "*\n"); - fprintf(stderr, "* %s\n", msg); - fprintf(stderr, "* Error occurred in topology.c line %d\n", line); + fprintf(stderr, "* Failed with: %s\n", msg); + fprintf(stderr, "* while inserting %s at %s\n", newstr, oldstr); + fprintf(stderr, "* coming from: %s\n", reason); fprintf(stderr, "*\n"); fprintf(stderr, "* The following FAQ entry in the hwloc documentation may help:\n"); fprintf(stderr, "* What should I do when hwloc reports \"operating system\" warnings?\n"); @@ -264,7 +297,7 @@ hwloc_setup_pu_level(struct hwloc_topology *topology, hwloc_debug_2args_bitmap("cpu %u (os %u) has cpuset %s\n", cpu, oscpu, obj->cpuset); - hwloc_insert_object_by_cpuset(topology, obj); + hwloc__insert_object_by_cpuset(topology, NULL, obj, "core:pulevel"); cpu++; } @@ -347,16 +380,18 @@ hwloc_debug_print_object(int indent __hwloc_attribute_unused, hwloc_obj_t obj) static void hwloc_debug_print_objects(int indent __hwloc_attribute_unused, hwloc_obj_t obj) { - hwloc_obj_t child; - hwloc_debug_print_object(indent, obj); - for_each_child (child, obj) - hwloc_debug_print_objects(indent + 1, child); - for_each_memory_child (child, obj) - hwloc_debug_print_objects(indent + 1, child); - for_each_io_child (child, obj) - hwloc_debug_print_objects(indent + 1, child); - for_each_misc_child (child, obj) - hwloc_debug_print_objects(indent + 1, child); + if (hwloc_debug_enabled() >= 2) { + hwloc_obj_t child; + hwloc_debug_print_object(indent, obj); + for_each_child (child, obj) + hwloc_debug_print_objects(indent + 1, child); + for_each_memory_child (child, obj) + hwloc_debug_print_objects(indent + 1, child); + for_each_io_child (child, obj) + hwloc_debug_print_objects(indent + 1, child); + for_each_misc_child (child, obj) + hwloc_debug_print_objects(indent + 1, child); + } } #else /* !HWLOC_DEBUG */ #define hwloc_debug_print_object(indent, obj) do { /* nothing */ } while (0) @@ -472,29 +507,33 @@ int hwloc_obj_add_info(hwloc_obj_t obj, const char *name, const char *value) } /* This function may be called with topology->tma set, it cannot free() or realloc() */ -static int hwloc__tma_dup_infos(struct hwloc_tma *tma, hwloc_obj_t new, hwloc_obj_t src) +int hwloc__tma_dup_infos(struct hwloc_tma *tma, + struct hwloc_info_s **newip, unsigned *newcp, + struct hwloc_info_s *oldi, unsigned oldc) { + struct hwloc_info_s *newi; unsigned i, j; - new->infos = hwloc_tma_calloc(tma, src->infos_count * sizeof(*src->infos)); - if (!new->infos) + newi = hwloc_tma_calloc(tma, oldc * sizeof(*newi)); + if (!newi) return -1; - for(i=0; iinfos_count; i++) { - new->infos[i].name = hwloc_tma_strdup(tma, src->infos[i].name); - new->infos[i].value = hwloc_tma_strdup(tma, src->infos[i].value); - if (!new->infos[i].name || !new->infos[i].value) + for(i=0; iinfos_count = src->infos_count; + *newip = newi; + *newcp = oldc; return 0; failed: assert(!tma || !tma->dontfree); /* this tma 
cannot fail to allocate */ for(j=0; j<=i; j++) { - free(new->infos[i].name); - free(new->infos[i].value); + free(newi[i].name); + free(newi[i].value); } - free(new->infos); - new->infos = NULL; + free(newi); + *newip = NULL; return -1; } @@ -812,7 +851,7 @@ hwloc__duplicate_object(struct hwloc_topology *newtopology, newobj->nodeset = hwloc_bitmap_tma_dup(tma, src->nodeset); newobj->complete_nodeset = hwloc_bitmap_tma_dup(tma, src->complete_nodeset); - hwloc__tma_dup_infos(tma, newobj, src); + hwloc__tma_dup_infos(tma, &newobj->infos, &newobj->infos_count, src->infos, src->infos_count); /* find our level */ if (src->depth < 0) { @@ -970,6 +1009,7 @@ hwloc__topology_dup(hwloc_topology_t *newp, memcpy(new->support.discovery, old->support.discovery, sizeof(*old->support.discovery)); memcpy(new->support.cpubind, old->support.cpubind, sizeof(*old->support.cpubind)); memcpy(new->support.membind, old->support.membind, sizeof(*old->support.membind)); + memcpy(new->support.misc, old->support.misc, sizeof(*old->support.misc)); new->allowed_cpuset = hwloc_bitmap_tma_dup(tma, old->allowed_cpuset); new->allowed_nodeset = hwloc_bitmap_tma_dup(tma, old->allowed_nodeset); @@ -1008,6 +1048,14 @@ hwloc__topology_dup(hwloc_topology_t *newp, if (err < 0) goto out_with_topology; + err = hwloc_internal_memattrs_dup(new, old); + if (err < 0) + goto out_with_topology; + + err = hwloc_internal_cpukinds_dup(new, old); + if (err < 0) + goto out_with_topology; + /* we connected everything during duplication */ new->modified = 0; @@ -1229,31 +1277,6 @@ hwloc__object_cpusets_compare_first(hwloc_obj_t obj1, hwloc_obj_t obj2) return 0; } -/* format the obj info to print in error messages */ -static void -hwloc__report_error_format_obj(char *buf, size_t buflen, hwloc_obj_t obj) -{ - char typestr[64]; - char *cpusetstr; - char *nodesetstr = NULL; - hwloc_obj_type_snprintf(typestr, sizeof(typestr), obj, 0); - hwloc_bitmap_asprintf(&cpusetstr, obj->cpuset); - if (obj->nodeset) /* may be missing during insert */ - hwloc_bitmap_asprintf(&nodesetstr, obj->nodeset); - if (obj->os_index != HWLOC_UNKNOWN_INDEX) - snprintf(buf, buflen, "%s (P#%u cpuset %s%s%s)", - typestr, obj->os_index, cpusetstr, - nodesetstr ? " nodeset " : "", - nodesetstr ? nodesetstr : ""); - else - snprintf(buf, buflen, "%s (cpuset %s%s%s)", - typestr, cpusetstr, - nodesetstr ? " nodeset " : "", - nodesetstr ? nodesetstr : ""); - free(cpusetstr); - free(nodesetstr); -} - /* * How to insert objects into the topology. * @@ -1390,9 +1413,9 @@ hwloc__insert_try_merge_group(hwloc_obj_t old, hwloc_obj_t new) */ static struct hwloc_obj * hwloc___insert_object_by_cpuset(struct hwloc_topology *topology, hwloc_obj_t cur, hwloc_obj_t obj, - hwloc_report_error_t report_error) + const char *reason) { - hwloc_obj_t child, next_child = NULL; + hwloc_obj_t child, next_child = NULL, tmp; /* These will always point to the pointer to their next last child. */ hwloc_obj_t *cur_children = &cur->first_child; hwloc_obj_t *obj_children = &obj->first_child; @@ -1430,18 +1453,10 @@ hwloc___insert_object_by_cpuset(struct hwloc_topology *topology, hwloc_obj_t cur case HWLOC_OBJ_INCLUDED: /* OBJ is strictly contained is some child of CUR, go deeper. 
*/ - return hwloc___insert_object_by_cpuset(topology, child, obj, report_error); + return hwloc___insert_object_by_cpuset(topology, child, obj, reason); case HWLOC_OBJ_INTERSECTS: - if (report_error) { - char childstr[512]; - char objstr[512]; - char msg[1100]; - hwloc__report_error_format_obj(objstr, sizeof(objstr), obj); - hwloc__report_error_format_obj(childstr, sizeof(childstr), child); - snprintf(msg, sizeof(msg), "%s intersects with %s without inclusion!", objstr, childstr); - report_error(msg, __LINE__); - } + report_insert_error(obj, child, "intersection without inclusion", reason); goto putback; case HWLOC_OBJ_DIFFERENT: @@ -1464,6 +1479,8 @@ hwloc___insert_object_by_cpuset(struct hwloc_topology *topology, hwloc_obj_t cur if (setres == HWLOC_OBJ_EQUAL) { obj->memory_first_child = child->memory_first_child; child->memory_first_child = NULL; + for(tmp=obj->memory_first_child; tmp; tmp = tmp->next_sibling) + tmp->parent = obj; } break; } @@ -1483,7 +1500,9 @@ hwloc___insert_object_by_cpuset(struct hwloc_topology *topology, hwloc_obj_t cur return obj; putback: - /* Put-back OBJ children in CUR and return an error. */ + /* OBJ cannot be inserted. + * Put-back OBJ children in CUR and return an error. + */ if (putp) cur_children = putp; /* No need to try to insert before where OBJ was supposed to go */ else @@ -1492,12 +1511,12 @@ hwloc___insert_object_by_cpuset(struct hwloc_topology *topology, hwloc_obj_t cur while ((child = obj->first_child) != NULL) { /* Remove from OBJ */ obj->first_child = child->next_sibling; - obj->parent = cur; - /* Find child position in CUR, and insert. */ + /* Find child position in CUR, and reinsert it. */ while (*cur_children && hwloc__object_cpusets_compare_first(*cur_children, child) < 0) cur_children = &(*cur_children)->next_sibling; child->next_sibling = *cur_children; *cur_children = child; + child->parent = cur; } return NULL; } @@ -1521,7 +1540,7 @@ hwloc__find_obj_covering_memory_cpuset(struct hwloc_topology *topology, hwloc_ob static struct hwloc_obj * hwloc__find_insert_memory_parent(struct hwloc_topology *topology, hwloc_obj_t obj, - hwloc_report_error_t report_error) + const char *reason) { hwloc_obj_t parent, group, result; @@ -1573,7 +1592,7 @@ hwloc__find_insert_memory_parent(struct hwloc_topology *topology, hwloc_obj_t ob return parent; } - result = hwloc__insert_object_by_cpuset(topology, parent, group, report_error); + result = hwloc__insert_object_by_cpuset(topology, parent, group, reason); if (!result) { /* failed to insert, fallback to larger parent */ return parent; @@ -1586,8 +1605,7 @@ hwloc__find_insert_memory_parent(struct hwloc_topology *topology, hwloc_obj_t ob /* only works for MEMCACHE and NUMAnode with a single bit in nodeset */ static hwloc_obj_t hwloc___attach_memory_object_by_nodeset(struct hwloc_topology *topology, hwloc_obj_t parent, - hwloc_obj_t obj, - hwloc_report_error_t report_error) + hwloc_obj_t obj, const char *reason) { hwloc_obj_t *curp = &parent->memory_first_child; unsigned first = hwloc_bitmap_first(obj->nodeset); @@ -1611,20 +1629,12 @@ hwloc___attach_memory_object_by_nodeset(struct hwloc_topology *topology, hwloc_o if (obj->type == HWLOC_OBJ_NUMANODE) { if (cur->type == HWLOC_OBJ_NUMANODE) { /* identical NUMA nodes? 
ignore the new one */ - if (report_error) { - char curstr[512]; - char objstr[512]; - char msg[1100]; - hwloc__report_error_format_obj(curstr, sizeof(curstr), cur); - hwloc__report_error_format_obj(objstr, sizeof(objstr), obj); - snprintf(msg, sizeof(msg), "%s and %s have identical nodesets!", objstr, curstr); - report_error(msg, __LINE__); - } + report_insert_error(obj, cur, "NUMAnodes with identical nodesets", reason); return NULL; } assert(cur->type == HWLOC_OBJ_MEMCACHE); /* insert the new NUMA node below that existing memcache */ - return hwloc___attach_memory_object_by_nodeset(topology, cur, obj, report_error); + return hwloc___attach_memory_object_by_nodeset(topology, cur, obj, reason); } else { assert(obj->type == HWLOC_OBJ_MEMCACHE); @@ -1637,7 +1647,7 @@ hwloc___attach_memory_object_by_nodeset(struct hwloc_topology *topology, hwloc_o * (depth starts from the NUMA node). * insert the new memcache below the existing one */ - return hwloc___attach_memory_object_by_nodeset(topology, cur, obj, report_error); + return hwloc___attach_memory_object_by_nodeset(topology, cur, obj, reason); } /* insert the memcache above the existing memcache or numa node */ obj->next_sibling = cur->next_sibling; @@ -1673,8 +1683,7 @@ hwloc___attach_memory_object_by_nodeset(struct hwloc_topology *topology, hwloc_o */ struct hwloc_obj * hwloc__attach_memory_object(struct hwloc_topology *topology, hwloc_obj_t parent, - hwloc_obj_t obj, - hwloc_report_error_t report_error) + hwloc_obj_t obj, const char *reason) { hwloc_obj_t result; @@ -1704,7 +1713,7 @@ hwloc__attach_memory_object(struct hwloc_topology *topology, hwloc_obj_t parent, hwloc_bitmap_copy(obj->complete_cpuset, parent->complete_cpuset); #endif - result = hwloc___attach_memory_object_by_nodeset(topology, parent, obj, report_error); + result = hwloc___attach_memory_object_by_nodeset(topology, parent, obj, reason); if (result == obj) { /* Add the bit to the top sets, and to the parent CPU-side object */ if (obj->type == HWLOC_OBJ_NUMANODE) { @@ -1722,8 +1731,7 @@ hwloc__attach_memory_object(struct hwloc_topology *topology, hwloc_obj_t parent, /* insertion routine that lets you change the error reporting callback */ struct hwloc_obj * hwloc__insert_object_by_cpuset(struct hwloc_topology *topology, hwloc_obj_t root, - hwloc_obj_t obj, - hwloc_report_error_t report_error) + hwloc_obj_t obj, const char *reason) { struct hwloc_obj *result; @@ -1740,20 +1748,20 @@ hwloc__insert_object_by_cpuset(struct hwloc_topology *topology, hwloc_obj_t root if (hwloc__obj_type_is_memory(obj->type)) { if (!root) { - root = hwloc__find_insert_memory_parent(topology, obj, report_error); + root = hwloc__find_insert_memory_parent(topology, obj, reason); if (!root) { hwloc_free_unlinked_object(obj); return NULL; } } - return hwloc__attach_memory_object(topology, root, obj, report_error); + return hwloc__attach_memory_object(topology, root, obj, reason); } if (!root) /* Start at the top. */ root = topology->levels[0][0]; - result = hwloc___insert_object_by_cpuset(topology, root, obj, report_error); + result = hwloc___insert_object_by_cpuset(topology, root, obj, reason); if (result && result->type == HWLOC_OBJ_PU) { /* Add the bit to the top sets */ if (hwloc_bitmap_isset(result->cpuset, result->os_index)) @@ -1769,12 +1777,6 @@ hwloc__insert_object_by_cpuset(struct hwloc_topology *topology, hwloc_obj_t root /* the default insertion routine warns in case of error. 
* it's used by most backends */ -struct hwloc_obj * -hwloc_insert_object_by_cpuset(struct hwloc_topology *topology, hwloc_obj_t obj) -{ - return hwloc__insert_object_by_cpuset(topology, NULL, obj, hwloc_report_os_error); -} - void hwloc_insert_object_by_parent(struct hwloc_topology *topology, hwloc_obj_t parent, hwloc_obj_t obj) { @@ -1917,6 +1919,7 @@ hwloc_topology_insert_group_object(struct hwloc_topology *topology, hwloc_obj_t if (hwloc_bitmap_isset(nodeset, numa->os_index)) hwloc_bitmap_or(obj->cpuset, obj->cpuset, numa->cpuset); } + /* FIXME insert by nodeset to group NUMAs even if CPUless? */ cmp = hwloc_obj_cmp_sets(obj, root); if (cmp == HWLOC_OBJ_INCLUDED) { @@ -2047,7 +2050,7 @@ hwloc_find_insert_io_parent_by_complete_cpuset(struct hwloc_topology *topology, hwloc_bitmap_and(cpuset, cpuset, hwloc_topology_get_topology_cpuset(topology)); group_obj->cpuset = hwloc_bitmap_dup(cpuset); group_obj->attr->group.kind = HWLOC_GROUP_KIND_IO; - parent = hwloc__insert_object_by_cpuset(topology, largeparent, group_obj, hwloc_report_os_error); + parent = hwloc__insert_object_by_cpuset(topology, largeparent, group_obj, "topology:io_parent"); if (!parent) /* Failed to insert the Group, maybe a conflicting cpuset */ return largeparent; @@ -3251,7 +3254,7 @@ hwloc_discover(struct hwloc_topology *topology, * produced by hwloc_setup_pu_level() */ - /* To be able to just use hwloc_insert_object_by_cpuset to insert the object + /* To be able to just use hwloc__insert_object_by_cpuset to insert the object * in the topology according to the cpuset, the cpuset field must be * initialized. */ @@ -3356,7 +3359,7 @@ hwloc_discover(struct hwloc_topology *topology, hwloc_bitmap_set(node->nodeset, 0); memcpy(&node->attr->numanode, &topology->machine_memory, sizeof(topology->machine_memory)); memset(&topology->machine_memory, 0, sizeof(topology->machine_memory)); - hwloc_insert_object_by_cpuset(topology, node); + hwloc__insert_object_by_cpuset(topology, NULL, node, "core:defaultnumanode"); } else { /* if we're sure we found all NUMA nodes without their sizes (x86 backend?), * we could split topology->total_memory in all of them. 
@@ -3514,6 +3517,7 @@ hwloc_topology_setup_defaults(struct hwloc_topology *topology) memset(topology->support.discovery, 0, sizeof(*topology->support.discovery)); memset(topology->support.cpubind, 0, sizeof(*topology->support.cpubind)); memset(topology->support.membind, 0, sizeof(*topology->support.membind)); + memset(topology->support.misc, 0, sizeof(*topology->support.misc)); /* Only the System object on top by default */ topology->next_gp_index = 1; /* keep 0 as an invalid value */ @@ -3590,6 +3594,7 @@ hwloc__topology_init (struct hwloc_topology **topologyp, topology->support.discovery = hwloc_tma_malloc(tma, sizeof(*topology->support.discovery)); topology->support.cpubind = hwloc_tma_malloc(tma, sizeof(*topology->support.cpubind)); topology->support.membind = hwloc_tma_malloc(tma, sizeof(*topology->support.membind)); + topology->support.misc = hwloc_tma_malloc(tma, sizeof(*topology->support.misc)); topology->nb_levels_allocated = nblevels; /* enough for default 10 levels = Mach+Pack+Die+NUMA+L3+L2+L1d+L1i+Co+PU */ topology->levels = hwloc_tma_calloc(tma, topology->nb_levels_allocated * sizeof(*topology->levels)); @@ -3598,6 +3603,8 @@ hwloc__topology_init (struct hwloc_topology **topologyp, hwloc__topology_filter_init(topology); hwloc_internal_distances_init(topology); + hwloc_internal_memattrs_init(topology); + hwloc_internal_cpukinds_init(topology); topology->userdata_export_cb = NULL; topology->userdata_import_cb = NULL; @@ -3691,7 +3698,7 @@ hwloc_topology_set_flags (struct hwloc_topology *topology, unsigned long flags) return -1; } - if (flags & ~(HWLOC_TOPOLOGY_FLAG_INCLUDE_DISALLOWED|HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM|HWLOC_TOPOLOGY_FLAG_THISSYSTEM_ALLOWED_RESOURCES)) { + if (flags & ~(HWLOC_TOPOLOGY_FLAG_INCLUDE_DISALLOWED|HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM|HWLOC_TOPOLOGY_FLAG_THISSYSTEM_ALLOWED_RESOURCES|HWLOC_TOPOLOGY_FLAG_IMPORT_SUPPORT)) { errno = EINVAL; return -1; } @@ -3827,7 +3834,9 @@ hwloc_topology_clear (struct hwloc_topology *topology) { /* no need to set to NULL after free() since callers will call setup_defaults() or just destroy the rest of the topology */ unsigned l; + hwloc_internal_cpukinds_destroy(topology); hwloc_internal_distances_destroy(topology); + hwloc_internal_memattrs_destroy(topology); hwloc_free_object_and_children(topology->levels[0][0]); hwloc_bitmap_free(topology->allowed_cpuset); hwloc_bitmap_free(topology->allowed_nodeset); @@ -3858,6 +3867,7 @@ hwloc_topology_destroy (struct hwloc_topology *topology) free(topology->support.discovery); free(topology->support.cpubind); free(topology->support.membind); + free(topology->support.misc); free(topology); } @@ -3873,7 +3883,9 @@ hwloc_topology_load (struct hwloc_topology *topology) return -1; } + /* initialize envvar-related things */ hwloc_internal_distances_prepare(topology); + hwloc_internal_memattrs_prepare(topology); if (getenv("HWLOC_XML_USERDATA_NOT_DECODED")) topology->userdata_not_decoded = 1; @@ -3954,6 +3966,9 @@ hwloc_topology_load (struct hwloc_topology *topology) #endif hwloc_topology_check(topology); + /* Rank cpukinds */ + hwloc_internal_cpukinds_rank(topology); + /* Mark distances objs arrays as invalid since we may have removed objects * from the topology after adding the distances (remove_empty, etc). * It would be hard to actually verify whether it's needed. 
@@ -3964,6 +3979,10 @@ hwloc_topology_load (struct hwloc_topology *topology) */ hwloc_internal_distances_refresh(topology); + /* Same for memattrs */ + hwloc_internal_memattrs_need_refresh(topology); + hwloc_internal_memattrs_refresh(topology); + topology->is_loaded = 1; if (topology->backend_phases & HWLOC_DISC_PHASE_TWEAK) { @@ -4246,10 +4265,12 @@ hwloc_topology_restrict(struct hwloc_topology *topology, hwloc_const_bitmap_t se /* some objects may have disappeared, we need to update distances objs arrays */ hwloc_internal_distances_invalidate_cached_objs(topology); + hwloc_internal_memattrs_need_refresh(topology); hwloc_filter_levels_keep_structure(topology); hwloc_propagate_symmetric_subtree(topology, topology->levels[0][0]); propagate_total_memory(topology->levels[0][0]); + hwloc_internal_cpukinds_restrict(topology); #ifndef HWLOC_DEBUG if (getenv("HWLOC_DEBUG_CHECK")) @@ -4334,6 +4355,15 @@ hwloc_topology_allow(struct hwloc_topology *topology, return -1; } +int +hwloc_topology_refresh(struct hwloc_topology *topology) +{ + hwloc_internal_cpukinds_rank(topology); + hwloc_internal_distances_refresh(topology); + hwloc_internal_memattrs_refresh(topology); + return 0; +} + int hwloc_topology_is_thissystem(struct hwloc_topology *topology) { diff --git a/src/3rdparty/hwloc/src/traversal.c b/src/3rdparty/hwloc/src/traversal.c index 0b744d78..f9076ab5 100644 --- a/src/3rdparty/hwloc/src/traversal.c +++ b/src/3rdparty/hwloc/src/traversal.c @@ -1,7 +1,7 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2019 Inria. All rights reserved. - * Copyright © 2009-2010 Université Bordeaux + * Copyright © 2009-2020 Inria. All rights reserved. + * Copyright © 2009-2010, 2020 Université Bordeaux * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. */ @@ -138,6 +138,37 @@ hwloc_obj_type_is_icache(hwloc_obj_type_t type) return hwloc__obj_type_is_icache(type); } +static hwloc_obj_t hwloc_get_obj_by_depth_and_gp_index(hwloc_topology_t topology, unsigned depth, uint64_t gp_index) +{ + hwloc_obj_t obj = hwloc_get_obj_by_depth(topology, depth, 0); + while (obj) { + if (obj->gp_index == gp_index) + return obj; + obj = obj->next_cousin; + } + return NULL; +} + +hwloc_obj_t hwloc_get_obj_by_type_and_gp_index(hwloc_topology_t topology, hwloc_obj_type_t type, uint64_t gp_index) +{ + int depth = hwloc_get_type_depth(topology, type); + if (depth == HWLOC_TYPE_DEPTH_UNKNOWN) + return NULL; + if (depth == HWLOC_TYPE_DEPTH_MULTIPLE) { + for(depth=1 /* no multiple machine levels */; + (unsigned) depth < topology->nb_levels-1 /* no multiple PU levels */; + depth++) { + if (hwloc_get_depth_type(topology, depth) == type) { + hwloc_obj_t obj = hwloc_get_obj_by_depth_and_gp_index(topology, depth, gp_index); + if (obj) + return obj; + } + } + return NULL; + } + return hwloc_get_obj_by_depth_and_gp_index(topology, depth, gp_index); +} + unsigned hwloc_get_closest_objs (struct hwloc_topology *topology, struct hwloc_obj *src, struct hwloc_obj **objs, unsigned max) { struct hwloc_obj *parent, *nextparent, **src_objs; @@ -654,7 +685,11 @@ hwloc_obj_attr_snprintf(char * __hwloc_restrict string, size_t size, hwloc_obj_t unsigned i; for(i=0; i<obj->infos_count; i++) { struct hwloc_info_s *info = &obj->infos[i]; - const char *quote = strchr(info->value, ' ') ?
"\"" : ""; + const char *quote; + if (strchr(info->value, ' ')) + quote = "\""; + else + quote = ""; res = hwloc_snprintf(tmp, tmplen, "%s%s=%s%s%s", prefix, info->name, @@ -673,3 +708,31 @@ hwloc_obj_attr_snprintf(char * __hwloc_restrict string, size_t size, hwloc_obj_t return ret; } + +int hwloc_bitmap_singlify_per_core(hwloc_topology_t topology, hwloc_bitmap_t cpuset, unsigned which) +{ + hwloc_obj_t core = NULL; + while ((core = hwloc_get_next_obj_covering_cpuset_by_type(topology, cpuset, HWLOC_OBJ_CORE, core)) != NULL) { + /* this core has some PUs in the cpuset, find the index-th one */ + unsigned i = 0; + int pu = -1; + do { + pu = hwloc_bitmap_next(core->cpuset, pu); + if (pu == -1) { + /* no which-th PU in cpuset and core, remove the entire core */ + hwloc_bitmap_andnot(cpuset, cpuset, core->cpuset); + break; + } + if (hwloc_bitmap_isset(cpuset, pu)) { + if (i == which) { + /* remove the entire core except that exact pu */ + hwloc_bitmap_andnot(cpuset, cpuset, core->cpuset); + hwloc_bitmap_set(cpuset, pu); + break; + } + i++; + } + } while (1); + } + return 0; +}