Merge branch 'dev'
This commit is contained in:
commit
3c985eef25
247 changed files with 6793 additions and 6107 deletions
12
CHANGELOG.md
12
CHANGELOG.md
|
@ -1,3 +1,15 @@
|
|||
# v6.7.0
|
||||
- **[#1991](https://github.com/xmrig/xmrig/issues/1991) Added Apple M1 processor support.**
|
||||
- **[#1986](https://github.com/xmrig/xmrig/pull/1986) Up to 20-30% faster RandomX dataset initialization with AVX2 on some CPUs.**
|
||||
- [#1964](https://github.com/xmrig/xmrig/pull/1964) Cleanup and refactoring.
|
||||
- [#1966](https://github.com/xmrig/xmrig/pull/1966) Removed libcpuid support.
|
||||
- [#1968](https://github.com/xmrig/xmrig/pull/1968) Added virtual machine detection.
|
||||
- [#1969](https://github.com/xmrig/xmrig/pull/1969) [#1970](https://github.com/xmrig/xmrig/pull/1970) Fixed errors found by static analysis.
|
||||
- [#1977](https://github.com/xmrig/xmrig/pull/1977) Fixed: secure JIT and huge pages are incompatible on Windows.
|
||||
- [#1979](https://github.com/xmrig/xmrig/pull/1979) Term `x64` replaced to `64-bit`.
|
||||
- [#1980](https://github.com/xmrig/xmrig/pull/1980) Fixed build on gcc 11.
|
||||
- [#1989](https://github.com/xmrig/xmrig/pull/1989) Fixed broken Dero solo mining.
|
||||
|
||||
# v6.6.2
|
||||
- [#1958](https://github.com/xmrig/xmrig/pull/1958) Added example mining scripts to help new miners.
|
||||
- [#1959](https://github.com/xmrig/xmrig/pull/1959) Optimized JIT compiler.
|
||||
|
|
|
@ -1,7 +1,6 @@
|
|||
cmake_minimum_required(VERSION 2.8)
|
||||
cmake_minimum_required(VERSION 2.8.12)
|
||||
project(xmrig)
|
||||
|
||||
option(WITH_LIBCPUID "Enable libcpuid support" ON)
|
||||
option(WITH_HWLOC "Enable hwloc support" ON)
|
||||
option(WITH_CN_LITE "Enable CryptoNight-Lite algorithms family" ON)
|
||||
option(WITH_CN_HEAVY "Enable CryptoNight-Heavy algorithms family" ON)
|
||||
|
@ -26,6 +25,7 @@ option(WITH_INTERLEAVE_DEBUG_LOG "Enable debug log for threads interleave" OFF)
|
|||
option(WITH_PROFILING "Enable profiling for developers" OFF)
|
||||
option(WITH_SSE4_1 "Enable SSE 4.1 for Blake2" ON)
|
||||
option(WITH_BENCHMARK "Enable builtin RandomX benchmark and stress test" ON)
|
||||
option(WITH_SECURE_JIT "Enable secure access to JIT memory" OFF)
|
||||
|
||||
option(BUILD_STATIC "Build static binary" OFF)
|
||||
option(ARM_TARGET "Force use specific ARM target 8 or 7" 0)
|
||||
|
@ -168,7 +168,7 @@ else()
|
|||
endif()
|
||||
endif()
|
||||
|
||||
add_definitions(-DXMRIG_MINER_PROJECT)
|
||||
add_definitions(-DXMRIG_MINER_PROJECT -DXMRIG_JSON_SINGLE_LINE_ARRAY)
|
||||
add_definitions(-D__STDC_FORMAT_MACROS -DUNICODE)
|
||||
|
||||
find_package(UV REQUIRED)
|
||||
|
@ -205,7 +205,7 @@ if (WITH_DEBUG_LOG)
|
|||
add_definitions(/DAPP_DEBUG)
|
||||
endif()
|
||||
|
||||
add_executable(${CMAKE_PROJECT_NAME} ${HEADERS} ${SOURCES} ${SOURCES_OS} ${SOURCES_CPUID} ${HEADERS_CRYPTO} ${SOURCES_CRYPTO} ${SOURCES_SYSLOG} ${TLS_SOURCES} ${XMRIG_ASM_SOURCES})
|
||||
add_executable(${CMAKE_PROJECT_NAME} ${HEADERS} ${SOURCES} ${SOURCES_OS} ${HEADERS_CRYPTO} ${SOURCES_CRYPTO} ${SOURCES_SYSLOG} ${TLS_SOURCES} ${XMRIG_ASM_SOURCES})
|
||||
target_link_libraries(${CMAKE_PROJECT_NAME} ${XMRIG_ASM_LIBRARY} ${OPENSSL_LIBRARIES} ${UV_LIBRARIES} ${EXTRA_LIBS} ${CPUID_LIB} ${ARGON2_LIBRARY} ${ETHASH_LIBRARY})
|
||||
|
||||
if (WIN32)
|
||||
|
@ -216,6 +216,6 @@ if (WIN32)
|
|||
add_custom_command(TARGET ${CMAKE_PROJECT_NAME} POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_if_different "${CMAKE_SOURCE_DIR}/scripts/solo_mine_example.cmd" $<TARGET_FILE_DIR:${CMAKE_PROJECT_NAME}>)
|
||||
endif()
|
||||
|
||||
if (CMAKE_CXX_COMPILER_ID MATCHES Clang AND CMAKE_BUILD_TYPE STREQUAL Release)
|
||||
if (CMAKE_CXX_COMPILER_ID MATCHES Clang AND CMAKE_BUILD_TYPE STREQUAL Release AND NOT CMAKE_GENERATOR STREQUAL Xcode)
|
||||
add_custom_command(TARGET ${PROJECT_NAME} POST_BUILD COMMAND ${CMAKE_STRIP} ${CMAKE_PROJECT_NAME})
|
||||
endif()
|
||||
|
|
|
@ -18,7 +18,6 @@ endif()
|
|||
|
||||
if (ARM_TARGET AND ARM_TARGET GREATER 6)
|
||||
set(XMRIG_ARM ON)
|
||||
set(WITH_LIBCPUID OFF)
|
||||
add_definitions(/DXMRIG_ARM)
|
||||
|
||||
message(STATUS "Use ARM_TARGET=${ARM_TARGET} (${CMAKE_SYSTEM_PROCESSOR})")
|
||||
|
|
|
@ -32,6 +32,10 @@ elseif(XMRIG_OS_APPLE)
|
|||
else()
|
||||
add_definitions(/DXMRIG_OS_MACOS)
|
||||
endif()
|
||||
|
||||
if (XMRIG_ARM)
|
||||
set(WITH_SECURE_JIT ON)
|
||||
endif()
|
||||
elseif(XMRIG_OS_UNIX)
|
||||
add_definitions(/DXMRIG_OS_UNIX)
|
||||
|
||||
|
@ -43,3 +47,7 @@ elseif(XMRIG_OS_UNIX)
|
|||
add_definitions(/DXMRIG_OS_FREEBSD)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if (WITH_SECURE_JIT)
|
||||
add_definitions(/DXMRIG_SECURE_JIT)
|
||||
endif()
|
||||
|
|
|
@ -61,7 +61,11 @@ if (WITH_RANDOMX)
|
|||
src/crypto/randomx/jit_compiler_a64.cpp
|
||||
)
|
||||
# cheat because cmake and ccache hate each other
|
||||
set_property(SOURCE src/crypto/randomx/jit_compiler_a64_static.S PROPERTY LANGUAGE C)
|
||||
if (CMAKE_GENERATOR STREQUAL Xcode)
|
||||
set_property(SOURCE src/crypto/randomx/jit_compiler_a64_static.S PROPERTY LANGUAGE ASM)
|
||||
else()
|
||||
set_property(SOURCE src/crypto/randomx/jit_compiler_a64_static.S PROPERTY LANGUAGE C)
|
||||
endif()
|
||||
else()
|
||||
list(APPEND SOURCES_CRYPTO
|
||||
src/crypto/randomx/jit_compiler_fallback.cpp
|
||||
|
@ -108,6 +112,13 @@ if (WITH_RANDOMX)
|
|||
remove_definitions(/DXMRIG_FIX_RYZEN)
|
||||
message("-- WITH_MSR=OFF")
|
||||
endif()
|
||||
|
||||
if (WITH_PROFILING)
|
||||
add_definitions(/DXMRIG_FEATURE_PROFILING)
|
||||
|
||||
list(APPEND HEADERS_CRYPTO src/crypto/rx/Profiler.h)
|
||||
list(APPEND SOURCES_CRYPTO src/crypto/rx/Profiler.cpp)
|
||||
endif()
|
||||
else()
|
||||
remove_definitions(/DXMRIG_ALGO_RANDOMX)
|
||||
endif()
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
@echo off
|
||||
cd %~dp0
|
||||
xmrig.exe --bench=10M --submit
|
||||
pause
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
@echo off
|
||||
cd %~dp0
|
||||
xmrig.exe --bench=1M --submit
|
||||
pause
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
#!/bin/bash -e
|
||||
|
||||
HWLOC_VERSION="2.2.0"
|
||||
HWLOC_VERSION="2.4.0"
|
||||
|
||||
mkdir -p deps
|
||||
mkdir -p deps/include
|
||||
|
@ -8,7 +8,7 @@ mkdir -p deps/lib
|
|||
|
||||
mkdir -p build && cd build
|
||||
|
||||
wget https://download.open-mpi.org/release/hwloc/v2.2/hwloc-${HWLOC_VERSION}.tar.gz -O hwloc-${HWLOC_VERSION}.tar.gz
|
||||
wget https://download.open-mpi.org/release/hwloc/v2.4/hwloc-${HWLOC_VERSION}.tar.gz -O hwloc-${HWLOC_VERSION}.tar.gz
|
||||
tar -xzf hwloc-${HWLOC_VERSION}.tar.gz
|
||||
|
||||
cd hwloc-${HWLOC_VERSION}
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
#!/bin/bash -e
|
||||
|
||||
OPENSSL_VERSION="1.1.1h"
|
||||
OPENSSL_VERSION="1.1.1i"
|
||||
|
||||
mkdir -p deps
|
||||
mkdir -p deps/include
|
||||
|
|
|
@ -15,5 +15,6 @@
|
|||
:: Choose pools outside of top 5 to help Monero network be more decentralized!
|
||||
:: Smaller pools also often have smaller fees/payout limits.
|
||||
|
||||
cd %~dp0
|
||||
xmrig.exe -o pool.hashvault.pro:3333 -u 48edfHu7V9Z84YzzMa6fUueoELZ9ZRXq9VetWzYGzKt52XU5xvqgzYnDK9URnRoJMk1j8nLwEVsaSWJ4fhdUyZijBGUicoD -p x
|
||||
pause
|
||||
|
|
|
@ -11,5 +11,6 @@
|
|||
:: Mining solo is the best way to help Monero network be more decentralized!
|
||||
:: But you will only get a payout when you find a block which can take more than a year for a single low-end PC.
|
||||
|
||||
cd %~dp0
|
||||
xmrig.exe -o node.xmr.to:18081 -a rx/0 -u 48edfHu7V9Z84YzzMa6fUueoELZ9ZRXq9VetWzYGzKt52XU5xvqgzYnDK9URnRoJMk1j8nLwEVsaSWJ4fhdUyZijBGUicoD --daemon
|
||||
pause
|
||||
|
|
2
src/3rdparty/argon2/CMakeLists.txt
vendored
2
src/3rdparty/argon2/CMakeLists.txt
vendored
|
@ -1,4 +1,4 @@
|
|||
cmake_minimum_required(VERSION 2.8)
|
||||
cmake_minimum_required(VERSION 2.8.12)
|
||||
|
||||
project(argon2 C)
|
||||
set(CMAKE_C_STANDARD 99)
|
||||
|
|
2
src/3rdparty/fmt/format-inl.h
vendored
2
src/3rdparty/fmt/format-inl.h
vendored
|
@ -1754,7 +1754,7 @@ inline bool divisible_by_power_of_2(uint64_t x, int exp) FMT_NOEXCEPT {
|
|||
#ifdef FMT_BUILTIN_CTZLL
|
||||
return FMT_BUILTIN_CTZLL(x) >= exp;
|
||||
#else
|
||||
return exp < num_bits<uint64_t>()) && x == ((x >> exp) << exp);
|
||||
return (exp < num_bits<uint64_t>()) && x == ((x >> exp) << exp);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
|
36
src/3rdparty/hwloc/CMakeLists.txt
vendored
36
src/3rdparty/hwloc/CMakeLists.txt
vendored
|
@ -1,4 +1,4 @@
|
|||
cmake_minimum_required (VERSION 2.8)
|
||||
cmake_minimum_required (VERSION 2.8.12)
|
||||
project (hwloc C)
|
||||
|
||||
include_directories(include)
|
||||
|
@ -13,23 +13,25 @@ set(HEADERS
|
|||
)
|
||||
|
||||
set(SOURCES
|
||||
src/base64.c
|
||||
src/bind.c
|
||||
src/bitmap.c
|
||||
src/components.c
|
||||
src/diff.c
|
||||
src/distances.c
|
||||
src/misc.c
|
||||
src/pci-common.c
|
||||
src/shmem.c
|
||||
src/topology.c
|
||||
src/topology-noos.c
|
||||
src/topology-synthetic.c
|
||||
src/topology-windows.c
|
||||
src/topology-x86.c
|
||||
src/topology-xml.c
|
||||
src/topology-xml-nolibxml.c
|
||||
src/base64.c
|
||||
src/bind.c
|
||||
src/bitmap.c
|
||||
src/components.c
|
||||
src/diff.c
|
||||
src/distances.c
|
||||
src/misc.c
|
||||
src/pci-common.c
|
||||
src/shmem.c
|
||||
src/topology.c
|
||||
src/topology-noos.c
|
||||
src/topology-synthetic.c
|
||||
src/topology-windows.c
|
||||
src/topology-x86.c
|
||||
src/topology-xml.c
|
||||
src/topology-xml-nolibxml.c
|
||||
src/traversal.c
|
||||
src/memattrs.c
|
||||
src/cpukinds.c
|
||||
)
|
||||
|
||||
add_library(hwloc STATIC
|
||||
|
|
71
src/3rdparty/hwloc/NEWS
vendored
71
src/3rdparty/hwloc/NEWS
vendored
|
@ -2,6 +2,7 @@ Copyright © 2009 CNRS
|
|||
Copyright © 2009-2020 Inria. All rights reserved.
|
||||
Copyright © 2009-2013 Université Bordeaux
|
||||
Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved.
|
||||
Copyright © 2020 Hewlett Packard Enterprise. All rights reserved.
|
||||
|
||||
$COPYRIGHT$
|
||||
|
||||
|
@ -16,6 +17,76 @@ bug fixes (and other actions) for each version of hwloc since version
|
|||
0.9.
|
||||
|
||||
|
||||
Version 2.4.0
|
||||
-------------
|
||||
* API
|
||||
+ Add hwloc/cpukinds.h for reporting information about hybrid CPUs.
|
||||
- Use Linux cpufreq frequencies to rank cores by efficiency.
|
||||
- Use x86 CPUID hybrid leaf and future Linux kernels sysfs CPU type
|
||||
files to identify Intel Atom and Core cores.
|
||||
- Use the Windows native EfficiencyClass to separate kinds.
|
||||
* Backends
|
||||
+ Properly handle Linux kernel 5.10+ exposing ACPI HMAT information
|
||||
with knowledge of Generic Initiators.
|
||||
* Tools
|
||||
+ lstopo has new --cpukinds and --no-cpukinds options for showing
|
||||
CPU kinds or not in textual and graphical modes respectively.
|
||||
+ hwloc-calc has a new --cpukind option for filtering PUs by kind.
|
||||
+ hwloc-annotate has a new cpukind command for modifying CPU kinds.
|
||||
* Misc
|
||||
+ Fix hwloc_bitmap_nr_ulongs(), thanks to Norbert Eicker.
|
||||
+ Add a documentation section about
|
||||
"Topology Attributes: Distances, Memory Attributes and CPU Kinds".
|
||||
+ Silence some spurious warnings in the OpenCL backend and when showing
|
||||
process binding with lstopo --ps.
|
||||
|
||||
|
||||
Version 2.3.0
|
||||
-------------
|
||||
* API
|
||||
+ Add hwloc/memattrs.h for exposing latency/bandwidth information
|
||||
between initiators (CPU sets for now) and target NUMA nodes,
|
||||
typically on heterogeneous platforms.
|
||||
- When available, bandwidths and latencies are read from the ACPI HMAT
|
||||
table exposed by Linux kernel 5.2+.
|
||||
- Attributes may also be customized to expose user-defined performance
|
||||
information.
|
||||
+ Add hwloc_get_local_numanode_objs() for listing NUMA nodes that are
|
||||
local to some locality.
|
||||
+ The new topology flag HWLOC_TOPOLOGY_FLAG_IMPORT_SUPPORT causes
|
||||
support arrays to be loaded from XML exported with hwloc 2.3+.
|
||||
- hwloc_topology_get_support() now returns an additional "misc"
|
||||
array with feature "imported_support" set when support was imported.
|
||||
+ Add hwloc_topology_refresh() to refresh internal caches after modifying
|
||||
the topology and before consulting the topology in a multithread context.
|
||||
* Backends
|
||||
+ Add a ROCm SMI backend and a hwloc/rsmi.h helper file for getting
|
||||
the locality of AMD GPUs, now exposed as "rsmi" OS devices.
|
||||
Thanks to Mike Li.
|
||||
+ Remove POWER device-tree-based topology on Linux,
|
||||
(it was disabled by default since 2.1).
|
||||
* Tools
|
||||
+ Command-line options for specifying flags now understand comma-separated
|
||||
lists of flag names (substrings).
|
||||
+ hwloc-info and hwloc-calc have new --local-memory --local-memory-flags
|
||||
and --best-memattr options for reporting local memory nodes and filtering
|
||||
by memory attributes.
|
||||
+ hwloc-bind has a new --best-memattr option for filtering by memory attributes
|
||||
among the memory binding set.
|
||||
+ Tools that have a --restrict option may now receive a nodeset or
|
||||
some custom flags for restricting the topology.
|
||||
+ lstopo now has a --thickness option for changing line thickness in the
|
||||
graphical output.
|
||||
+ Fix lstopo drawing when autoresizing on Windows 10.
|
||||
+ Pressing the F5 key in lstopo X11 and Windows graphical/interactive outputs
|
||||
now refreshes the display according to the current topology and binding.
|
||||
+ Add a tikz lstopo graphical backend to generate picture easily included into
|
||||
LaTeX documents. Thanks to Clement Foyer.
|
||||
* Misc
|
||||
+ The default installation path of the Bash completion file has changed to
|
||||
${datadir}/bash-completion/completions/hwloc. Thanks to Tomasz Kłoczko.
|
||||
|
||||
|
||||
Version 2.2.0
|
||||
-------------
|
||||
* API
|
||||
|
|
8
src/3rdparty/hwloc/README
vendored
8
src/3rdparty/hwloc/README
vendored
|
@ -23,9 +23,9 @@ APIs are documented after these sections.
|
|||
|
||||
Installation
|
||||
|
||||
hwloc (http://www.open-mpi.org/projects/hwloc/) is available under the BSD
|
||||
license. It is hosted as a sub-project of the overall Open MPI project (http://
|
||||
www.open-mpi.org/). Note that hwloc does not require any functionality from
|
||||
hwloc (https://www.open-mpi.org/projects/hwloc/) is available under the BSD
|
||||
license. It is hosted as a sub-project of the overall Open MPI project (https:/
|
||||
/www.open-mpi.org/). Note that hwloc does not require any functionality from
|
||||
Open MPI -- it is a wholly separate (and much smaller!) project and code base.
|
||||
It just happens to be hosted as part of the overall Open MPI project.
|
||||
|
||||
|
@ -75,7 +75,7 @@ Bugs should be reported in the tracker (https://github.com/open-mpi/hwloc/
|
|||
issues). Opening a new issue automatically displays lots of hints about how to
|
||||
debug and report issues.
|
||||
|
||||
Questions may be sent to the users or developers mailing lists (http://
|
||||
Questions may be sent to the users or developers mailing lists (https://
|
||||
www.open-mpi.org/community/lists/hwloc.php).
|
||||
|
||||
There is also a #hwloc IRC channel on Freenode (irc.freenode.net).
|
||||
|
|
6
src/3rdparty/hwloc/VERSION
vendored
6
src/3rdparty/hwloc/VERSION
vendored
|
@ -8,7 +8,7 @@
|
|||
# Please update HWLOC_VERSION* in contrib/windows/hwloc_config.h too.
|
||||
|
||||
major=2
|
||||
minor=2
|
||||
minor=4
|
||||
release=0
|
||||
|
||||
# greek is used for alpha or beta release tags. If it is non-empty,
|
||||
|
@ -22,7 +22,7 @@ greek=
|
|||
|
||||
# The date when this release was created
|
||||
|
||||
date="Mar 30, 2020"
|
||||
date="Nov 26, 2020"
|
||||
|
||||
# If snapshot=1, then use the value from snapshot_version as the
|
||||
# entire hwloc version (i.e., ignore major, minor, release, and
|
||||
|
@ -41,7 +41,7 @@ snapshot_version=${major}.${minor}.${release}${greek}-git
|
|||
# 2. Version numbers are described in the Libtool current:revision:age
|
||||
# format.
|
||||
|
||||
libhwloc_so_version=17:0:2
|
||||
libhwloc_so_version=19:0:4
|
||||
libnetloc_so_version=0:0:0
|
||||
|
||||
# Please also update the <TargetName> lines in contrib/windows/libhwloc.vcxproj
|
||||
|
|
117
src/3rdparty/hwloc/include/hwloc.h
vendored
117
src/3rdparty/hwloc/include/hwloc.h
vendored
|
@ -2,7 +2,7 @@
|
|||
* Copyright © 2009 CNRS
|
||||
* Copyright © 2009-2020 Inria. All rights reserved.
|
||||
* Copyright © 2009-2012 Université Bordeaux
|
||||
* Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright © 2009-2020 Cisco Systems, Inc. All rights reserved.
|
||||
* See COPYING in top-level directory.
|
||||
*/
|
||||
|
||||
|
@ -11,7 +11,7 @@
|
|||
* ------------------------------------------------
|
||||
* $tarball_directory/doc/doxygen-doc/
|
||||
* or
|
||||
* http://www.open-mpi.org/projects/hwloc/doc/
|
||||
* https://www.open-mpi.org/projects/hwloc/doc/
|
||||
*=====================================================================
|
||||
*
|
||||
* FAIR WARNING: Do NOT expect to be able to figure out all the
|
||||
|
@ -93,7 +93,7 @@ extern "C" {
|
|||
* Two stable releases of the same series usually have the same ::HWLOC_API_VERSION
|
||||
* even if their HWLOC_VERSION are different.
|
||||
*/
|
||||
#define HWLOC_API_VERSION 0x00020100
|
||||
#define HWLOC_API_VERSION 0x00020400
|
||||
|
||||
/** \brief Indicate at runtime which hwloc API version was used at build time.
|
||||
*
|
||||
|
@ -102,7 +102,7 @@ extern "C" {
|
|||
HWLOC_DECLSPEC unsigned hwloc_get_api_version(void);
|
||||
|
||||
/** \brief Current component and plugin ABI version (see hwloc/plugins.h) */
|
||||
#define HWLOC_COMPONENT_ABI 6
|
||||
#define HWLOC_COMPONENT_ABI 7
|
||||
|
||||
/** @} */
|
||||
|
||||
|
@ -196,7 +196,7 @@ typedef enum {
|
|||
*/
|
||||
HWLOC_OBJ_CORE, /**< \brief Core.
|
||||
* A computation unit (may be shared by several
|
||||
* logical processors).
|
||||
* PUs, aka logical processors).
|
||||
*/
|
||||
HWLOC_OBJ_PU, /**< \brief Processing Unit, or (Logical) Processor.
|
||||
* An execution unit (may share a core with some
|
||||
|
@ -257,22 +257,31 @@ typedef enum {
|
|||
HWLOC_OBJ_BRIDGE, /**< \brief Bridge (filtered out by default).
|
||||
* Any bridge (or PCI switch) that connects the host or an I/O bus,
|
||||
* to another I/O bus.
|
||||
* They are not added to the topology unless I/O discovery
|
||||
* is enabled with hwloc_topology_set_flags().
|
||||
*
|
||||
* Bridges are not added to the topology unless their
|
||||
* filtering is changed (see hwloc_topology_set_type_filter()
|
||||
* and hwloc_topology_set_io_types_filter()).
|
||||
*
|
||||
* I/O objects are not listed in the main children list,
|
||||
* but rather in the dedicated io children list.
|
||||
* I/O objects have NULL CPU and node sets.
|
||||
*/
|
||||
HWLOC_OBJ_PCI_DEVICE, /**< \brief PCI device (filtered out by default).
|
||||
* They are not added to the topology unless I/O discovery
|
||||
* is enabled with hwloc_topology_set_flags().
|
||||
*
|
||||
* PCI devices are not added to the topology unless their
|
||||
* filtering is changed (see hwloc_topology_set_type_filter()
|
||||
* and hwloc_topology_set_io_types_filter()).
|
||||
*
|
||||
* I/O objects are not listed in the main children list,
|
||||
* but rather in the dedicated io children list.
|
||||
* I/O objects have NULL CPU and node sets.
|
||||
*/
|
||||
HWLOC_OBJ_OS_DEVICE, /**< \brief Operating system device (filtered out by default).
|
||||
* They are not added to the topology unless I/O discovery
|
||||
* is enabled with hwloc_topology_set_flags().
|
||||
*
|
||||
* OS devices are not added to the topology unless their
|
||||
* filtering is changed (see hwloc_topology_set_type_filter()
|
||||
* and hwloc_topology_set_io_types_filter()).
|
||||
*
|
||||
* I/O objects are not listed in the main children list,
|
||||
* but rather in the dedicated io children list.
|
||||
* I/O objects have NULL CPU and node sets.
|
||||
|
@ -282,6 +291,10 @@ typedef enum {
|
|||
* Objects without particular meaning, that can e.g. be
|
||||
* added by the application for its own use, or by hwloc
|
||||
* for miscellaneous objects such as MemoryModule (DIMMs).
|
||||
*
|
||||
* They are not added to the topology unless their filtering
|
||||
* is changed (see hwloc_topology_set_type_filter()).
|
||||
*
|
||||
* These objects are not listed in the main children list,
|
||||
* but rather in the dedicated misc children list.
|
||||
* Misc objects may only have Misc objects as children,
|
||||
|
@ -304,7 +317,6 @@ typedef enum {
|
|||
|
||||
HWLOC_OBJ_DIE, /**< \brief Die within a physical package.
|
||||
* A subpart of the physical package, that contains multiple cores.
|
||||
* \hideinitializer
|
||||
*/
|
||||
|
||||
HWLOC_OBJ_TYPE_MAX /**< \private Sentinel value */
|
||||
|
@ -338,8 +350,7 @@ typedef enum hwloc_obj_osdev_type_e {
|
|||
HWLOC_OBJ_OSDEV_DMA, /**< \brief Operating system dma engine device.
|
||||
* For instance the "dma0chan0" DMA channel on Linux. */
|
||||
HWLOC_OBJ_OSDEV_COPROC /**< \brief Operating system co-processor device.
|
||||
* For instance "mic0" for a Xeon Phi (MIC) on Linux,
|
||||
* "opencl0d0" for a OpenCL device,
|
||||
* For instance "opencl0d0" for a OpenCL device,
|
||||
* "cuda0" for a CUDA device. */
|
||||
} hwloc_obj_osdev_type_t;
|
||||
|
||||
|
@ -512,7 +523,7 @@ struct hwloc_obj {
|
|||
*
|
||||
* \note Its value must not be changed, hwloc_bitmap_dup() must be used instead.
|
||||
*/
|
||||
hwloc_cpuset_t complete_cpuset; /**< \brief The complete CPU set of logical processors of this object,
|
||||
hwloc_cpuset_t complete_cpuset; /**< \brief The complete CPU set of processors of this object,
|
||||
*
|
||||
* This may include not only the same as the cpuset field, but also some CPUs for
|
||||
* which topology information is unknown or incomplete, some offlines CPUs, and
|
||||
|
@ -533,6 +544,8 @@ struct hwloc_obj {
|
|||
* between this object and the NUMA node objects).
|
||||
*
|
||||
* In the end, these nodes are those that are close to the current object.
|
||||
* Function hwloc_get_local_numanode_objs() may be used to list those NUMA
|
||||
* nodes more precisely.
|
||||
*
|
||||
* If the ::HWLOC_TOPOLOGY_FLAG_INCLUDE_DISALLOWED configuration flag is set,
|
||||
* some of these nodes may not be allowed for allocation,
|
||||
|
@ -1929,7 +1942,31 @@ enum hwloc_topology_flags_e {
|
|||
* would result in the same behavior.
|
||||
* \hideinitializer
|
||||
*/
|
||||
HWLOC_TOPOLOGY_FLAG_THISSYSTEM_ALLOWED_RESOURCES = (1UL<<2)
|
||||
HWLOC_TOPOLOGY_FLAG_THISSYSTEM_ALLOWED_RESOURCES = (1UL<<2),
|
||||
|
||||
/** \brief Import support from the imported topology.
|
||||
*
|
||||
* When importing a XML topology from a remote machine, binding is
|
||||
* disabled by default (see ::HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM).
|
||||
* This disabling is also marked by putting zeroes in the corresponding
|
||||
* supported feature bits reported by hwloc_topology_get_support().
|
||||
*
|
||||
* The flag ::HWLOC_TOPOLOGY_FLAG_IMPORT_SUPPORT actually imports
|
||||
* support bits from the remote machine. It also sets the flag
|
||||
* \p imported_support in the struct hwloc_topology_misc_support array.
|
||||
* If the imported XML did not contain any support information
|
||||
* (exporter hwloc is too old), this flag is not set.
|
||||
*
|
||||
* Note that these supported features are only relevant for the hwloc
|
||||
* installation that actually exported the XML topology
|
||||
* (it may vary with the operating system, or with how hwloc was compiled).
|
||||
*
|
||||
* Note that setting this flag however does not enable binding for the
|
||||
* locally imported hwloc topology, it only reports what the remote
|
||||
* hwloc and machine support.
|
||||
*
|
||||
*/
|
||||
HWLOC_TOPOLOGY_FLAG_IMPORT_SUPPORT = (1UL<<3)
|
||||
};
|
||||
|
||||
/** \brief Set OR'ed flags to non-yet-loaded topology.
|
||||
|
@ -1972,6 +2009,8 @@ struct hwloc_topology_discovery_support {
|
|||
unsigned char disallowed_pu;
|
||||
/** \brief Detecting and identifying NUMA nodes that are not available to the current process is supported. */
|
||||
unsigned char disallowed_numa;
|
||||
/** \brief Detecting the efficiency of CPU kinds is supported, see \ref hwlocality_cpukinds. */
|
||||
unsigned char cpukind_efficiency;
|
||||
};
|
||||
|
||||
/** \brief Flags describing actual PU binding support for this topology.
|
||||
|
@ -2042,6 +2081,13 @@ struct hwloc_topology_membind_support {
|
|||
unsigned char get_area_memlocation;
|
||||
};
|
||||
|
||||
/** \brief Flags describing miscellaneous features.
|
||||
*/
|
||||
struct hwloc_topology_misc_support {
|
||||
/** Support was imported when importing another topology, see ::HWLOC_TOPOLOGY_FLAG_IMPORT_SUPPORT. */
|
||||
unsigned char imported_support;
|
||||
};
|
||||
|
||||
/** \brief Set of flags describing actual support for this topology.
|
||||
*
|
||||
* This is retrieved with hwloc_topology_get_support() and will be valid until
|
||||
|
@ -2052,6 +2098,7 @@ struct hwloc_topology_support {
|
|||
struct hwloc_topology_discovery_support *discovery;
|
||||
struct hwloc_topology_cpubind_support *cpubind;
|
||||
struct hwloc_topology_membind_support *membind;
|
||||
struct hwloc_topology_misc_support *misc;
|
||||
};
|
||||
|
||||
/** \brief Retrieve the topology support.
|
||||
|
@ -2062,6 +2109,18 @@ struct hwloc_topology_support {
|
|||
* call may still fail in some corner cases.
|
||||
*
|
||||
* These features are also listed by hwloc-info \--support
|
||||
*
|
||||
* The reported features are what the current topology supports
|
||||
* on the current machine. If the topology was exported to XML
|
||||
* from another machine and later imported here, support still
|
||||
* describes what is supported for this imported topology after
|
||||
* import. By default, binding will be reported as unsupported
|
||||
* in this case (see ::HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM).
|
||||
*
|
||||
* Topology flag ::HWLOC_TOPOLOGY_FLAG_IMPORT_SUPPORT may be used
|
||||
* to report the supported features of the original remote machine
|
||||
* instead. If it was successfully imported, \p imported_support
|
||||
* will be set in the struct hwloc_topology_misc_support array.
|
||||
*/
|
||||
HWLOC_DECLSPEC const struct hwloc_topology_support *hwloc_topology_get_support(hwloc_topology_t __hwloc_restrict topology);
|
||||
|
||||
|
@ -2108,8 +2167,8 @@ enum hwloc_type_filter_e {
|
|||
*
|
||||
* It is only useful for I/O object types.
|
||||
* For ::HWLOC_OBJ_PCI_DEVICE and ::HWLOC_OBJ_OS_DEVICE, it means that only objects
|
||||
* of major/common kinds are kept (storage, network, OpenFabrics, Intel MICs, CUDA,
|
||||
* OpenCL, NVML, and displays).
|
||||
* of major/common kinds are kept (storage, network, OpenFabrics, CUDA,
|
||||
* OpenCL, RSMI, NVML, and displays).
|
||||
* Also, only OS devices directly attached on PCI (e.g. no USB) are reported.
|
||||
* For ::HWLOC_OBJ_BRIDGE, it means that bridges are kept only if they have children.
|
||||
*
|
||||
|
@ -2371,6 +2430,22 @@ HWLOC_DECLSPEC hwloc_obj_t hwloc_topology_insert_group_object(hwloc_topology_t t
|
|||
*/
|
||||
HWLOC_DECLSPEC int hwloc_obj_add_other_obj_sets(hwloc_obj_t dst, hwloc_obj_t src);
|
||||
|
||||
/** \brief Refresh internal structures after topology modification.
|
||||
*
|
||||
* Modifying the topology (by restricting, adding objects, modifying structures
|
||||
* such as distances or memory attributes, etc.) may cause some internal caches
|
||||
* to become invalid. These caches are automatically refreshed when accessed
|
||||
* but this refreshing is not thread-safe.
|
||||
*
|
||||
* This function is not thread-safe either, but it is a good way to end a
|
||||
* non-thread-safe phase of topology modification. Once this refresh is done,
|
||||
* multiple threads may concurrently consult the topology, objects, distances,
|
||||
* attributes, etc.
|
||||
*
|
||||
* See also \ref threadsafety
|
||||
*/
|
||||
HWLOC_DECLSPEC int hwloc_topology_refresh(hwloc_topology_t topology);
|
||||
|
||||
/** @} */
|
||||
|
||||
|
||||
|
@ -2386,6 +2461,12 @@ HWLOC_DECLSPEC int hwloc_obj_add_other_obj_sets(hwloc_obj_t dst, hwloc_obj_t src
|
|||
/* inline code of some functions above */
|
||||
#include "hwloc/inlines.h"
|
||||
|
||||
/* memory attributes */
|
||||
#include "hwloc/memattrs.h"
|
||||
|
||||
/* kinds of CPU cores */
|
||||
#include "hwloc/cpukinds.h"
|
||||
|
||||
/* exporting to XML or synthetic */
|
||||
#include "hwloc/export.h"
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
* Copyright © 2009 CNRS
|
||||
* Copyright © 2009-2019 Inria. All rights reserved.
|
||||
* Copyright © 2009-2020 Inria. All rights reserved.
|
||||
* Copyright © 2009-2012 Université Bordeaux
|
||||
* Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved.
|
||||
* See COPYING in top-level directory.
|
||||
|
@ -11,9 +11,9 @@
|
|||
#ifndef HWLOC_CONFIG_H
|
||||
#define HWLOC_CONFIG_H
|
||||
|
||||
#define HWLOC_VERSION "2.2.0"
|
||||
#define HWLOC_VERSION "2.4.0"
|
||||
#define HWLOC_VERSION_MAJOR 2
|
||||
#define HWLOC_VERSION_MINOR 2
|
||||
#define HWLOC_VERSION_MINOR 4
|
||||
#define HWLOC_VERSION_RELEASE 0
|
||||
#define HWLOC_VERSION_GREEK ""
|
||||
|
||||
|
|
4
src/3rdparty/hwloc/include/hwloc/bitmap.h
vendored
4
src/3rdparty/hwloc/include/hwloc/bitmap.h
vendored
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
* Copyright © 2009 CNRS
|
||||
* Copyright © 2009-2018 Inria. All rights reserved.
|
||||
* Copyright © 2009-2020 Inria. All rights reserved.
|
||||
* Copyright © 2009-2012 Université Bordeaux
|
||||
* Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved.
|
||||
* See COPYING in top-level directory.
|
||||
|
@ -231,7 +231,7 @@ HWLOC_DECLSPEC int hwloc_bitmap_clr_range(hwloc_bitmap_t bitmap, unsigned begin,
|
|||
/** \brief Keep a single index among those set in bitmap \p bitmap
|
||||
*
|
||||
* May be useful before binding so that the process does not
|
||||
* have a chance of migrating between multiple logical CPUs
|
||||
* have a chance of migrating between multiple processors
|
||||
* in the original mask.
|
||||
* Instead of running the task on any PU inside the given CPU set,
|
||||
* the operating system scheduler will be forced to run it on a single
|
||||
|
|
188
src/3rdparty/hwloc/include/hwloc/cpukinds.h
vendored
Normal file
188
src/3rdparty/hwloc/include/hwloc/cpukinds.h
vendored
Normal file
|
@ -0,0 +1,188 @@
|
|||
/*
|
||||
* Copyright © 2020 Inria. All rights reserved.
|
||||
* See COPYING in top-level directory.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Kinds of CPU cores.
|
||||
*/
|
||||
|
||||
#ifndef HWLOC_CPUKINDS_H
|
||||
#define HWLOC_CPUKINDS_H
|
||||
|
||||
#include "hwloc.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#elif 0
|
||||
}
|
||||
#endif
|
||||
|
||||
/** \defgroup hwlocality_cpukinds Kinds of CPU cores
|
||||
*
|
||||
* Platforms with heterogeneous CPUs may have some cores with
|
||||
* different features or frequencies.
|
||||
* This API exposes identical PUs in sets called CPU kinds.
|
||||
* Each PU of the topology may only be in a single kind.
|
||||
*
|
||||
* The number of kinds may be obtained with hwloc_cpukinds_get_nr().
|
||||
* If the platform is homogeneous, there may be a single kind
|
||||
* with all PUs.
|
||||
* If the platform or operating system does not expose any
|
||||
* information about CPU cores, there may be no kind at all.
|
||||
*
|
||||
* The index of the kind that describes a given CPU set
|
||||
* (if any, and not partially)
|
||||
* may be obtained with hwloc_cpukinds_get_by_cpuset().
|
||||
*
|
||||
* From the index of a kind, it is possible to retrieve information
|
||||
* with hwloc_cpukinds_get_info():
|
||||
* an abstracted efficiency value,
|
||||
* and an array of info attributes
|
||||
* (for instance the "CoreType" and "FrequencyMaxMHz",
|
||||
* see \ref topoattrs_cpukinds).
|
||||
*
|
||||
* A higher efficiency value means intrinsic greater performance
|
||||
* (and possibly less performance/power efficiency).
|
||||
* Kinds with lower efficiency are ranked first:
|
||||
* Passing 0 as \p kind_index to hwloc_cpukinds_get_info() will
|
||||
* return information about the less efficient CPU kind.
|
||||
*
|
||||
* When available, efficiency values are gathered from the operating
|
||||
* system (when \p cpukind_efficiency is set in the
|
||||
* struct hwloc_topology_discovery_support array, only on Windows 10 for now).
|
||||
* Otherwise hwloc tries to compute efficiencies
|
||||
* by comparing CPU kinds using frequencies (on ARM),
|
||||
* or core types and frequencies (on other architectures).
|
||||
* The environment variable HWLOC_CPUKINDS_RANKING may be used
|
||||
* to change this heuristics, see \ref envvar.
|
||||
*
|
||||
* If hwloc fails to rank any kind, for instance because the operating
|
||||
* system does not expose efficiencies and core frequencies,
|
||||
* all kinds will have an unknown efficiency (\c -1),
|
||||
* and they are not indexed/ordered in any specific way.
|
||||
*
|
||||
* @{
|
||||
*/
|
||||
|
||||
/** \brief Get the number of different kinds of CPU cores in the topology.
|
||||
*
|
||||
* \p flags must be \c 0 for now.
|
||||
*
|
||||
* \return The number of CPU kinds (positive integer) on success.
|
||||
* \return \c 0 if no information about kinds was found.
|
||||
* \return \c -1 with \p errno set to \c EINVAL if \p flags is invalid.
|
||||
*/
|
||||
HWLOC_DECLSPEC int
|
||||
hwloc_cpukinds_get_nr(hwloc_topology_t topology,
|
||||
unsigned long flags);
|
||||
|
||||
/** \brief Get the index of the CPU kind that contains CPUs listed in \p cpuset.
|
||||
*
|
||||
* \p flags must be \c 0 for now.
|
||||
*
|
||||
* \return The index of the CPU kind (positive integer or 0) on success.
|
||||
* \return \c -1 with \p errno set to \c EXDEV if \p cpuset is
|
||||
* only partially included in the some kind.
|
||||
* \return \c -1 with \p errno set to \c ENOENT if \p cpuset is
|
||||
* not included in any kind, even partially.
|
||||
* \return \c -1 with \p errno set to \c EINVAL if parameters are invalid.
|
||||
*/
|
||||
HWLOC_DECLSPEC int
|
||||
hwloc_cpukinds_get_by_cpuset(hwloc_topology_t topology,
|
||||
hwloc_const_bitmap_t cpuset,
|
||||
unsigned long flags);
|
||||
|
||||
/** \brief Get the CPU set and infos about a CPU kind in the topology.
|
||||
*
|
||||
* \p kind_index identifies one kind of CPU between 0 and the number
|
||||
* of kinds returned by hwloc_cpukinds_get_nr() minus 1.
|
||||
*
|
||||
* If not \c NULL, the bitmap \p cpuset will be filled with
|
||||
* the set of PUs of this kind.
|
||||
*
|
||||
* The integer pointed by \p efficiency, if not \c NULL will, be filled
|
||||
* with the ranking of this kind of CPU in term of efficiency (see above).
|
||||
* It ranges from \c 0 to the number of kinds
|
||||
* (as reported by hwloc_cpukinds_get_nr()) minus 1.
|
||||
*
|
||||
* Kinds with lower efficiency are reported first.
|
||||
*
|
||||
* If there is a single kind in the topology, its efficiency \c 0.
|
||||
* If the efficiency of some kinds of cores is unknown,
|
||||
* the efficiency of all kinds is set to \c -1,
|
||||
* and kinds are reported in no specific order.
|
||||
*
|
||||
* The array of info attributes (for instance the "CoreType",
|
||||
* "FrequencyMaxMHz" or "FrequencyBaseMHz", see \ref topoattrs_cpukinds)
|
||||
* and its length are returned in \p infos or \p nr_infos.
|
||||
* The array belongs to the topology, it should not be freed or modified.
|
||||
*
|
||||
* If \p nr_infos or \p infos is \c NULL, no info is returned.
|
||||
*
|
||||
* \p flags must be \c 0 for now.
|
||||
*
|
||||
* \return \c 0 on success.
|
||||
* \return \c -1 with \p errno set to \c ENOENT if \p kind_index does not match any CPU kind.
|
||||
* \return \c -1 with \p errno set to \c EINVAL if parameters are invalid.
|
||||
*/
|
||||
HWLOC_DECLSPEC int
|
||||
hwloc_cpukinds_get_info(hwloc_topology_t topology,
|
||||
unsigned kind_index,
|
||||
hwloc_bitmap_t cpuset,
|
||||
int *efficiency,
|
||||
unsigned *nr_infos, struct hwloc_info_s **infos,
|
||||
unsigned long flags);
|
||||
|
||||
/** \brief Register a kind of CPU in the topology.
|
||||
*
|
||||
* Mark the PUs listed in \p cpuset as being of the same kind
|
||||
* with respect to the given attributes.
|
||||
*
|
||||
* \p forced_efficiency should be \c -1 if unknown.
|
||||
* Otherwise it is an abstracted efficiency value to enforce
|
||||
* the ranking of all kinds if all of them have valid (and
|
||||
* different) efficiencies.
|
||||
*
|
||||
* The array \p infos of size \p nr_infos may be used to provide
|
||||
* info names and values describing this kind of PUs.
|
||||
*
|
||||
* \p flags must be \c 0 for now.
|
||||
*
|
||||
* Parameters \p cpuset and \p infos will be duplicated internally,
|
||||
* the caller is responsible for freeing them.
|
||||
*
|
||||
* If \p cpuset overlaps with some existing kinds, those might get
|
||||
* modified or split. For instance if existing kind A contains
|
||||
* PUs 0 and 1, and one registers another kind for PU 1 and 2,
|
||||
* there will be 3 resulting kinds:
|
||||
* existing kind A is restricted to only PU 0;
|
||||
* new kind B contains only PU 1 and combines information from A
|
||||
* and from the newly-registered kind;
|
||||
* new kind C contains only PU 2 and only gets information from
|
||||
* the newly-registered kind.
|
||||
*
|
||||
* \note The efficiency \p forced_efficiency provided to this function
|
||||
* may be different from the one reported later by hwloc_cpukinds_get_info()
|
||||
* because hwloc will scale efficiency values down to
|
||||
* between 0 and the number of kinds minus 1.
|
||||
*
|
||||
* \return \c 0 on success.
|
||||
* \return \c -1 with \p errno set to \c EINVAL if some parameters are invalid,
|
||||
* for instance if \p cpuset is \c NULL or empty.
|
||||
*/
|
||||
HWLOC_DECLSPEC int
|
||||
hwloc_cpukinds_register(hwloc_topology_t topology,
|
||||
hwloc_bitmap_t cpuset,
|
||||
int forced_efficiency,
|
||||
unsigned nr_infos, struct hwloc_info_s *infos,
|
||||
unsigned long flags);
|
||||
|
||||
/** @} */
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
|
||||
#endif /* HWLOC_CPUKINDS_H */
|
4
src/3rdparty/hwloc/include/hwloc/cuda.h
vendored
4
src/3rdparty/hwloc/include/hwloc/cuda.h
vendored
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright © 2010-2017 Inria. All rights reserved.
|
||||
* Copyright © 2010-2020 Inria. All rights reserved.
|
||||
* Copyright © 2010-2011 Université Bordeaux
|
||||
* Copyright © 2011 Cisco Systems, Inc. All rights reserved.
|
||||
* See COPYING in top-level directory.
|
||||
|
@ -72,7 +72,7 @@ hwloc_cuda_get_device_pci_ids(hwloc_topology_t topology __hwloc_attribute_unused
|
|||
return 0;
|
||||
}
|
||||
|
||||
/** \brief Get the CPU set of logical processors that are physically
|
||||
/** \brief Get the CPU set of processors that are physically
|
||||
* close to device \p cudevice.
|
||||
*
|
||||
* Return the CPU set describing the locality of the CUDA device \p cudevice.
|
||||
|
|
4
src/3rdparty/hwloc/include/hwloc/cudart.h
vendored
4
src/3rdparty/hwloc/include/hwloc/cudart.h
vendored
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright © 2010-2017 Inria. All rights reserved.
|
||||
* Copyright © 2010-2020 Inria. All rights reserved.
|
||||
* Copyright © 2010-2011 Université Bordeaux
|
||||
* Copyright © 2011 Cisco Systems, Inc. All rights reserved.
|
||||
* See COPYING in top-level directory.
|
||||
|
@ -69,7 +69,7 @@ hwloc_cudart_get_device_pci_ids(hwloc_topology_t topology __hwloc_attribute_unus
|
|||
return 0;
|
||||
}
|
||||
|
||||
/** \brief Get the CPU set of logical processors that are physically
|
||||
/** \brief Get the CPU set of processors that are physically
|
||||
* close to device \p idx.
|
||||
*
|
||||
* Return the CPU set describing the locality of the CUDA device
|
||||
|
|
6
src/3rdparty/hwloc/include/hwloc/diff.h
vendored
6
src/3rdparty/hwloc/include/hwloc/diff.h
vendored
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright © 2013-2018 Inria. All rights reserved.
|
||||
* Copyright © 2013-2020 Inria. All rights reserved.
|
||||
* See COPYING in top-level directory.
|
||||
*/
|
||||
|
||||
|
@ -110,7 +110,7 @@ union hwloc_topology_diff_obj_attr_u {
|
|||
*/
|
||||
typedef enum hwloc_topology_diff_type_e {
|
||||
/** \brief An object attribute was changed.
|
||||
* The union is a hwloc_topology_diff_obj_attr_u::hwloc_topology_diff_obj_attr_s.
|
||||
* The union is a hwloc_topology_diff_u::hwloc_topology_diff_obj_attr_s.
|
||||
*/
|
||||
HWLOC_TOPOLOGY_DIFF_OBJ_ATTR,
|
||||
|
||||
|
@ -119,7 +119,7 @@ typedef enum hwloc_topology_diff_type_e {
|
|||
* this object has not been checked.
|
||||
* hwloc_topology_diff_build() will return 1.
|
||||
*
|
||||
* The union is a hwloc_topology_diff_obj_attr_u::hwloc_topology_diff_too_complex_s.
|
||||
* The union is a hwloc_topology_diff_u::hwloc_topology_diff_too_complex_s.
|
||||
*/
|
||||
HWLOC_TOPOLOGY_DIFF_TOO_COMPLEX
|
||||
} hwloc_topology_diff_type_t;
|
||||
|
|
5
src/3rdparty/hwloc/include/hwloc/distances.h
vendored
5
src/3rdparty/hwloc/include/hwloc/distances.h
vendored
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright © 2010-2019 Inria. All rights reserved.
|
||||
* Copyright © 2010-2020 Inria. All rights reserved.
|
||||
* See COPYING in top-level directory.
|
||||
*/
|
||||
|
||||
|
@ -34,6 +34,7 @@ extern "C" {
|
|||
* It corresponds to the latency for accessing the memory of one node
|
||||
* from a core in another node.
|
||||
* The corresponding kind is ::HWLOC_DISTANCES_KIND_FROM_OS | ::HWLOC_DISTANCES_KIND_FROM_USER.
|
||||
* The name of this distances structure is "NUMALatency".
|
||||
*
|
||||
* The matrix may also contain bandwidths between random sets of objects,
|
||||
* possibly provided by the user, as specified in the \p kind attribute.
|
||||
|
@ -144,6 +145,8 @@ hwloc_distances_get_by_type(hwloc_topology_t topology, hwloc_obj_type_t type,
|
|||
/** \brief Retrieve a distance matrix with the given name.
|
||||
*
|
||||
* Usually only one distances structure may match a given name.
|
||||
*
|
||||
* The name of the most common structure is "NUMALatency".
|
||||
*/
|
||||
HWLOC_DECLSPEC int
|
||||
hwloc_distances_get_by_name(hwloc_topology_t topology, const char *name,
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
* Copyright © 2009 CNRS
|
||||
* Copyright © 2009-2013 inria. All rights reserved.
|
||||
* Copyright © 2009-2020 Inria. All rights reserved.
|
||||
* Copyright © 2009-2011 Université Bordeaux
|
||||
* Copyright © 2011 Cisco Systems, Inc. All rights reserved.
|
||||
* See COPYING in top-level directory.
|
||||
|
@ -22,7 +22,7 @@
|
|||
|
||||
#include <assert.h>
|
||||
|
||||
#if !defined _GNU_SOURCE || !defined _SCHED_H || (!defined CPU_SETSIZE && !defined sched_priority)
|
||||
#if !defined _GNU_SOURCE || (!defined _SCHED_H && !defined _SCHED_H_) || (!defined CPU_SETSIZE && !defined sched_priority)
|
||||
#error Please make sure to include sched.h before including glibc-sched.h, and define _GNU_SOURCE before any inclusion of sched.h
|
||||
#endif
|
||||
|
||||
|
|
10
src/3rdparty/hwloc/include/hwloc/helper.h
vendored
10
src/3rdparty/hwloc/include/hwloc/helper.h
vendored
|
@ -872,8 +872,8 @@ hwloc_distrib(hwloc_topology_t topology,
|
|||
unsigned chunk, weight;
|
||||
hwloc_obj_t root = roots[flags & HWLOC_DISTRIB_FLAG_REVERSE ? n_roots-1-i : i];
|
||||
hwloc_cpuset_t cpuset = root->cpuset;
|
||||
if (root->type == HWLOC_OBJ_NUMANODE)
|
||||
/* NUMANodes have same cpuset as their parent, but we need normal objects below */
|
||||
while (!hwloc_obj_type_is_normal(root->type))
|
||||
/* If memory/io/misc, walk up to normal parent */
|
||||
root = root->parent;
|
||||
weight = (unsigned) hwloc_bitmap_weight(cpuset);
|
||||
if (!weight)
|
||||
|
@ -919,7 +919,7 @@ hwloc_distrib(hwloc_topology_t topology,
|
|||
|
||||
/** \brief Get complete CPU set
|
||||
*
|
||||
* \return the complete CPU set of logical processors of the system.
|
||||
* \return the complete CPU set of processors of the system.
|
||||
*
|
||||
* \note The returned cpuset is not newly allocated and should thus not be
|
||||
* changed or freed; hwloc_bitmap_dup() must be used to obtain a local copy.
|
||||
|
@ -931,7 +931,7 @@ hwloc_topology_get_complete_cpuset(hwloc_topology_t topology) __hwloc_attribute_
|
|||
|
||||
/** \brief Get topology CPU set
|
||||
*
|
||||
* \return the CPU set of logical processors of the system for which hwloc
|
||||
* \return the CPU set of processors of the system for which hwloc
|
||||
* provides topology information. This is equivalent to the cpuset of the
|
||||
* system object.
|
||||
*
|
||||
|
@ -945,7 +945,7 @@ hwloc_topology_get_topology_cpuset(hwloc_topology_t topology) __hwloc_attribute_
|
|||
|
||||
/** \brief Get allowed CPU set
|
||||
*
|
||||
* \return the CPU set of allowed logical processors of the system.
|
||||
* \return the CPU set of allowed processors of the system.
|
||||
*
|
||||
* \note If the topology flag ::HWLOC_TOPOLOGY_FLAG_INCLUDE_DISALLOWED was not set,
|
||||
* this is identical to hwloc_topology_get_topology_cpuset(), which means
|
||||
|
|
455
src/3rdparty/hwloc/include/hwloc/memattrs.h
vendored
Normal file
455
src/3rdparty/hwloc/include/hwloc/memattrs.h
vendored
Normal file
|
@ -0,0 +1,455 @@
|
|||
/*
|
||||
* Copyright © 2019-2020 Inria. All rights reserved.
|
||||
* See COPYING in top-level directory.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Memory node attributes.
|
||||
*/
|
||||
|
||||
#ifndef HWLOC_MEMATTR_H
|
||||
#define HWLOC_MEMATTR_H
|
||||
|
||||
#include "hwloc.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#elif 0
|
||||
}
|
||||
#endif
|
||||
|
||||
/** \defgroup hwlocality_memattrs Comparing memory node attributes for finding where to allocate on
|
||||
*
|
||||
* Platforms with heterogeneous memory require ways to decide whether
|
||||
* a buffer should be allocated on "fast" memory (such as HBM),
|
||||
* "normal" memory (DDR) or even "slow" but large-capacity memory
|
||||
* (non-volatile memory).
|
||||
* These memory nodes are called "Targets" while the CPU accessing them
|
||||
* is called the "Initiator". Access performance depends on their
|
||||
* locality (NUMA platforms) as well as the intrinsic performance
|
||||
* of the targets (heterogeneous platforms).
|
||||
*
|
||||
* The following attributes describe the performance of memory accesses
|
||||
* from an Initiator to a memory Target, for instance their latency
|
||||
* or bandwidth.
|
||||
* Initiators performing these memory accesses are usually some PUs or Cores
|
||||
* (described as a CPU set).
|
||||
* Hence a Core may choose where to allocate a memory buffer by comparing
|
||||
* the attributes of different target memory nodes nearby.
|
||||
*
|
||||
* There are also some attributes that are system-wide.
|
||||
* Their value does not depend on a specific initiator performing
|
||||
* an access.
|
||||
* The memory node Capacity is an example of such attribute without
|
||||
* initiator.
|
||||
*
|
||||
* One way to use this API is to start with a cpuset describing the Cores where
|
||||
* a program is bound. The best target NUMA node for allocating memory in this
|
||||
* program on these Cores may be obtained by passing this cpuset as an initiator
|
||||
* to hwloc_memattr_get_best_target() with the relevant memory attribute.
|
||||
* For instance, if the code is latency limited, use the Latency attribute.
|
||||
*
|
||||
* A more flexible approach consists in getting the list of local NUMA nodes
|
||||
* by passing this cpuset to hwloc_get_local_numanode_objs().
|
||||
* Attribute values for these nodes, if any, may then be obtained with
|
||||
* hwloc_memattr_get_value() and manually compared with the desired criteria.
|
||||
*
|
||||
* \note The API also supports specific objects as initiator,
|
||||
* but it is currently not used internally by hwloc.
|
||||
* Users may for instance use it to provide custom performance
|
||||
* values for host memory accesses performed by GPUs.
|
||||
*
|
||||
* \note The interface actually also accepts targets that are not NUMA nodes.
|
||||
* @{
|
||||
*/
|
||||
|
||||
/** \brief Memory node attributes. */
|
||||
enum hwloc_memattr_id_e {
|
||||
/** \brief "Capacity".
|
||||
* The capacity is returned in bytes
|
||||
* (local_memory attribute in objects).
|
||||
*
|
||||
* Best capacity nodes are nodes with <b>higher capacity</b>.
|
||||
*
|
||||
* No initiator is involved when looking at this attribute.
|
||||
* The corresponding attribute flags are ::HWLOC_MEMATTR_FLAG_HIGHER_FIRST.
|
||||
*/
|
||||
HWLOC_MEMATTR_ID_CAPACITY = 0,
|
||||
|
||||
/** \brief "Locality".
|
||||
* The locality is returned as the number of PUs in that locality
|
||||
* (e.g. the weight of its cpuset).
|
||||
*
|
||||
* Best locality nodes are nodes with <b>smaller locality</b>
|
||||
* (nodes that are local to very few PUs).
|
||||
* Poor locality nodes are nodes with larger locality
|
||||
* (nodes that are local to the entire machine).
|
||||
*
|
||||
* No initiator is involved when looking at this attribute.
|
||||
* The corresponding attribute flags are ::HWLOC_MEMATTR_FLAG_HIGHER_FIRST.
|
||||
*/
|
||||
HWLOC_MEMATTR_ID_LOCALITY = 1,
|
||||
|
||||
/** \brief "Bandwidth".
|
||||
* The bandwidth is returned in MiB/s, as seen from the given initiator location.
|
||||
* Best bandwidth nodes are nodes with <b>higher bandwidth</b>.
|
||||
* The corresponding attribute flags are ::HWLOC_MEMATTR_FLAG_HIGHER_FIRST
|
||||
* and ::HWLOC_MEMATTR_FLAG_NEED_INITIATOR.
|
||||
*/
|
||||
HWLOC_MEMATTR_ID_BANDWIDTH = 2,
|
||||
|
||||
/** \brief "Latency".
|
||||
* The latency is returned as nanoseconds, as seen from the given initiator location.
|
||||
* Best latency nodes are nodes with <b>smaller latency</b>.
|
||||
* The corresponding attribute flags are ::HWLOC_MEMATTR_FLAG_LOWER_FIRST
|
||||
* and ::HWLOC_MEMATTR_FLAG_NEED_INITIATOR.
|
||||
*/
|
||||
HWLOC_MEMATTR_ID_LATENCY = 3
|
||||
|
||||
/* TODO read vs write, persistence? */
|
||||
};
|
||||
|
||||
/** \brief A memory attribute identifier.
|
||||
* May be either one of ::hwloc_memattr_id_e or a new id returned by hwloc_memattr_register().
|
||||
*/
|
||||
typedef unsigned hwloc_memattr_id_t;
|
||||
|
||||
/** \brief Return the identifier of the memory attribute with the given name.
|
||||
*/
|
||||
HWLOC_DECLSPEC int
|
||||
hwloc_memattr_get_by_name(hwloc_topology_t topology,
|
||||
const char *name,
|
||||
hwloc_memattr_id_t *id);
|
||||
|
||||
|
||||
/** \brief Type of location. */
|
||||
enum hwloc_location_type_e {
|
||||
/** \brief Location is given as a cpuset, in the location cpuset union field. \hideinitializer */
|
||||
HWLOC_LOCATION_TYPE_CPUSET = 1,
|
||||
/** \brief Location is given as an object, in the location object union field. \hideinitializer */
|
||||
HWLOC_LOCATION_TYPE_OBJECT = 0
|
||||
};
|
||||
|
||||
/** \brief Where to measure attributes from. */
|
||||
struct hwloc_location {
|
||||
/** \brief Type of location. */
|
||||
enum hwloc_location_type_e type;
|
||||
/** \brief Actual location. */
|
||||
union hwloc_location_u {
|
||||
/** \brief Location as a cpuset, when the location type is ::HWLOC_LOCATION_TYPE_CPUSET. */
|
||||
hwloc_cpuset_t cpuset;
|
||||
/** \brief Location as an object, when the location type is ::HWLOC_LOCATION_TYPE_OBJECT. */
|
||||
hwloc_obj_t object;
|
||||
} location;
|
||||
};
|
||||
|
||||
|
||||
/** \brief Flags for selecting target NUMA nodes. */
|
||||
enum hwloc_local_numanode_flag_e {
|
||||
/** \brief Select NUMA nodes whose locality is larger than the given cpuset.
|
||||
* For instance, if a single PU (or its cpuset) is given in \p initiator,
|
||||
* select all nodes close to the package that contains this PU.
|
||||
* \hideinitializer
|
||||
*/
|
||||
HWLOC_LOCAL_NUMANODE_FLAG_LARGER_LOCALITY = (1UL<<0),
|
||||
|
||||
/** \brief Select NUMA nodes whose locality is smaller than the given cpuset.
|
||||
* For instance, if a package (or its cpuset) is given in \p initiator,
|
||||
* also select nodes that are attached to only a half of that package.
|
||||
* \hideinitializer
|
||||
*/
|
||||
HWLOC_LOCAL_NUMANODE_FLAG_SMALLER_LOCALITY = (1UL<<1),
|
||||
|
||||
/** \brief Select all NUMA nodes in the topology.
|
||||
* The initiator \p initiator is ignored.
|
||||
* \hideinitializer
|
||||
*/
|
||||
HWLOC_LOCAL_NUMANODE_FLAG_ALL = (1UL<<2)
|
||||
};
|
||||
|
||||
/** \brief Return an array of local NUMA nodes.
|
||||
*
|
||||
* By default only select the NUMA nodes whose locality is exactly
|
||||
* the given \p location. More nodes may be selected if additional flags
|
||||
* are given as a OR'ed set of ::hwloc_local_numanode_flag_e.
|
||||
*
|
||||
* If \p location is given as an explicit object, its CPU set is used
|
||||
* to find NUMA nodes with the corresponding locality.
|
||||
* If the object does not have a CPU set (e.g. I/O object), the CPU
|
||||
* parent (where the I/O object is attached) is used.
|
||||
*
|
||||
* On input, \p nr points to the number of nodes that may be stored
|
||||
* in the \p nodes array.
|
||||
* On output, \p nr will be changed to the number of stored nodes,
|
||||
* or the number of nodes that would have been stored if there were
|
||||
* enough room.
|
||||
*
|
||||
* \note Some of these NUMA nodes may not have any memory attribute
|
||||
* values and hence not be reported as actual targets in other functions.
|
||||
*
|
||||
* \note The number of NUMA nodes in the topology (obtained by
|
||||
* hwloc_bitmap_weight() on the root object nodeset) may be used
|
||||
* to allocate the \p nodes array.
|
||||
*
|
||||
* \note When an object CPU set is given as locality, for instance a Package,
|
||||
* and when flags contain both ::HWLOC_LOCAL_NUMANODE_FLAG_LARGER_LOCALITY
|
||||
* and ::HWLOC_LOCAL_NUMANODE_FLAG_SMALLER_LOCALITY,
|
||||
* the returned array corresponds to the nodeset of that object.
|
||||
*/
|
||||
HWLOC_DECLSPEC int
|
||||
hwloc_get_local_numanode_objs(hwloc_topology_t topology,
|
||||
struct hwloc_location *location,
|
||||
unsigned *nr,
|
||||
hwloc_obj_t *nodes,
|
||||
unsigned long flags);
|
||||
|
||||
|
||||
|
||||
/** \brief Return an attribute value for a specific target NUMA node.
|
||||
*
|
||||
* If the attribute does not relate to a specific initiator
|
||||
* (it does not have the flag ::HWLOC_MEMATTR_FLAG_NEED_INITIATOR),
|
||||
* location \p initiator is ignored and may be \c NULL.
|
||||
*
|
||||
* \p flags must be \c 0 for now.
|
||||
*
|
||||
* \note The initiator \p initiator should be of type ::HWLOC_LOCATION_TYPE_CPUSET
|
||||
* when refering to accesses performed by CPU cores.
|
||||
* ::HWLOC_LOCATION_TYPE_OBJECT is currently unused internally by hwloc,
|
||||
* but users may for instance use it to provide custom information about
|
||||
* host memory accesses performed by GPUs.
|
||||
*/
|
||||
HWLOC_DECLSPEC int
|
||||
hwloc_memattr_get_value(hwloc_topology_t topology,
|
||||
hwloc_memattr_id_t attribute,
|
||||
hwloc_obj_t target_node,
|
||||
struct hwloc_location *initiator,
|
||||
unsigned long flags,
|
||||
hwloc_uint64_t *value);
|
||||
|
||||
/** \brief Return the best target NUMA node for the given attribute and initiator.
|
||||
*
|
||||
* If the attribute does not relate to a specific initiator
|
||||
* (it does not have the flag ::HWLOC_MEMATTR_FLAG_NEED_INITIATOR),
|
||||
* location \p initiator is ignored and may be \c NULL.
|
||||
*
|
||||
* If \p value is non \c NULL, the corresponding value is returned there.
|
||||
*
|
||||
* If multiple targets have the same attribute values, only one is
|
||||
* returned (and there is no way to clarify how that one is chosen).
|
||||
* Applications that want to detect targets with identical/similar
|
||||
* values, or that want to look at values for multiple attributes,
|
||||
* should rather get all values using hwloc_memattr_get_value()
|
||||
* and manually select the target they consider the best.
|
||||
*
|
||||
* \p flags must be \c 0 for now.
|
||||
*
|
||||
* If there are no matching targets, \c -1 is returned with \p errno set to \c ENOENT;
|
||||
*
|
||||
* \note The initiator \p initiator should be of type ::HWLOC_LOCATION_TYPE_CPUSET
|
||||
* when refering to accesses performed by CPU cores.
|
||||
* ::HWLOC_LOCATION_TYPE_OBJECT is currently unused internally by hwloc,
|
||||
* but users may for instance use it to provide custom information about
|
||||
* host memory accesses performed by GPUs.
|
||||
*/
|
||||
HWLOC_DECLSPEC int
|
||||
hwloc_memattr_get_best_target(hwloc_topology_t topology,
|
||||
hwloc_memattr_id_t attribute,
|
||||
struct hwloc_location *initiator,
|
||||
unsigned long flags,
|
||||
hwloc_obj_t *best_target, hwloc_uint64_t *value);
|
||||
|
||||
/** \brief Return the best initiator for the given attribute and target NUMA node.
|
||||
*
|
||||
* If the attribute does not relate to a specific initiator
|
||||
* (it does not have the flag ::HWLOC_MEMATTR_FLAG_NEED_INITIATOR),
|
||||
* \c -1 is returned and \p errno is set to \c EINVAL.
|
||||
*
|
||||
* If \p value is non \c NULL, the corresponding value is returned there.
|
||||
*
|
||||
* If multiple initiators have the same attribute values, only one is
|
||||
* returned (and there is no way to clarify how that one is chosen).
|
||||
* Applications that want to detect initiators with identical/similar
|
||||
* values, or that want to look at values for multiple attributes,
|
||||
* should rather get all values using hwloc_memattr_get_value()
|
||||
* and manually select the initiator they consider the best.
|
||||
*
|
||||
* The returned initiator should not be modified or freed,
|
||||
* it belongs to the topology.
|
||||
*
|
||||
* \p flags must be \c 0 for now.
|
||||
*
|
||||
* If there are no matching initiators, \c -1 is returned with \p errno set to \c ENOENT;
|
||||
*/
|
||||
HWLOC_DECLSPEC int
|
||||
hwloc_memattr_get_best_initiator(hwloc_topology_t topology,
|
||||
hwloc_memattr_id_t attribute,
|
||||
hwloc_obj_t target,
|
||||
unsigned long flags,
|
||||
struct hwloc_location *best_initiator, hwloc_uint64_t *value);
|
||||
|
||||
/** @} */
|
||||
|
||||
|
||||
/** \defgroup hwlocality_memattrs_manage Managing memory attributes
|
||||
* @{
|
||||
*/
|
||||
|
||||
/** \brief Return the name of a memory attribute.
|
||||
*/
|
||||
HWLOC_DECLSPEC int
|
||||
hwloc_memattr_get_name(hwloc_topology_t topology,
|
||||
hwloc_memattr_id_t attribute,
|
||||
const char **name);
|
||||
|
||||
/** \brief Return the flags of the given attribute.
|
||||
*
|
||||
* Flags are a OR'ed set of ::hwloc_memattr_flag_e.
|
||||
*/
|
||||
HWLOC_DECLSPEC int
|
||||
hwloc_memattr_get_flags(hwloc_topology_t topology,
|
||||
hwloc_memattr_id_t attribute,
|
||||
unsigned long *flags);
|
||||
|
||||
/** \brief Memory attribute flags.
|
||||
* Given to hwloc_memattr_register() and returned by hwloc_memattr_get_flags().
|
||||
*/
|
||||
enum hwloc_memattr_flag_e {
|
||||
/** \brief The best nodes for this memory attribute are those with the higher values.
|
||||
* For instance Bandwidth.
|
||||
*/
|
||||
HWLOC_MEMATTR_FLAG_HIGHER_FIRST = (1UL<<0),
|
||||
/** \brief The best nodes for this memory attribute are those with the lower values.
|
||||
* For instance Latency.
|
||||
*/
|
||||
HWLOC_MEMATTR_FLAG_LOWER_FIRST = (1UL<<1),
|
||||
/** \brief The value returned for this memory attribute depends on the given initiator.
|
||||
* For instance Bandwidth and Latency, but not Capacity.
|
||||
*/
|
||||
HWLOC_MEMATTR_FLAG_NEED_INITIATOR = (1UL<<2)
|
||||
};
|
||||
|
||||
/** \brief Register a new memory attribute.
|
||||
*
|
||||
* Add a specific memory attribute that is not defined in ::hwloc_memattr_id_e.
|
||||
* Flags are a OR'ed set of ::hwloc_memattr_flag_e. It must contain at least
|
||||
* one of ::HWLOC_MEMATTR_FLAG_HIGHER_FIRST or ::HWLOC_MEMATTR_FLAG_LOWER_FIRST.
|
||||
*/
|
||||
HWLOC_DECLSPEC int
|
||||
hwloc_memattr_register(hwloc_topology_t topology,
|
||||
const char *name,
|
||||
unsigned long flags,
|
||||
hwloc_memattr_id_t *id);
|
||||
|
||||
/** \brief Set an attribute value for a specific target NUMA node.
|
||||
*
|
||||
* If the attribute does not relate to a specific initiator
|
||||
* (it does not have the flag ::HWLOC_MEMATTR_FLAG_NEED_INITIATOR),
|
||||
* location \p initiator is ignored and may be \c NULL.
|
||||
*
|
||||
* The initiator will be copied into the topology,
|
||||
* the caller should free anything allocated to store the initiator,
|
||||
* for instance the cpuset.
|
||||
*
|
||||
* \p flags must be \c 0 for now.
|
||||
*
|
||||
* \note The initiator \p initiator should be of type ::HWLOC_LOCATION_TYPE_CPUSET
|
||||
* when refering to accesses performed by CPU cores.
|
||||
* ::HWLOC_LOCATION_TYPE_OBJECT is currently unused internally by hwloc,
|
||||
* but users may for instance use it to provide custom information about
|
||||
* host memory accesses performed by GPUs.
|
||||
*/
|
||||
HWLOC_DECLSPEC int
|
||||
hwloc_memattr_set_value(hwloc_topology_t topology,
|
||||
hwloc_memattr_id_t attribute,
|
||||
hwloc_obj_t target_node,
|
||||
struct hwloc_location *initiator,
|
||||
unsigned long flags,
|
||||
hwloc_uint64_t value);
|
||||
|
||||
/** \brief Return the target NUMA nodes that have some values for a given attribute.
|
||||
*
|
||||
* Return targets for the given attribute in the \p targets array
|
||||
* (for the given initiator if any).
|
||||
* If \p values is not \c NULL, the corresponding attribute values
|
||||
* are stored in the array it points to.
|
||||
*
|
||||
* On input, \p nr points to the number of targets that may be stored
|
||||
* in the array \p targets (and \p values).
|
||||
* On output, \p nr points to the number of targets (and values) that
|
||||
* were actually found, even if some of them couldn't be stored in the array.
|
||||
* Targets that couldn't be stored are ignored, but the function still
|
||||
* returns success (\c 0). The caller may find out by comparing the value pointed
|
||||
* by \p nr before and after the function call.
|
||||
*
|
||||
* The returned targets should not be modified or freed,
|
||||
* they belong to the topology.
|
||||
*
|
||||
* Argument \p initiator is ignored if the attribute does not relate to a specific
|
||||
* initiator (it does not have the flag ::HWLOC_MEMATTR_FLAG_NEED_INITIATOR).
|
||||
* Otherwise \p initiator may be non \c NULL to report only targets
|
||||
* that have a value for that initiator.
|
||||
*
|
||||
* \p flags must be \c 0 for now.
|
||||
*
|
||||
* \note This function is meant for tools and debugging (listing internal information)
|
||||
* rather than for application queries. Applications should rather select useful
|
||||
* NUMA nodes with hwloc_get_local_numanode_objs() and then look at their attribute
|
||||
* values.
|
||||
*
|
||||
* \note The initiator \p initiator should be of type ::HWLOC_LOCATION_TYPE_CPUSET
|
||||
* when refering to accesses performed by CPU cores.
|
||||
* ::HWLOC_LOCATION_TYPE_OBJECT is currently unused internally by hwloc,
|
||||
* but users may for instance use it to provide custom information about
|
||||
* host memory accesses performed by GPUs.
|
||||
*/
|
||||
HWLOC_DECLSPEC int
|
||||
hwloc_memattr_get_targets(hwloc_topology_t topology,
|
||||
hwloc_memattr_id_t attribute,
|
||||
struct hwloc_location *initiator,
|
||||
unsigned long flags,
|
||||
unsigned *nrp, hwloc_obj_t *targets, hwloc_uint64_t *values);
|
||||
|
||||
/** \brief Return the initiators that have values for a given attribute for a specific target NUMA node.
|
||||
*
|
||||
* Return initiators for the given attribute and target node in the
|
||||
* \p initiators array.
|
||||
* If \p values is not \c NULL, the corresponding attribute values
|
||||
* are stored in the array it points to.
|
||||
*
|
||||
* On input, \p nr points to the number of initiators that may be stored
|
||||
* in the array \p initiators (and \p values).
|
||||
* On output, \p nr points to the number of initiators (and values) that
|
||||
* were actually found, even if some of them couldn't be stored in the array.
|
||||
* Initiators that couldn't be stored are ignored, but the function still
|
||||
* returns success (\c 0). The caller may find out by comparing the value pointed
|
||||
* by \p nr before and after the function call.
|
||||
*
|
||||
* The returned initiators should not be modified or freed,
|
||||
* they belong to the topology.
|
||||
*
|
||||
* \p flags must be \c 0 for now.
|
||||
*
|
||||
* If the attribute does not relate to a specific initiator
|
||||
* (it does not have the flag ::HWLOC_MEMATTR_FLAG_NEED_INITIATOR),
|
||||
* no initiator is returned.
|
||||
*
|
||||
* \note This function is meant for tools and debugging (listing internal information)
|
||||
* rather than for application queries. Applications should rather select useful
|
||||
* NUMA nodes with hwloc_get_local_numanode_objs() and then look at their attribute
|
||||
* values for some relevant initiators.
|
||||
*/
|
||||
HWLOC_DECLSPEC int
|
||||
hwloc_memattr_get_initiators(hwloc_topology_t topology,
|
||||
hwloc_memattr_id_t attribute,
|
||||
hwloc_obj_t target_node,
|
||||
unsigned long flags,
|
||||
unsigned *nr, struct hwloc_location *initiators, hwloc_uint64_t *values);
|
||||
/** @} */
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
|
||||
#endif /* HWLOC_MEMATTR_H */
|
4
src/3rdparty/hwloc/include/hwloc/nvml.h
vendored
4
src/3rdparty/hwloc/include/hwloc/nvml.h
vendored
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright © 2012-2016 Inria. All rights reserved.
|
||||
* Copyright © 2012-2020 Inria. All rights reserved.
|
||||
* See COPYING in top-level directory.
|
||||
*/
|
||||
|
||||
|
@ -36,7 +36,7 @@ extern "C" {
|
|||
* @{
|
||||
*/
|
||||
|
||||
/** \brief Get the CPU set of logical processors that are physically
|
||||
/** \brief Get the CPU set of processors that are physically
|
||||
* close to NVML device \p device.
|
||||
*
|
||||
* Return the CPU set describing the locality of the NVML device \p device.
|
||||
|
|
4
src/3rdparty/hwloc/include/hwloc/opencl.h
vendored
4
src/3rdparty/hwloc/include/hwloc/opencl.h
vendored
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright © 2012-2019 Inria. All rights reserved.
|
||||
* Copyright © 2012-2020 Inria. All rights reserved.
|
||||
* Copyright © 2013, 2018 Université Bordeaux. All right reserved.
|
||||
* See COPYING in top-level directory.
|
||||
*/
|
||||
|
@ -109,7 +109,7 @@ hwloc_opencl_get_device_pci_busid(cl_device_id device,
|
|||
return -1;
|
||||
}
|
||||
|
||||
/** \brief Get the CPU set of logical processors that are physically
|
||||
/** \brief Get the CPU set of processors that are physically
|
||||
* close to OpenCL device \p device.
|
||||
*
|
||||
* Return the CPU set describing the locality of the OpenCL device \p device.
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
* Copyright © 2009 CNRS
|
||||
* Copyright © 2009-2016 Inria. All rights reserved.
|
||||
* Copyright © 2009-2020 Inria. All rights reserved.
|
||||
* Copyright © 2009-2010 Université Bordeaux
|
||||
* Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved.
|
||||
* See COPYING in top-level directory.
|
||||
|
@ -41,7 +41,7 @@ extern "C" {
|
|||
* @{
|
||||
*/
|
||||
|
||||
/** \brief Get the CPU set of logical processors that are physically
|
||||
/** \brief Get the CPU set of processors that are physically
|
||||
* close to device \p ibdev.
|
||||
*
|
||||
* Return the CPU set describing the locality of the OpenFabrics
|
||||
|
|
30
src/3rdparty/hwloc/include/hwloc/plugins.h
vendored
30
src/3rdparty/hwloc/include/hwloc/plugins.h
vendored
|
@ -313,7 +313,13 @@ struct hwloc_component {
|
|||
* @{
|
||||
*/
|
||||
|
||||
/** \brief Check whether insertion errors are hidden */
|
||||
HWLOC_DECLSPEC int hwloc_hide_errors(void);
|
||||
|
||||
/** \brief Add an object to the topology.
|
||||
*
|
||||
* Insert new object \p obj in the topology starting under existing object \p root
|
||||
* (if \c NULL, the topology root object is used).
|
||||
*
|
||||
* It is sorted along the tree of other objects according to the inclusion of
|
||||
* cpusets, to eventually be added as a child of the smallest object including
|
||||
|
@ -327,32 +333,20 @@ struct hwloc_component {
|
|||
*
|
||||
* This shall only be called before levels are built.
|
||||
*
|
||||
* In case of error, hwloc_report_os_error() is called.
|
||||
*
|
||||
* The caller should check whether the object type is filtered-out before calling this function.
|
||||
*
|
||||
* The topology cpuset/nodesets will be enlarged to include the object sets.
|
||||
*
|
||||
* \p reason is a unique string identifying where and why this insertion call was performed
|
||||
* (it will be displayed in case of internal insertion error).
|
||||
*
|
||||
* Returns the object on success.
|
||||
* Returns NULL and frees obj on error.
|
||||
* Returns another object and frees obj if it was merged with an identical pre-existing object.
|
||||
*/
|
||||
HWLOC_DECLSPEC struct hwloc_obj *hwloc_insert_object_by_cpuset(struct hwloc_topology *topology, hwloc_obj_t obj);
|
||||
|
||||
/** \brief Type of error callbacks during object insertion */
|
||||
typedef void (*hwloc_report_error_t)(const char * msg, int line);
|
||||
/** \brief Report an insertion error from a backend */
|
||||
HWLOC_DECLSPEC void hwloc_report_os_error(const char * msg, int line);
|
||||
/** \brief Check whether insertion errors are hidden */
|
||||
HWLOC_DECLSPEC int hwloc_hide_errors(void);
|
||||
|
||||
/** \brief Add an object to the topology and specify which error callback to use.
|
||||
*
|
||||
* This function is similar to hwloc_insert_object_by_cpuset() but it allows specifying
|
||||
* where to start insertion from (if \p root is NULL, the topology root object is used),
|
||||
* and specifying the error callback.
|
||||
*/
|
||||
HWLOC_DECLSPEC struct hwloc_obj *hwloc__insert_object_by_cpuset(struct hwloc_topology *topology, hwloc_obj_t root, hwloc_obj_t obj, hwloc_report_error_t report_error);
|
||||
HWLOC_DECLSPEC hwloc_obj_t
|
||||
hwloc__insert_object_by_cpuset(struct hwloc_topology *topology, hwloc_obj_t root,
|
||||
hwloc_obj_t obj, const char *reason);
|
||||
|
||||
/** \brief Insert an object somewhere in the topology.
|
||||
*
|
||||
|
|
83
src/3rdparty/hwloc/include/hwloc/rename.h
vendored
83
src/3rdparty/hwloc/include/hwloc/rename.h
vendored
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
* Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright © 2010-2019 Inria. All rights reserved.
|
||||
* Copyright © 2010-2020 Inria. All rights reserved.
|
||||
* See COPYING in top-level directory.
|
||||
*/
|
||||
|
||||
|
@ -119,6 +119,7 @@ extern "C" {
|
|||
#define HWLOC_TOPOLOGY_FLAG_INCLUDE_DISALLOWED HWLOC_NAME_CAPS(TOPOLOGY_FLAG_WITH_DISALLOWED)
|
||||
#define HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM HWLOC_NAME_CAPS(TOPOLOGY_FLAG_IS_THISSYSTEM)
|
||||
#define HWLOC_TOPOLOGY_FLAG_THISSYSTEM_ALLOWED_RESOURCES HWLOC_NAME_CAPS(TOPOLOGY_FLAG_THISSYSTEM_ALLOWED_RESOURCES)
|
||||
#define HWLOC_TOPOLOGY_FLAG_IMPORT_SUPPORT HWLOC_NAME_CAPS(TOPOLOGY_FLAG_IMPORT_SUPPORT)
|
||||
|
||||
#define hwloc_topology_set_pid HWLOC_NAME(topology_set_pid)
|
||||
#define hwloc_topology_set_synthetic HWLOC_NAME(topology_set_synthetic)
|
||||
|
@ -134,6 +135,7 @@ extern "C" {
|
|||
#define hwloc_topology_discovery_support HWLOC_NAME(topology_discovery_support)
|
||||
#define hwloc_topology_cpubind_support HWLOC_NAME(topology_cpubind_support)
|
||||
#define hwloc_topology_membind_support HWLOC_NAME(topology_membind_support)
|
||||
#define hwloc_topology_misc_support HWLOC_NAME(topology_misc_support)
|
||||
#define hwloc_topology_support HWLOC_NAME(topology_support)
|
||||
#define hwloc_topology_get_support HWLOC_NAME(topology_get_support)
|
||||
|
||||
|
@ -170,6 +172,7 @@ extern "C" {
|
|||
#define hwloc_topology_alloc_group_object HWLOC_NAME(topology_alloc_group_object)
|
||||
#define hwloc_topology_insert_group_object HWLOC_NAME(topology_insert_group_object)
|
||||
#define hwloc_obj_add_other_obj_sets HWLOC_NAME(obj_add_other_obj_sets)
|
||||
#define hwloc_topology_refresh HWLOC_NAME(topology_refresh)
|
||||
|
||||
#define hwloc_topology_get_depth HWLOC_NAME(topology_get_depth)
|
||||
#define hwloc_get_type_depth HWLOC_NAME(get_type_depth)
|
||||
|
@ -367,6 +370,51 @@ extern "C" {
|
|||
#define hwloc_cpuset_to_nodeset HWLOC_NAME(cpuset_to_nodeset)
|
||||
#define hwloc_cpuset_from_nodeset HWLOC_NAME(cpuset_from_nodeset)
|
||||
|
||||
/* memattrs.h */
|
||||
|
||||
#define hwloc_memattr_id_e HWLOC_NAME(memattr_id_e)
|
||||
#define HWLOC_MEMATTR_ID_CAPACITY HWLOC_NAME_CAPS(MEMATTR_ID_CAPACITY)
|
||||
#define HWLOC_MEMATTR_ID_LOCALITY HWLOC_NAME_CAPS(MEMATTR_ID_LOCALITY)
|
||||
#define HWLOC_MEMATTR_ID_BANDWIDTH HWLOC_NAME_CAPS(MEMATTR_ID_BANDWIDTH)
|
||||
#define HWLOC_MEMATTR_ID_LATENCY HWLOC_NAME_CAPS(MEMATTR_ID_LATENCY)
|
||||
|
||||
#define hwloc_memattr_id_t HWLOC_NAME(memattr_id_t)
|
||||
#define hwloc_memattr_get_by_name HWLOC_NAME(memattr_get_by_name)
|
||||
|
||||
#define hwloc_location HWLOC_NAME(location)
|
||||
#define hwloc_location_type_e HWLOC_NAME(location_type_e)
|
||||
#define HWLOC_LOCATION_TYPE_OBJECT HWLOC_NAME_CAPS(LOCATION_TYPE_OBJECT)
|
||||
#define HWLOC_LOCATION_TYPE_CPUSET HWLOC_NAME_CAPS(LOCATION_TYPE_CPUSET)
|
||||
#define hwloc_location_u HWLOC_NAME(location_u)
|
||||
|
||||
#define hwloc_memattr_get_value HWLOC_NAME(memattr_get_value)
|
||||
#define hwloc_memattr_get_best_target HWLOC_NAME(memattr_get_best_target)
|
||||
#define hwloc_memattr_get_best_initiator HWLOC_NAME(memattr_get_best_initiator)
|
||||
|
||||
#define hwloc_local_numanode_flag_e HWLOC_NAME(local_numanode_flag_e)
|
||||
#define HWLOC_LOCAL_NUMANODE_FLAG_LARGER_LOCALITY HWLOC_NAME_CAPS(LOCAL_NUMANODE_FLAG_LARGER_LOCALITY)
|
||||
#define HWLOC_LOCAL_NUMANODE_FLAG_SMALLER_LOCALITY HWLOC_NAME_CAPS(LOCAL_NUMANODE_FLAG_SMALLER_LOCALITY)
|
||||
#define HWLOC_LOCAL_NUMANODE_FLAG_ALL HWLOC_NAME_CAPS(LOCAL_NUMANODE_FLAG_ALL)
|
||||
#define hwloc_get_local_numanode_objs HWLOC_NAME(get_local_numanode_objs)
|
||||
|
||||
#define hwloc_memattr_get_name HWLOC_NAME(memattr_get_name)
|
||||
#define hwloc_memattr_get_flags HWLOC_NAME(memattr_get_flags)
|
||||
#define hwloc_memattr_flag_e HWLOC_NAME(memattr_flag_e)
|
||||
#define HWLOC_MEMATTR_FLAG_HIGHER_FIRST HWLOC_NAME_CAPS(MEMATTR_FLAG_HIGHER_FIRST)
|
||||
#define HWLOC_MEMATTR_FLAG_LOWER_FIRST HWLOC_NAME_CAPS(MEMATTR_FLAG_LOWER_FIRST)
|
||||
#define HWLOC_MEMATTR_FLAG_NEED_INITIATOR HWLOC_NAME_CAPS(MEMATTR_FLAG_NEED_INITIATOR)
|
||||
#define hwloc_memattr_register HWLOC_NAME(memattr_register)
|
||||
#define hwloc_memattr_set_value HWLOC_NAME(memattr_set_value)
|
||||
#define hwloc_memattr_get_targets HWLOC_NAME(memattr_get_targets)
|
||||
#define hwloc_memattr_get_initiators HWLOC_NAME(memattr_get_initiators)
|
||||
|
||||
/* cpukinds.h */
|
||||
|
||||
#define hwloc_cpukinds_get_nr HWLOC_NAME(cpukinds_get_nr)
|
||||
#define hwloc_cpukinds_get_by_cpuset HWLOC_NAME(cpukinds_get_by_cpuset)
|
||||
#define hwloc_cpukinds_get_info HWLOC_NAME(cpukinds_get_info)
|
||||
#define hwloc_cpukinds_register HWLOC_NAME(cpukinds_register)
|
||||
|
||||
/* export.h */
|
||||
|
||||
#define hwloc_topology_export_xml_flags_e HWLOC_NAME(topology_export_xml_flags_e)
|
||||
|
@ -510,6 +558,12 @@ extern "C" {
|
|||
#define hwloc_nvml_get_device_osdev HWLOC_NAME(nvml_get_device_osdev)
|
||||
#define hwloc_nvml_get_device_osdev_by_index HWLOC_NAME(nvml_get_device_osdev_by_index)
|
||||
|
||||
/* rsmi.h */
|
||||
|
||||
#define hwloc_rsmi_get_device_cpuset HWLOC_NAME(rsmi_get_device_cpuset)
|
||||
#define hwloc_rsmi_get_device_osdev HWLOC_NAME(rsmi_get_device_osdev)
|
||||
#define hwloc_rsmi_get_device_osdev_by_index HWLOC_NAME(rsmi_get_device_osdev_by_index)
|
||||
|
||||
/* gl.h */
|
||||
|
||||
#define hwloc_gl_get_display_osdev_by_port_device HWLOC_NAME(gl_get_display_osdev_by_port_device)
|
||||
|
@ -547,9 +601,6 @@ extern "C" {
|
|||
|
||||
#define hwloc_plugin_check_namespace HWLOC_NAME(plugin_check_namespace)
|
||||
|
||||
#define hwloc_insert_object_by_cpuset HWLOC_NAME(insert_object_by_cpuset)
|
||||
#define hwloc_report_error_t HWLOC_NAME(report_error_t)
|
||||
#define hwloc_report_os_error HWLOC_NAME(report_os_error)
|
||||
#define hwloc_hide_errors HWLOC_NAME(hide_errors)
|
||||
#define hwloc__insert_object_by_cpuset HWLOC_NAME(_insert_object_by_cpuset)
|
||||
#define hwloc_insert_object_by_parent HWLOC_NAME(insert_object_by_parent)
|
||||
|
@ -683,6 +734,7 @@ extern "C" {
|
|||
#define hwloc_cuda_component HWLOC_NAME(cuda_component)
|
||||
#define hwloc_gl_component HWLOC_NAME(gl_component)
|
||||
#define hwloc_nvml_component HWLOC_NAME(nvml_component)
|
||||
#define hwloc_rsmi_component HWLOC_NAME(rsmi_component)
|
||||
#define hwloc_opencl_component HWLOC_NAME(opencl_component)
|
||||
#define hwloc_pci_component HWLOC_NAME(pci_component)
|
||||
|
||||
|
@ -691,6 +743,8 @@ extern "C" {
|
|||
|
||||
/* private/private.h */
|
||||
|
||||
#define hwloc_internal_location_s HWLOC_NAME(internal_location_s)
|
||||
|
||||
#define hwloc_special_level_s HWLOC_NAME(special_level_s)
|
||||
|
||||
#define hwloc_pci_forced_locality_s HWLOC_NAME(pci_forced_locality_s)
|
||||
|
@ -713,6 +767,8 @@ extern "C" {
|
|||
|
||||
#define hwloc__attach_memory_object HWLOC_NAME(insert_memory_object)
|
||||
|
||||
#define hwloc_get_obj_by_type_and_gp_index HWLOC_NAME(get_obj_by_type_and_gp_index)
|
||||
|
||||
#define hwloc_pci_discovery_init HWLOC_NAME(pci_discovery_init)
|
||||
#define hwloc_pci_discovery_prepare HWLOC_NAME(pci_discovery_prepare)
|
||||
#define hwloc_pci_discovery_exit HWLOC_NAME(pci_discovery_exit)
|
||||
|
@ -723,6 +779,7 @@ extern "C" {
|
|||
#define hwloc__add_info_nodup HWLOC_NAME(_add_info_nodup)
|
||||
#define hwloc__move_infos HWLOC_NAME(_move_infos)
|
||||
#define hwloc__free_infos HWLOC_NAME(_free_infos)
|
||||
#define hwloc__tma_dup_infos HWLOC_NAME(_tma_dup_infos)
|
||||
|
||||
#define hwloc_binding_hooks HWLOC_NAME(binding_hooks)
|
||||
#define hwloc_set_native_binding_hooks HWLOC_NAME(set_native_binding_hooks)
|
||||
|
@ -764,6 +821,24 @@ extern "C" {
|
|||
#define hwloc_internal_distances_add_by_index HWLOC_NAME(internal_distances_add_by_index)
|
||||
#define hwloc_internal_distances_invalidate_cached_objs HWLOC_NAME(hwloc_internal_distances_invalidate_cached_objs)
|
||||
|
||||
#define hwloc_internal_memattr_s HWLOC_NAME(internal_memattr_s)
|
||||
#define hwloc_internal_memattr_target_s HWLOC_NAME(internal_memattr_target_s)
|
||||
#define hwloc_internal_memattr_initiator_s HWLOC_NAME(internal_memattr_initiator_s)
|
||||
#define hwloc_internal_memattrs_init HWLOC_NAME(internal_memattrs_init)
|
||||
#define hwloc_internal_memattrs_prepare HWLOC_NAME(internal_memattrs_prepare)
|
||||
#define hwloc_internal_memattrs_dup HWLOC_NAME(internal_memattrs_dup)
|
||||
#define hwloc_internal_memattrs_destroy HWLOC_NAME(internal_memattrs_destroy)
|
||||
#define hwloc_internal_memattrs_need_refresh HWLOC_NAME(internal_memattrs_need_refresh)
|
||||
#define hwloc_internal_memattrs_refresh HWLOC_NAME(internal_memattrs_refresh)
|
||||
|
||||
#define hwloc_internal_cpukind_s HWLOC_NAME(internal_cpukind_s)
|
||||
#define hwloc_internal_cpukinds_init HWLOC_NAME(internal_cpukinds_init)
|
||||
#define hwloc_internal_cpukinds_destroy HWLOC_NAME(internal_cpukinds_destroy)
|
||||
#define hwloc_internal_cpukinds_dup HWLOC_NAME(internal_cpukinds_dup)
|
||||
#define hwloc_internal_cpukinds_register HWLOC_NAME(internal_cpukinds_register)
|
||||
#define hwloc_internal_cpukinds_rank HWLOC_NAME(internal_cpukinds_rank)
|
||||
#define hwloc_internal_cpukinds_restrict HWLOC_NAME(internal_cpukinds_restrict)
|
||||
|
||||
#define hwloc_encode_to_base64 HWLOC_NAME(encode_to_base64)
|
||||
#define hwloc_decode_from_base64 HWLOC_NAME(decode_from_base64)
|
||||
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
/*
|
||||
* Copyright © 2009, 2011, 2012 CNRS. All rights reserved.
|
||||
* Copyright © 2009-2018 Inria. All rights reserved.
|
||||
* Copyright © 2009-2020 Inria. All rights reserved.
|
||||
* Copyright © 2009, 2011, 2012, 2015 Université Bordeaux. All rights reserved.
|
||||
* Copyright © 2009 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright © 2009-2020 Cisco Systems, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
|
@ -575,7 +575,7 @@
|
|||
#define PACKAGE "hwloc"
|
||||
|
||||
/* Define to the address where bug reports for this package should be sent. */
|
||||
#define PACKAGE_BUGREPORT "http://www.open-mpi.org/projects/hwloc/"
|
||||
#define PACKAGE_BUGREPORT "https://www.open-mpi.org/projects/hwloc/"
|
||||
|
||||
/* Define to the full name of this package. */
|
||||
#define PACKAGE_NAME "hwloc"
|
||||
|
@ -668,5 +668,9 @@
|
|||
/* Define this to the thread ID type */
|
||||
#define hwloc_thread_t HANDLE
|
||||
|
||||
/* Define to 1 if you have the declaration of `GetModuleFileName', and to 0 if
|
||||
you don't. */
|
||||
#define HAVE_DECL_GETMODULEFILENAME 1
|
||||
|
||||
|
||||
#endif /* HWLOC_CONFIGURE_H */
|
||||
|
|
20
src/3rdparty/hwloc/include/private/debug.h
vendored
20
src/3rdparty/hwloc/include/private/debug.h
vendored
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
* Copyright © 2009 CNRS
|
||||
* Copyright © 2009-2017 Inria. All rights reserved.
|
||||
* Copyright © 2009-2020 Inria. All rights reserved.
|
||||
* Copyright © 2009, 2011 Université Bordeaux
|
||||
* Copyright © 2011 Cisco Systems, Inc. All rights reserved.
|
||||
* See COPYING in top-level directory.
|
||||
|
@ -19,6 +19,10 @@
|
|||
#include <stdio.h>
|
||||
#endif
|
||||
|
||||
#ifdef ANDROID
|
||||
extern void JNIDebug(char *text);
|
||||
#endif
|
||||
|
||||
/* Compile-time assertion */
|
||||
#define HWLOC_BUILD_ASSERT(condition) ((void)sizeof(char[1 - 2*!(condition)]))
|
||||
|
||||
|
@ -44,9 +48,17 @@ static __hwloc_inline void hwloc_debug(const char *s __hwloc_attribute_unused, .
|
|||
{
|
||||
#ifdef HWLOC_DEBUG
|
||||
if (hwloc_debug_enabled()) {
|
||||
#ifdef ANDROID
|
||||
char buffer[256];
|
||||
#endif
|
||||
va_list ap;
|
||||
va_start(ap, s);
|
||||
#ifdef ANDROID
|
||||
vsprintf(buffer, s, ap);
|
||||
JNIDebug(buffer);
|
||||
#else
|
||||
vfprintf(stderr, s, ap);
|
||||
#endif
|
||||
va_end(ap);
|
||||
}
|
||||
#endif
|
||||
|
@ -57,21 +69,21 @@ static __hwloc_inline void hwloc_debug(const char *s __hwloc_attribute_unused, .
|
|||
if (hwloc_debug_enabled()) { \
|
||||
char *s; \
|
||||
hwloc_bitmap_asprintf(&s, bitmap); \
|
||||
fprintf(stderr, fmt, s); \
|
||||
hwloc_debug(fmt, s); \
|
||||
free(s); \
|
||||
} } while (0)
|
||||
#define hwloc_debug_1arg_bitmap(fmt, arg1, bitmap) do { \
|
||||
if (hwloc_debug_enabled()) { \
|
||||
char *s; \
|
||||
hwloc_bitmap_asprintf(&s, bitmap); \
|
||||
fprintf(stderr, fmt, arg1, s); \
|
||||
hwloc_debug(fmt, arg1, s); \
|
||||
free(s); \
|
||||
} } while (0)
|
||||
#define hwloc_debug_2args_bitmap(fmt, arg1, arg2, bitmap) do { \
|
||||
if (hwloc_debug_enabled()) { \
|
||||
char *s; \
|
||||
hwloc_bitmap_asprintf(&s, bitmap); \
|
||||
fprintf(stderr, fmt, arg1, arg2, s); \
|
||||
hwloc_debug(fmt, arg1, arg2, s); \
|
||||
free(s); \
|
||||
} } while (0)
|
||||
#else
|
||||
|
|
|
@ -30,6 +30,7 @@ HWLOC_DECLSPEC extern const struct hwloc_component hwloc_x86_component;
|
|||
HWLOC_DECLSPEC extern const struct hwloc_component hwloc_cuda_component;
|
||||
HWLOC_DECLSPEC extern const struct hwloc_component hwloc_gl_component;
|
||||
HWLOC_DECLSPEC extern const struct hwloc_component hwloc_nvml_component;
|
||||
HWLOC_DECLSPEC extern const struct hwloc_component hwloc_rsmi_component;
|
||||
HWLOC_DECLSPEC extern const struct hwloc_component hwloc_opencl_component;
|
||||
HWLOC_DECLSPEC extern const struct hwloc_component hwloc_pci_component;
|
||||
|
||||
|
|
82
src/3rdparty/hwloc/include/private/private.h
vendored
82
src/3rdparty/hwloc/include/private/private.h
vendored
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
* Copyright © 2009 CNRS
|
||||
* Copyright © 2009-2019 Inria. All rights reserved.
|
||||
* Copyright © 2009-2020 Inria. All rights reserved.
|
||||
* Copyright © 2009-2012, 2020 Université Bordeaux
|
||||
* Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved.
|
||||
*
|
||||
|
@ -40,7 +40,19 @@
|
|||
#endif
|
||||
#include <string.h>
|
||||
|
||||
#define HWLOC_TOPOLOGY_ABI 0x20100 /* version of the layout of struct topology */
|
||||
#define HWLOC_TOPOLOGY_ABI 0x20400 /* version of the layout of struct topology */
|
||||
|
||||
struct hwloc_internal_location_s {
|
||||
enum hwloc_location_type_e type;
|
||||
union {
|
||||
struct {
|
||||
hwloc_obj_t obj; /* cached between refreshes */
|
||||
uint64_t gp_index;
|
||||
hwloc_obj_type_t type;
|
||||
} object; /* if type == HWLOC_LOCATION_TYPE_OBJECT */
|
||||
hwloc_cpuset_t cpuset; /* if type == HWLOC_LOCATION_TYPE_CPUSET */
|
||||
} location;
|
||||
};
|
||||
|
||||
/*****************************************************
|
||||
* WARNING:
|
||||
|
@ -163,6 +175,50 @@ struct hwloc_topology {
|
|||
} *first_dist, *last_dist;
|
||||
unsigned next_dist_id;
|
||||
|
||||
/* memory attributes */
|
||||
unsigned nr_memattrs;
|
||||
struct hwloc_internal_memattr_s {
|
||||
/* memattr info */
|
||||
char *name; /* TODO unit is implicit, in the documentation of standard attributes, or in the name? */
|
||||
unsigned long flags;
|
||||
#define HWLOC_IMATTR_FLAG_STATIC_NAME (1U<<0) /* no need to free name */
|
||||
#define HWLOC_IMATTR_FLAG_CACHE_VALID (1U<<1) /* target and initiator are valid */
|
||||
#define HWLOC_IMATTR_FLAG_CONVENIENCE (1U<<2) /* convenience attribute reporting values from non-memattr attributes (R/O and no actual targets stored) */
|
||||
unsigned iflags;
|
||||
|
||||
/* array of values */
|
||||
unsigned nr_targets;
|
||||
struct hwloc_internal_memattr_target_s {
|
||||
/* target object */
|
||||
hwloc_obj_t obj; /* cached between refreshes */
|
||||
hwloc_obj_type_t type;
|
||||
unsigned os_index; /* only used temporarily during discovery when there's no obj/gp_index yet */
|
||||
hwloc_uint64_t gp_index;
|
||||
|
||||
/* value if there are no initiator for this attr */
|
||||
hwloc_uint64_t noinitiator_value;
|
||||
/* initiators otherwise */
|
||||
unsigned nr_initiators;
|
||||
struct hwloc_internal_memattr_initiator_s {
|
||||
struct hwloc_internal_location_s initiator;
|
||||
hwloc_uint64_t value;
|
||||
} *initiators;
|
||||
} *targets;
|
||||
} *memattrs;
|
||||
|
||||
/* hybridcpus */
|
||||
unsigned nr_cpukinds;
|
||||
unsigned nr_cpukinds_allocated;
|
||||
struct hwloc_internal_cpukind_s {
|
||||
hwloc_cpuset_t cpuset;
|
||||
#define HWLOC_CPUKIND_EFFICIENCY_UNKNOWN -1
|
||||
int efficiency;
|
||||
int forced_efficiency; /* returned by the hardware or OS if any */
|
||||
hwloc_uint64_t ranking_value; /* internal value for ranking */
|
||||
unsigned nr_infos;
|
||||
struct hwloc_info_s *infos;
|
||||
} *cpukinds;
|
||||
|
||||
int grouping;
|
||||
int grouping_verbose;
|
||||
unsigned grouping_nbaccuracies;
|
||||
|
@ -240,8 +296,9 @@ extern void hwloc_topology_clear(struct hwloc_topology *topology);
|
|||
|
||||
/* insert memory object as memory child of normal parent */
|
||||
extern struct hwloc_obj * hwloc__attach_memory_object(struct hwloc_topology *topology, hwloc_obj_t parent,
|
||||
hwloc_obj_t obj,
|
||||
hwloc_report_error_t report_error);
|
||||
hwloc_obj_t obj, const char *reason);
|
||||
|
||||
extern hwloc_obj_t hwloc_get_obj_by_type_and_gp_index(hwloc_topology_t topology, hwloc_obj_type_t type, uint64_t gp_index);
|
||||
|
||||
extern void hwloc_pci_discovery_init(struct hwloc_topology *topology);
|
||||
extern void hwloc_pci_discovery_prepare(struct hwloc_topology *topology);
|
||||
|
@ -261,6 +318,7 @@ extern hwloc_obj_t hwloc_find_insert_io_parent_by_complete_cpuset(struct hwloc_t
|
|||
extern int hwloc__add_info(struct hwloc_info_s **infosp, unsigned *countp, const char *name, const char *value);
|
||||
extern int hwloc__add_info_nodup(struct hwloc_info_s **infosp, unsigned *countp, const char *name, const char *value, int replace);
|
||||
extern int hwloc__move_infos(struct hwloc_info_s **dst_infosp, unsigned *dst_countp, struct hwloc_info_s **src_infosp, unsigned *src_countp);
|
||||
extern int hwloc__tma_dup_infos(struct hwloc_tma *tma, struct hwloc_info_s **dst_infosp, unsigned *dst_countp, struct hwloc_info_s *src_infos, unsigned src_count);
|
||||
extern void hwloc__free_infos(struct hwloc_info_s *infos, unsigned count);
|
||||
|
||||
/* set native OS binding hooks */
|
||||
|
@ -354,6 +412,22 @@ extern int hwloc_internal_distances_add(hwloc_topology_t topology, const char *n
|
|||
extern int hwloc_internal_distances_add_by_index(hwloc_topology_t topology, const char *name, hwloc_obj_type_t unique_type, hwloc_obj_type_t *different_types, unsigned nbobjs, uint64_t *indexes, uint64_t *values, unsigned long kind, unsigned long flags);
|
||||
extern void hwloc_internal_distances_invalidate_cached_objs(hwloc_topology_t topology);
|
||||
|
||||
extern void hwloc_internal_memattrs_init(hwloc_topology_t topology);
|
||||
extern void hwloc_internal_memattrs_prepare(hwloc_topology_t topology);
|
||||
extern void hwloc_internal_memattrs_destroy(hwloc_topology_t topology);
|
||||
extern void hwloc_internal_memattrs_need_refresh(hwloc_topology_t topology);
|
||||
extern void hwloc_internal_memattrs_refresh(hwloc_topology_t topology);
|
||||
extern int hwloc_internal_memattrs_dup(hwloc_topology_t new, hwloc_topology_t old);
|
||||
extern int hwloc_internal_memattr_set_value(hwloc_topology_t topology, hwloc_memattr_id_t id, hwloc_obj_type_t target_type, hwloc_uint64_t target_gp_index, unsigned target_os_index, struct hwloc_internal_location_s *initiator, hwloc_uint64_t value);
|
||||
|
||||
extern void hwloc_internal_cpukinds_init(hwloc_topology_t topology);
|
||||
extern int hwloc_internal_cpukinds_rank(hwloc_topology_t topology);
|
||||
extern void hwloc_internal_cpukinds_destroy(hwloc_topology_t topology);
|
||||
extern int hwloc_internal_cpukinds_dup(hwloc_topology_t new, hwloc_topology_t old);
|
||||
#define HWLOC_CPUKINDS_REGISTER_FLAG_OVERWRITE_FORCED_EFFICIENCY (1<<0)
|
||||
extern int hwloc_internal_cpukinds_register(hwloc_topology_t topology, hwloc_cpuset_t cpuset, int forced_efficiency, const struct hwloc_info_s *infos, unsigned nr_infos, unsigned long flags);
|
||||
extern void hwloc_internal_cpukinds_restrict(hwloc_topology_t topology);
|
||||
|
||||
/* encode src buffer into target buffer.
|
||||
* targsize must be at least 4*((srclength+2)/3)+1.
|
||||
* target will be 0-terminated.
|
||||
|
|
2
src/3rdparty/hwloc/include/private/xml.h
vendored
2
src/3rdparty/hwloc/include/private/xml.h
vendored
|
@ -46,7 +46,7 @@ struct hwloc_xml_backend_data_s {
|
|||
int (*find_child)(struct hwloc__xml_import_state_s * state, struct hwloc__xml_import_state_s * childstate, char **tagp);
|
||||
int (*close_tag)(struct hwloc__xml_import_state_s * state); /* look for an explicit closing tag </name> */
|
||||
void (*close_child)(struct hwloc__xml_import_state_s * state);
|
||||
int (*get_content)(struct hwloc__xml_import_state_s * state, char **beginp, size_t expected_length); /* return 0 on empty content (and sets beginp to empty string), 1 on actual content, -1 on error or unexpected content length */
|
||||
int (*get_content)(struct hwloc__xml_import_state_s * state, const char **beginp, size_t expected_length); /* return 0 on empty content (and sets beginp to empty string), 1 on actual content, -1 on error or unexpected content length */
|
||||
void (*close_content)(struct hwloc__xml_import_state_s * state);
|
||||
char * msgprefix;
|
||||
void *data; /* libxml2 doc, or nolibxml buffer */
|
||||
|
|
3
src/3rdparty/hwloc/src/bind.c
vendored
3
src/3rdparty/hwloc/src/bind.c
vendored
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
* Copyright © 2009 CNRS
|
||||
* Copyright © 2009-2019 Inria. All rights reserved.
|
||||
* Copyright © 2009-2020 Inria. All rights reserved.
|
||||
* Copyright © 2009-2010, 2012 Université Bordeaux
|
||||
* Copyright © 2011-2015 Cisco Systems, Inc. All rights reserved.
|
||||
* See COPYING in top-level directory.
|
||||
|
@ -921,5 +921,6 @@ hwloc_set_binding_hooks(struct hwloc_topology *topology)
|
|||
DO(mem,get_area_membind);
|
||||
DO(mem,get_area_memlocation);
|
||||
DO(mem,alloc_membind);
|
||||
#undef DO
|
||||
}
|
||||
}
|
||||
|
|
4
src/3rdparty/hwloc/src/bitmap.c
vendored
4
src/3rdparty/hwloc/src/bitmap.c
vendored
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
* Copyright © 2009 CNRS
|
||||
* Copyright © 2009-2018 Inria. All rights reserved.
|
||||
* Copyright © 2009-2020 Inria. All rights reserved.
|
||||
* Copyright © 2009-2011 Université Bordeaux
|
||||
* Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved.
|
||||
* See COPYING in top-level directory.
|
||||
|
@ -818,7 +818,7 @@ int hwloc_bitmap_nr_ulongs(const struct hwloc_bitmap_s *set)
|
|||
return -1;
|
||||
|
||||
last = hwloc_bitmap_last(set);
|
||||
return (last + HWLOC_BITS_PER_LONG-1)/HWLOC_BITS_PER_LONG;
|
||||
return (last + HWLOC_BITS_PER_LONG)/HWLOC_BITS_PER_LONG;
|
||||
}
|
||||
|
||||
int hwloc_bitmap_only(struct hwloc_bitmap_s * set, unsigned cpu)
|
||||
|
|
649
src/3rdparty/hwloc/src/cpukinds.c
vendored
Normal file
649
src/3rdparty/hwloc/src/cpukinds.c
vendored
Normal file
|
@ -0,0 +1,649 @@
|
|||
/*
|
||||
* Copyright © 2020 Inria. All rights reserved.
|
||||
* See COPYING in top-level directory.
|
||||
*/
|
||||
|
||||
#include "private/autogen/config.h"
|
||||
#include "hwloc.h"
|
||||
#include "private/private.h"
|
||||
#include "private/debug.h"
|
||||
|
||||
|
||||
/*****************
|
||||
* Basics
|
||||
*/
|
||||
|
||||
void
|
||||
hwloc_internal_cpukinds_init(struct hwloc_topology *topology)
|
||||
{
|
||||
topology->cpukinds = NULL;
|
||||
topology->nr_cpukinds = 0;
|
||||
topology->nr_cpukinds_allocated = 0;
|
||||
}
|
||||
|
||||
void
|
||||
hwloc_internal_cpukinds_destroy(struct hwloc_topology *topology)
|
||||
{
|
||||
unsigned i;
|
||||
for(i=0; i<topology->nr_cpukinds; i++) {
|
||||
struct hwloc_internal_cpukind_s *kind = &topology->cpukinds[i];
|
||||
hwloc_bitmap_free(kind->cpuset);
|
||||
hwloc__free_infos(kind->infos, kind->nr_infos);
|
||||
}
|
||||
free(topology->cpukinds);
|
||||
topology->cpukinds = NULL;
|
||||
topology->nr_cpukinds = 0;
|
||||
}
|
||||
|
||||
int
|
||||
hwloc_internal_cpukinds_dup(hwloc_topology_t new, hwloc_topology_t old)
|
||||
{
|
||||
struct hwloc_tma *tma = new->tma;
|
||||
struct hwloc_internal_cpukind_s *kinds;
|
||||
unsigned i;
|
||||
|
||||
kinds = hwloc_tma_malloc(tma, old->nr_cpukinds * sizeof(*kinds));
|
||||
if (!kinds)
|
||||
return -1;
|
||||
new->cpukinds = kinds;
|
||||
new->nr_cpukinds = old->nr_cpukinds;
|
||||
memcpy(kinds, old->cpukinds, old->nr_cpukinds * sizeof(*kinds));
|
||||
|
||||
for(i=0;i<old->nr_cpukinds; i++) {
|
||||
kinds[i].cpuset = hwloc_bitmap_tma_dup(tma, old->cpukinds[i].cpuset);
|
||||
if (!kinds[i].cpuset) {
|
||||
new->nr_cpukinds = i;
|
||||
goto failed;
|
||||
}
|
||||
if (hwloc__tma_dup_infos(tma,
|
||||
&kinds[i].infos, &kinds[i].nr_infos,
|
||||
old->cpukinds[i].infos, old->cpukinds[i].nr_infos) < 0) {
|
||||
assert(!tma || !tma->dontfree); /* this tma cannot fail to allocate */
|
||||
hwloc_bitmap_free(kinds[i].cpuset);
|
||||
new->nr_cpukinds = i;
|
||||
goto failed;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
failed:
|
||||
hwloc_internal_cpukinds_destroy(new);
|
||||
return -1;
|
||||
}
|
||||
|
||||
void
|
||||
hwloc_internal_cpukinds_restrict(hwloc_topology_t topology)
|
||||
{
|
||||
unsigned i;
|
||||
int removed = 0;
|
||||
for(i=0; i<topology->nr_cpukinds; i++) {
|
||||
struct hwloc_internal_cpukind_s *kind = &topology->cpukinds[i];
|
||||
hwloc_bitmap_and(kind->cpuset, kind->cpuset, hwloc_get_root_obj(topology)->cpuset);
|
||||
if (hwloc_bitmap_iszero(kind->cpuset)) {
|
||||
hwloc_bitmap_free(kind->cpuset);
|
||||
hwloc__free_infos(kind->infos, kind->nr_infos);
|
||||
memmove(kind, kind+1, (topology->nr_cpukinds - i - 1)*sizeof(*kind));
|
||||
i--;
|
||||
topology->nr_cpukinds--;
|
||||
removed = 1;
|
||||
}
|
||||
}
|
||||
if (removed)
|
||||
hwloc_internal_cpukinds_rank(topology);
|
||||
}
|
||||
|
||||
|
||||
/********************
|
||||
* Registering
|
||||
*/
|
||||
|
||||
static __hwloc_inline int
|
||||
hwloc__cpukind_check_duplicate_info(struct hwloc_internal_cpukind_s *kind,
|
||||
const char *name, const char *value)
|
||||
{
|
||||
unsigned i;
|
||||
for(i=0; i<kind->nr_infos; i++)
|
||||
if (!strcmp(kind->infos[i].name, name)
|
||||
&& !strcmp(kind->infos[i].value, value))
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static __hwloc_inline void
|
||||
hwloc__cpukind_add_infos(struct hwloc_internal_cpukind_s *kind,
|
||||
const struct hwloc_info_s *infos, unsigned nr_infos)
|
||||
{
|
||||
unsigned i;
|
||||
for(i=0; i<nr_infos; i++) {
|
||||
if (hwloc__cpukind_check_duplicate_info(kind, infos[i].name, infos[i].value))
|
||||
continue;
|
||||
hwloc__add_info(&kind->infos, &kind->nr_infos, infos[i].name, infos[i].value);
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
hwloc_internal_cpukinds_register(hwloc_topology_t topology, hwloc_cpuset_t cpuset,
|
||||
int forced_efficiency,
|
||||
const struct hwloc_info_s *infos, unsigned nr_infos,
|
||||
unsigned long flags)
|
||||
{
|
||||
struct hwloc_internal_cpukind_s *kinds;
|
||||
unsigned i, max, bits, oldnr, newnr;
|
||||
|
||||
if (hwloc_bitmap_iszero(cpuset)) {
|
||||
hwloc_bitmap_free(cpuset);
|
||||
errno = EINVAL;
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (flags & ~HWLOC_CPUKINDS_REGISTER_FLAG_OVERWRITE_FORCED_EFFICIENCY) {
|
||||
errno = EINVAL;
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* TODO: for now, only windows provides a forced efficiency.
|
||||
* if another backend ever provides a conflicting value, the first backend value will be kept.
|
||||
* (user-provided values are not an issue, they are meant to overwrite)
|
||||
*/
|
||||
|
||||
/* If we have N kinds currently, we may need 2N+1 kinds after inserting the new one:
|
||||
* - each existing kind may get split into which PUs are in the new kind and which aren't.
|
||||
* - some PUs might not have been in any kind yet.
|
||||
*/
|
||||
max = 2 * topology->nr_cpukinds + 1;
|
||||
/* Allocate the power-of-two above 2N+1. */
|
||||
bits = hwloc_flsl(max-1) + 1;
|
||||
max = 1U<<bits;
|
||||
/* Allocate 8 minimum to avoid multiple reallocs */
|
||||
if (max < 8)
|
||||
max = 8;
|
||||
|
||||
/* Create or enlarge the array of kinds if needed */
|
||||
kinds = topology->cpukinds;
|
||||
if (max > topology->nr_cpukinds_allocated) {
|
||||
kinds = realloc(kinds, max * sizeof(*kinds));
|
||||
if (!kinds) {
|
||||
hwloc_bitmap_free(cpuset);
|
||||
return -1;
|
||||
}
|
||||
memset(&kinds[topology->nr_cpukinds_allocated], 0, (max - topology->nr_cpukinds_allocated) * sizeof(*kinds));
|
||||
topology->nr_cpukinds_allocated = max;
|
||||
topology->cpukinds = kinds;
|
||||
}
|
||||
|
||||
newnr = oldnr = topology->nr_cpukinds;
|
||||
for(i=0; i<oldnr; i++) {
|
||||
int res = hwloc_bitmap_compare_inclusion(cpuset, kinds[i].cpuset);
|
||||
if (res == HWLOC_BITMAP_INTERSECTS || res == HWLOC_BITMAP_INCLUDED) {
|
||||
/* new kind with intersection of cpusets and union of infos */
|
||||
kinds[newnr].cpuset = hwloc_bitmap_alloc();
|
||||
kinds[newnr].efficiency = HWLOC_CPUKIND_EFFICIENCY_UNKNOWN;
|
||||
kinds[newnr].forced_efficiency = forced_efficiency;
|
||||
hwloc_bitmap_and(kinds[newnr].cpuset, cpuset, kinds[i].cpuset);
|
||||
hwloc__cpukind_add_infos(&kinds[newnr], kinds[i].infos, kinds[i].nr_infos);
|
||||
hwloc__cpukind_add_infos(&kinds[newnr], infos, nr_infos);
|
||||
/* remove cpuset PUs from the existing kind that we just split */
|
||||
hwloc_bitmap_andnot(kinds[i].cpuset, kinds[i].cpuset, kinds[newnr].cpuset);
|
||||
/* clear cpuset PUs that were taken care of */
|
||||
hwloc_bitmap_andnot(cpuset, cpuset, kinds[newnr].cpuset);
|
||||
|
||||
newnr++;
|
||||
|
||||
} else if (res == HWLOC_BITMAP_CONTAINS
|
||||
|| res == HWLOC_BITMAP_EQUAL) {
|
||||
/* append new info to existing smaller (or equal) kind */
|
||||
hwloc__cpukind_add_infos(&kinds[i], infos, nr_infos);
|
||||
if ((flags & HWLOC_CPUKINDS_REGISTER_FLAG_OVERWRITE_FORCED_EFFICIENCY)
|
||||
|| kinds[i].forced_efficiency == HWLOC_CPUKIND_EFFICIENCY_UNKNOWN)
|
||||
kinds[i].forced_efficiency = forced_efficiency;
|
||||
/* clear cpuset PUs that were taken care of */
|
||||
hwloc_bitmap_andnot(cpuset, cpuset, kinds[i].cpuset);
|
||||
|
||||
} else {
|
||||
assert(res == HWLOC_BITMAP_DIFFERENT);
|
||||
/* nothing to do */
|
||||
}
|
||||
|
||||
/* don't compare with anything else if already empty */
|
||||
if (hwloc_bitmap_iszero(cpuset))
|
||||
break;
|
||||
}
|
||||
|
||||
/* add a final kind with remaining PUs if any */
|
||||
if (!hwloc_bitmap_iszero(cpuset)) {
|
||||
kinds[newnr].cpuset = cpuset;
|
||||
kinds[newnr].efficiency = HWLOC_CPUKIND_EFFICIENCY_UNKNOWN;
|
||||
kinds[newnr].forced_efficiency = forced_efficiency;
|
||||
hwloc__cpukind_add_infos(&kinds[newnr], infos, nr_infos);
|
||||
newnr++;
|
||||
} else {
|
||||
hwloc_bitmap_free(cpuset);
|
||||
}
|
||||
|
||||
topology->nr_cpukinds = newnr;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
hwloc_cpukinds_register(hwloc_topology_t topology, hwloc_cpuset_t _cpuset,
|
||||
int forced_efficiency,
|
||||
unsigned nr_infos, struct hwloc_info_s *infos,
|
||||
unsigned long flags)
|
||||
{
|
||||
hwloc_bitmap_t cpuset;
|
||||
int err;
|
||||
|
||||
if (flags) {
|
||||
errno = EINVAL;
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (!_cpuset || hwloc_bitmap_iszero(_cpuset)) {
|
||||
errno = EINVAL;
|
||||
return -1;
|
||||
}
|
||||
|
||||
cpuset = hwloc_bitmap_dup(_cpuset);
|
||||
if (!cpuset)
|
||||
return -1;
|
||||
|
||||
if (forced_efficiency < 0)
|
||||
forced_efficiency = HWLOC_CPUKIND_EFFICIENCY_UNKNOWN;
|
||||
|
||||
err = hwloc_internal_cpukinds_register(topology, cpuset, forced_efficiency, infos, nr_infos, HWLOC_CPUKINDS_REGISTER_FLAG_OVERWRITE_FORCED_EFFICIENCY);
|
||||
if (err < 0)
|
||||
return err;
|
||||
|
||||
hwloc_internal_cpukinds_rank(topology);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/*********************
|
||||
* Ranking
|
||||
*/
|
||||
|
||||
static int
|
||||
hwloc__cpukinds_check_duplicate_rankings(struct hwloc_topology *topology)
|
||||
{
|
||||
unsigned i,j;
|
||||
for(i=0; i<topology->nr_cpukinds; i++)
|
||||
for(j=i+1; j<topology->nr_cpukinds; j++)
|
||||
if (topology->cpukinds[i].forced_efficiency == topology->cpukinds[j].forced_efficiency)
|
||||
/* if any duplicate, fail */
|
||||
return -1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
hwloc__cpukinds_try_rank_by_forced_efficiency(struct hwloc_topology *topology)
|
||||
{
|
||||
unsigned i;
|
||||
|
||||
hwloc_debug("Trying to rank cpukinds by forced efficiency...\n");
|
||||
for(i=0; i<topology->nr_cpukinds; i++) {
|
||||
if (topology->cpukinds[i].forced_efficiency == HWLOC_CPUKIND_EFFICIENCY_UNKNOWN)
|
||||
/* if any unknown, fail */
|
||||
return -1;
|
||||
topology->cpukinds[i].ranking_value = topology->cpukinds[i].forced_efficiency;
|
||||
}
|
||||
|
||||
return hwloc__cpukinds_check_duplicate_rankings(topology);
|
||||
}
|
||||
|
||||
struct hwloc_cpukinds_info_summary {
|
||||
int have_max_freq;
|
||||
int have_base_freq;
|
||||
int have_intel_core_type;
|
||||
struct hwloc_cpukind_info_summary {
|
||||
unsigned intel_core_type; /* 1 for atom, 2 for core */
|
||||
unsigned max_freq, base_freq; /* MHz, hence < 100000 */
|
||||
} * summaries;
|
||||
};
|
||||
|
||||
static void
|
||||
hwloc__cpukinds_summarize_info(struct hwloc_topology *topology,
|
||||
struct hwloc_cpukinds_info_summary *summary)
|
||||
{
|
||||
unsigned i, j;
|
||||
|
||||
summary->have_max_freq = 1;
|
||||
summary->have_base_freq = 1;
|
||||
summary->have_intel_core_type = 1;
|
||||
|
||||
for(i=0; i<topology->nr_cpukinds; i++) {
|
||||
struct hwloc_internal_cpukind_s *kind = &topology->cpukinds[i];
|
||||
for(j=0; j<kind->nr_infos; j++) {
|
||||
struct hwloc_info_s *info = &kind->infos[j];
|
||||
if (!strcmp(info->name, "FrequencyMaxMHz")) {
|
||||
summary->summaries[i].max_freq = atoi(info->value);
|
||||
} else if (!strcmp(info->name, "FrequencyBaseMHz")) {
|
||||
summary->summaries[i].base_freq = atoi(info->value);
|
||||
} else if (!strcmp(info->name, "CoreType")) {
|
||||
if (!strcmp(info->value, "IntelAtom"))
|
||||
summary->summaries[i].intel_core_type = 1;
|
||||
else if (!strcmp(info->value, "IntelCore"))
|
||||
summary->summaries[i].intel_core_type = 2;
|
||||
}
|
||||
}
|
||||
hwloc_debug("cpukind #%u has intel_core_type %u max_freq %u base_freq %u\n",
|
||||
i, summary->summaries[i].intel_core_type,
|
||||
summary->summaries[i].max_freq, summary->summaries[i].base_freq);
|
||||
if (!summary->summaries[i].base_freq)
|
||||
summary->have_base_freq = 0;
|
||||
if (!summary->summaries[i].max_freq)
|
||||
summary->have_max_freq = 0;
|
||||
if (!summary->summaries[i].intel_core_type)
|
||||
summary->have_intel_core_type = 0;
|
||||
}
|
||||
}
|
||||
|
||||
enum hwloc_cpukinds_ranking {
|
||||
HWLOC_CPUKINDS_RANKING_DEFAULT, /* forced + frequency on ARM, forced + coretype_frequency otherwise */
|
||||
HWLOC_CPUKINDS_RANKING_NO_FORCED_EFFICIENCY, /* default without forced */
|
||||
HWLOC_CPUKINDS_RANKING_FORCED_EFFICIENCY,
|
||||
HWLOC_CPUKINDS_RANKING_CORETYPE_FREQUENCY,
|
||||
HWLOC_CPUKINDS_RANKING_CORETYPE,
|
||||
HWLOC_CPUKINDS_RANKING_FREQUENCY,
|
||||
HWLOC_CPUKINDS_RANKING_FREQUENCY_MAX,
|
||||
HWLOC_CPUKINDS_RANKING_FREQUENCY_BASE,
|
||||
HWLOC_CPUKINDS_RANKING_NONE
|
||||
};
|
||||
|
||||
static int
|
||||
hwloc__cpukinds_try_rank_by_info(struct hwloc_topology *topology,
|
||||
enum hwloc_cpukinds_ranking heuristics,
|
||||
struct hwloc_cpukinds_info_summary *summary)
|
||||
{
|
||||
unsigned i;
|
||||
|
||||
if (HWLOC_CPUKINDS_RANKING_CORETYPE_FREQUENCY == heuristics) {
|
||||
hwloc_debug("Trying to rank cpukinds by coretype+frequency...\n");
|
||||
/* we need intel_core_type + (base or max freq) for all kinds */
|
||||
if (!summary->have_intel_core_type
|
||||
|| (!summary->have_max_freq && !summary->have_base_freq))
|
||||
return -1;
|
||||
/* rank first by coretype (Core>>Atom) then by frequency, base if available, max otherwise */
|
||||
for(i=0; i<topology->nr_cpukinds; i++) {
|
||||
struct hwloc_internal_cpukind_s *kind = &topology->cpukinds[i];
|
||||
if (summary->have_base_freq)
|
||||
kind->ranking_value = (summary->summaries[i].intel_core_type << 20) + summary->summaries[i].base_freq;
|
||||
else
|
||||
kind->ranking_value = (summary->summaries[i].intel_core_type << 20) + summary->summaries[i].max_freq;
|
||||
}
|
||||
|
||||
} else if (HWLOC_CPUKINDS_RANKING_CORETYPE == heuristics) {
|
||||
hwloc_debug("Trying to rank cpukinds by coretype...\n");
|
||||
/* we need intel_core_type */
|
||||
if (!summary->have_intel_core_type)
|
||||
return -1;
|
||||
/* rank by coretype (Core>>Atom) */
|
||||
for(i=0; i<topology->nr_cpukinds; i++) {
|
||||
struct hwloc_internal_cpukind_s *kind = &topology->cpukinds[i];
|
||||
kind->ranking_value = (summary->summaries[i].intel_core_type << 20);
|
||||
}
|
||||
|
||||
} else if (HWLOC_CPUKINDS_RANKING_FREQUENCY == heuristics) {
|
||||
hwloc_debug("Trying to rank cpukinds by frequency...\n");
|
||||
/* we need base or max freq for all kinds */
|
||||
if (!summary->have_max_freq && !summary->have_base_freq)
|
||||
return -1;
|
||||
/* rank first by frequency, base if available, max otherwise */
|
||||
for(i=0; i<topology->nr_cpukinds; i++) {
|
||||
struct hwloc_internal_cpukind_s *kind = &topology->cpukinds[i];
|
||||
if (summary->have_base_freq)
|
||||
kind->ranking_value = summary->summaries[i].base_freq;
|
||||
else
|
||||
kind->ranking_value = summary->summaries[i].max_freq;
|
||||
}
|
||||
|
||||
} else if (HWLOC_CPUKINDS_RANKING_FREQUENCY_MAX == heuristics) {
|
||||
hwloc_debug("Trying to rank cpukinds by frequency max...\n");
|
||||
/* we need max freq for all kinds */
|
||||
if (!summary->have_max_freq)
|
||||
return -1;
|
||||
/* rank first by frequency, base if available, max otherwise */
|
||||
for(i=0; i<topology->nr_cpukinds; i++) {
|
||||
struct hwloc_internal_cpukind_s *kind = &topology->cpukinds[i];
|
||||
kind->ranking_value = summary->summaries[i].max_freq;
|
||||
}
|
||||
|
||||
} else if (HWLOC_CPUKINDS_RANKING_FREQUENCY_BASE == heuristics) {
|
||||
hwloc_debug("Trying to rank cpukinds by frequency base...\n");
|
||||
/* we need max freq for all kinds */
|
||||
if (!summary->have_base_freq)
|
||||
return -1;
|
||||
/* rank first by frequency, base if available, max otherwise */
|
||||
for(i=0; i<topology->nr_cpukinds; i++) {
|
||||
struct hwloc_internal_cpukind_s *kind = &topology->cpukinds[i];
|
||||
kind->ranking_value = summary->summaries[i].base_freq;
|
||||
}
|
||||
|
||||
} else assert(0);
|
||||
|
||||
return hwloc__cpukinds_check_duplicate_rankings(topology);
|
||||
}
|
||||
|
||||
static int hwloc__cpukinds_compare_ranking_values(const void *_a, const void *_b)
|
||||
{
|
||||
const struct hwloc_internal_cpukind_s *a = _a;
|
||||
const struct hwloc_internal_cpukind_s *b = _b;
|
||||
return a->ranking_value - b->ranking_value;
|
||||
}
|
||||
|
||||
/* this function requires ranking values to be unique */
|
||||
static void
|
||||
hwloc__cpukinds_finalize_ranking(struct hwloc_topology *topology)
|
||||
{
|
||||
unsigned i;
|
||||
/* sort */
|
||||
qsort(topology->cpukinds, topology->nr_cpukinds, sizeof(*topology->cpukinds), hwloc__cpukinds_compare_ranking_values);
|
||||
/* define our own efficiency between 0 and N-1 */
|
||||
for(i=0; i<topology->nr_cpukinds; i++)
|
||||
topology->cpukinds[i].efficiency = i;
|
||||
}
|
||||
|
||||
int
|
||||
hwloc_internal_cpukinds_rank(struct hwloc_topology *topology)
|
||||
{
|
||||
enum hwloc_cpukinds_ranking heuristics;
|
||||
char *env;
|
||||
unsigned i;
|
||||
int err;
|
||||
|
||||
if (!topology->nr_cpukinds)
|
||||
return 0;
|
||||
|
||||
if (topology->nr_cpukinds == 1) {
|
||||
topology->cpukinds[0].efficiency = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
heuristics = HWLOC_CPUKINDS_RANKING_DEFAULT;
|
||||
env = getenv("HWLOC_CPUKINDS_RANKING");
|
||||
if (env) {
|
||||
if (!strcmp(env, "default"))
|
||||
heuristics = HWLOC_CPUKINDS_RANKING_DEFAULT;
|
||||
else if (!strcmp(env, "none"))
|
||||
heuristics = HWLOC_CPUKINDS_RANKING_NONE;
|
||||
else if (!strcmp(env, "coretype+frequency"))
|
||||
heuristics = HWLOC_CPUKINDS_RANKING_CORETYPE_FREQUENCY;
|
||||
else if (!strcmp(env, "coretype"))
|
||||
heuristics = HWLOC_CPUKINDS_RANKING_CORETYPE;
|
||||
else if (!strcmp(env, "frequency"))
|
||||
heuristics = HWLOC_CPUKINDS_RANKING_FREQUENCY;
|
||||
else if (!strcmp(env, "frequency_max"))
|
||||
heuristics = HWLOC_CPUKINDS_RANKING_FREQUENCY_MAX;
|
||||
else if (!strcmp(env, "frequency_base"))
|
||||
heuristics = HWLOC_CPUKINDS_RANKING_FREQUENCY_BASE;
|
||||
else if (!strcmp(env, "forced_efficiency"))
|
||||
heuristics = HWLOC_CPUKINDS_RANKING_FORCED_EFFICIENCY;
|
||||
else if (!strcmp(env, "no_forced_efficiency"))
|
||||
heuristics = HWLOC_CPUKINDS_RANKING_NO_FORCED_EFFICIENCY;
|
||||
else if (!hwloc_hide_errors())
|
||||
fprintf(stderr, "Failed to recognize HWLOC_CPUKINDS_RANKING value %s\n", env);
|
||||
}
|
||||
|
||||
if (heuristics == HWLOC_CPUKINDS_RANKING_DEFAULT
|
||||
|| heuristics == HWLOC_CPUKINDS_RANKING_NO_FORCED_EFFICIENCY) {
|
||||
/* default is forced_efficiency first */
|
||||
struct hwloc_cpukinds_info_summary summary;
|
||||
enum hwloc_cpukinds_ranking subheuristics;
|
||||
const char *arch;
|
||||
|
||||
if (heuristics == HWLOC_CPUKINDS_RANKING_DEFAULT)
|
||||
hwloc_debug("Using default ranking strategy...\n");
|
||||
else
|
||||
hwloc_debug("Using custom ranking strategy from HWLOC_CPUKINDS_RANKING=%s\n", env);
|
||||
|
||||
if (heuristics != HWLOC_CPUKINDS_RANKING_NO_FORCED_EFFICIENCY) {
|
||||
err = hwloc__cpukinds_try_rank_by_forced_efficiency(topology);
|
||||
if (!err)
|
||||
goto ready;
|
||||
}
|
||||
|
||||
summary.summaries = calloc(topology->nr_cpukinds, sizeof(*summary.summaries));
|
||||
if (!summary.summaries)
|
||||
goto failed;
|
||||
hwloc__cpukinds_summarize_info(topology, &summary);
|
||||
|
||||
arch = hwloc_obj_get_info_by_name(topology->levels[0][0], "Architecture");
|
||||
/* TODO: rather coretype_frequency only on x86/Intel? */
|
||||
if (arch && (!strncmp(arch, "arm", 3) || !strncmp(arch, "aarch", 5)))
|
||||
/* then frequency on ARM */
|
||||
subheuristics = HWLOC_CPUKINDS_RANKING_FREQUENCY;
|
||||
else
|
||||
/* or coretype+frequency otherwise */
|
||||
subheuristics = HWLOC_CPUKINDS_RANKING_CORETYPE_FREQUENCY;
|
||||
|
||||
err = hwloc__cpukinds_try_rank_by_info(topology, subheuristics, &summary);
|
||||
free(summary.summaries);
|
||||
if (!err)
|
||||
goto ready;
|
||||
|
||||
} else if (heuristics == HWLOC_CPUKINDS_RANKING_FORCED_EFFICIENCY) {
|
||||
hwloc_debug("Using custom ranking strategy from HWLOC_CPUKINDS_RANKING=%s\n", env);
|
||||
|
||||
err = hwloc__cpukinds_try_rank_by_forced_efficiency(topology);
|
||||
if (!err)
|
||||
goto ready;
|
||||
|
||||
} else if (heuristics != HWLOC_CPUKINDS_RANKING_NONE) {
|
||||
/* custom heuristics */
|
||||
struct hwloc_cpukinds_info_summary summary;
|
||||
|
||||
hwloc_debug("Using custom ranking strategy from HWLOC_CPUKINDS_RANKING=%s\n", env);
|
||||
|
||||
summary.summaries = calloc(topology->nr_cpukinds, sizeof(*summary.summaries));
|
||||
if (!summary.summaries)
|
||||
goto failed;
|
||||
hwloc__cpukinds_summarize_info(topology, &summary);
|
||||
|
||||
err = hwloc__cpukinds_try_rank_by_info(topology, heuristics, &summary);
|
||||
free(summary.summaries);
|
||||
if (!err)
|
||||
goto ready;
|
||||
}
|
||||
|
||||
failed:
|
||||
/* failed to rank, clear efficiencies */
|
||||
for(i=0; i<topology->nr_cpukinds; i++)
|
||||
topology->cpukinds[i].efficiency = HWLOC_CPUKIND_EFFICIENCY_UNKNOWN;
|
||||
hwloc_debug("Failed to rank cpukinds.\n\n");
|
||||
return 0;
|
||||
|
||||
ready:
|
||||
for(i=0; i<topology->nr_cpukinds; i++)
|
||||
hwloc_debug("cpukind #%u got ranking value %llu\n", i, (unsigned long long) topology->cpukinds[i].ranking_value);
|
||||
hwloc__cpukinds_finalize_ranking(topology);
|
||||
#ifdef HWLOC_DEBUG
|
||||
for(i=0; i<topology->nr_cpukinds; i++)
|
||||
assert(topology->cpukinds[i].efficiency == (int) i);
|
||||
#endif
|
||||
hwloc_debug("\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/*****************
|
||||
* Consulting
|
||||
*/
|
||||
|
||||
int
|
||||
hwloc_cpukinds_get_nr(hwloc_topology_t topology, unsigned long flags)
|
||||
{
|
||||
if (flags) {
|
||||
errno = EINVAL;
|
||||
return -1;
|
||||
}
|
||||
|
||||
return topology->nr_cpukinds;
|
||||
}
|
||||
|
||||
int
|
||||
hwloc_cpukinds_get_info(hwloc_topology_t topology,
|
||||
unsigned id,
|
||||
hwloc_bitmap_t cpuset,
|
||||
int *efficiencyp,
|
||||
unsigned *nr_infosp, struct hwloc_info_s **infosp,
|
||||
unsigned long flags)
|
||||
{
|
||||
struct hwloc_internal_cpukind_s *kind;
|
||||
|
||||
if (flags) {
|
||||
errno = EINVAL;
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (id >= topology->nr_cpukinds) {
|
||||
errno = ENOENT;
|
||||
return -1;
|
||||
}
|
||||
|
||||
kind = &topology->cpukinds[id];
|
||||
|
||||
if (cpuset)
|
||||
hwloc_bitmap_copy(cpuset, kind->cpuset);
|
||||
|
||||
if (efficiencyp)
|
||||
*efficiencyp = kind->efficiency;
|
||||
|
||||
if (nr_infosp && infosp) {
|
||||
*nr_infosp = kind->nr_infos;
|
||||
*infosp = kind->infos;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
hwloc_cpukinds_get_by_cpuset(hwloc_topology_t topology,
|
||||
hwloc_const_bitmap_t cpuset,
|
||||
unsigned long flags)
|
||||
{
|
||||
unsigned id;
|
||||
|
||||
if (flags) {
|
||||
errno = EINVAL;
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (!cpuset || hwloc_bitmap_iszero(cpuset)) {
|
||||
errno = EINVAL;
|
||||
return -1;
|
||||
}
|
||||
|
||||
for(id=0; id<topology->nr_cpukinds; id++) {
|
||||
struct hwloc_internal_cpukind_s *kind = &topology->cpukinds[id];
|
||||
int res = hwloc_bitmap_compare_inclusion(cpuset, kind->cpuset);
|
||||
if (res == HWLOC_BITMAP_EQUAL || res == HWLOC_BITMAP_INCLUDED) {
|
||||
return (int) id;
|
||||
} else if (res == HWLOC_BITMAP_INTERSECTS || res == HWLOC_BITMAP_CONTAINS) {
|
||||
errno = EXDEV;
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
errno = ENOENT;
|
||||
return -1;
|
||||
}
|
83
src/3rdparty/hwloc/src/diff.c
vendored
83
src/3rdparty/hwloc/src/diff.c
vendored
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright © 2013-2019 Inria. All rights reserved.
|
||||
* Copyright © 2013-2020 Inria. All rights reserved.
|
||||
* See COPYING in top-level directory.
|
||||
*/
|
||||
|
||||
|
@ -333,10 +333,8 @@ int hwloc_topology_diff_build(hwloc_topology_t topo1,
|
|||
|
||||
if (!err) {
|
||||
if (SETS_DIFFERENT(allowed_cpuset, topo1, topo2)
|
||||
|| SETS_DIFFERENT(allowed_nodeset, topo1, topo2)) {
|
||||
hwloc_append_diff_too_complex(hwloc_get_root_obj(topo1), diffp, &lastdiff);
|
||||
err = 1;
|
||||
}
|
||||
|| SETS_DIFFERENT(allowed_nodeset, topo1, topo2))
|
||||
goto roottoocomplex;
|
||||
}
|
||||
|
||||
if (!err) {
|
||||
|
@ -346,33 +344,78 @@ int hwloc_topology_diff_build(hwloc_topology_t topo1,
|
|||
dist1 = topo1->first_dist;
|
||||
dist2 = topo2->first_dist;
|
||||
while (dist1 || dist2) {
|
||||
if (!!dist1 != !!dist2) {
|
||||
hwloc_append_diff_too_complex(hwloc_get_root_obj(topo1), diffp, &lastdiff);
|
||||
err = 1;
|
||||
break;
|
||||
}
|
||||
if (!!dist1 != !!dist2)
|
||||
goto roottoocomplex;
|
||||
if (dist1->unique_type != dist2->unique_type
|
||||
|| dist1->different_types || dist2->different_types /* too lazy to support this case */
|
||||
|| dist1->nbobjs != dist2->nbobjs
|
||||
|| dist1->kind != dist2->kind
|
||||
|| memcmp(dist1->values, dist2->values, dist1->nbobjs * dist1->nbobjs * sizeof(*dist1->values))) {
|
||||
hwloc_append_diff_too_complex(hwloc_get_root_obj(topo1), diffp, &lastdiff);
|
||||
err = 1;
|
||||
break;
|
||||
}
|
||||
|| memcmp(dist1->values, dist2->values, dist1->nbobjs * dist1->nbobjs * sizeof(*dist1->values)))
|
||||
goto roottoocomplex;
|
||||
for(i=0; i<dist1->nbobjs; i++)
|
||||
/* gp_index isn't enforced above. so compare logical_index instead, which is enforced. requires distances refresh() above */
|
||||
if (dist1->objs[i]->logical_index != dist2->objs[i]->logical_index) {
|
||||
hwloc_append_diff_too_complex(hwloc_get_root_obj(topo1), diffp, &lastdiff);
|
||||
err = 1;
|
||||
break;
|
||||
}
|
||||
if (dist1->objs[i]->logical_index != dist2->objs[i]->logical_index)
|
||||
goto roottoocomplex;
|
||||
dist1 = dist1->next;
|
||||
dist2 = dist2->next;
|
||||
}
|
||||
}
|
||||
|
||||
if (!err) {
|
||||
/* memattrs */
|
||||
hwloc_internal_memattrs_refresh(topo1);
|
||||
hwloc_internal_memattrs_refresh(topo2);
|
||||
if (topo1->nr_memattrs != topo2->nr_memattrs)
|
||||
goto roottoocomplex;
|
||||
for(i=0; i<topo1->nr_memattrs; i++) {
|
||||
struct hwloc_internal_memattr_s *imattr1 = &topo1->memattrs[i], *imattr2 = &topo2->memattrs[i];
|
||||
unsigned j;
|
||||
if (strcmp(imattr1->name, imattr2->name)
|
||||
|| imattr1->flags != imattr2->flags
|
||||
|| imattr1->nr_targets != imattr2->nr_targets)
|
||||
goto roottoocomplex;
|
||||
if (i == HWLOC_MEMATTR_ID_CAPACITY
|
||||
|| i == HWLOC_MEMATTR_ID_LOCALITY)
|
||||
/* no need to check virtual attributes, there were refreshed from other topology attributes, checked above */
|
||||
continue;
|
||||
for(j=0; j<imattr1->nr_targets; j++) {
|
||||
struct hwloc_internal_memattr_target_s *imtg1 = &imattr1->targets[j], *imtg2 = &imattr2->targets[j];
|
||||
if (imtg1->type != imtg2->type)
|
||||
goto roottoocomplex;
|
||||
if (imtg1->obj->logical_index != imtg2->obj->logical_index)
|
||||
goto roottoocomplex;
|
||||
if (imattr1->flags & HWLOC_MEMATTR_FLAG_NEED_INITIATOR) {
|
||||
unsigned k;
|
||||
for(k=0; k<imtg1->nr_initiators; k++) {
|
||||
struct hwloc_internal_memattr_initiator_s *imi1 = &imtg1->initiators[k], *imi2 = &imtg2->initiators[k];
|
||||
if (imi1->value != imi2->value
|
||||
|| imi1->initiator.type != imi2->initiator.type)
|
||||
goto roottoocomplex;
|
||||
if (imi1->initiator.type == HWLOC_LOCATION_TYPE_CPUSET) {
|
||||
if (!hwloc_bitmap_isequal(imi1->initiator.location.cpuset, imi2->initiator.location.cpuset))
|
||||
goto roottoocomplex;
|
||||
} else if (imi1->initiator.type == HWLOC_LOCATION_TYPE_OBJECT) {
|
||||
if (imi1->initiator.location.object.type != imi2->initiator.location.object.type)
|
||||
goto roottoocomplex;
|
||||
if (imi1->initiator.location.object.obj->logical_index != imi2->initiator.location.object.obj->logical_index)
|
||||
goto roottoocomplex;
|
||||
} else {
|
||||
assert(0);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (imtg1->noinitiator_value != imtg2->noinitiator_value)
|
||||
goto roottoocomplex;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return err;
|
||||
|
||||
roottoocomplex:
|
||||
hwloc_append_diff_too_complex(hwloc_get_root_obj(topo1), diffp, &lastdiff);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/********************
|
||||
|
|
56
src/3rdparty/hwloc/src/distances.c
vendored
56
src/3rdparty/hwloc/src/distances.c
vendored
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright © 2010-2019 Inria. All rights reserved.
|
||||
* Copyright © 2010-2020 Inria. All rights reserved.
|
||||
* Copyright © 2011-2012 Université Bordeaux
|
||||
* Copyright © 2011 Cisco Systems, Inc. All rights reserved.
|
||||
* See COPYING in top-level directory.
|
||||
|
@ -526,36 +526,6 @@ int hwloc_distances_add(hwloc_topology_t topology,
|
|||
* Refresh objects in distances
|
||||
*/
|
||||
|
||||
static hwloc_obj_t hwloc_find_obj_by_depth_and_gp_index(hwloc_topology_t topology, unsigned depth, uint64_t gp_index)
|
||||
{
|
||||
hwloc_obj_t obj = hwloc_get_obj_by_depth(topology, depth, 0);
|
||||
while (obj) {
|
||||
if (obj->gp_index == gp_index)
|
||||
return obj;
|
||||
obj = obj->next_cousin;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static hwloc_obj_t hwloc_find_obj_by_type_and_gp_index(hwloc_topology_t topology, hwloc_obj_type_t type, uint64_t gp_index)
|
||||
{
|
||||
int depth = hwloc_get_type_depth(topology, type);
|
||||
if (depth == HWLOC_TYPE_DEPTH_UNKNOWN)
|
||||
return NULL;
|
||||
if (depth == HWLOC_TYPE_DEPTH_MULTIPLE) {
|
||||
int topodepth = hwloc_topology_get_depth(topology);
|
||||
for(depth=0; depth<topodepth; depth++) {
|
||||
if (hwloc_get_depth_type(topology, depth) == type) {
|
||||
hwloc_obj_t obj = hwloc_find_obj_by_depth_and_gp_index(topology, depth, gp_index);
|
||||
if (obj)
|
||||
return obj;
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
return hwloc_find_obj_by_depth_and_gp_index(topology, depth, gp_index);
|
||||
}
|
||||
|
||||
static void
|
||||
hwloc_internal_distances_restrict(hwloc_obj_t *objs,
|
||||
uint64_t *indexes,
|
||||
|
@ -612,7 +582,7 @@ hwloc_internal_distances_refresh_one(hwloc_topology_t topology,
|
|||
else
|
||||
abort();
|
||||
} else {
|
||||
obj = hwloc_find_obj_by_type_and_gp_index(topology, different_types ? different_types[i] : unique_type, indexes[i]);
|
||||
obj = hwloc_get_obj_by_type_and_gp_index(topology, different_types ? different_types[i] : unique_type, indexes[i]);
|
||||
}
|
||||
objs[i] = obj;
|
||||
if (!obj)
|
||||
|
@ -874,26 +844,6 @@ hwloc_distances_get_by_type(hwloc_topology_t topology, hwloc_obj_type_t type,
|
|||
* Grouping objects according to distances
|
||||
*/
|
||||
|
||||
static void hwloc_report_user_distance_error(const char *msg, int line)
|
||||
{
|
||||
static int reported = 0;
|
||||
|
||||
if (!reported && !hwloc_hide_errors()) {
|
||||
fprintf(stderr, "****************************************************************************\n");
|
||||
fprintf(stderr, "* hwloc %s was given invalid distances by the user.\n", HWLOC_VERSION);
|
||||
fprintf(stderr, "*\n");
|
||||
fprintf(stderr, "* %s\n", msg);
|
||||
fprintf(stderr, "* Error occurred in topology.c line %d\n", line);
|
||||
fprintf(stderr, "*\n");
|
||||
fprintf(stderr, "* Please make sure that distances given through the programming API\n");
|
||||
fprintf(stderr, "* do not contradict any other topology information.\n");
|
||||
fprintf(stderr, "* \n");
|
||||
fprintf(stderr, "* hwloc will now ignore this invalid topology information and continue.\n");
|
||||
fprintf(stderr, "****************************************************************************\n");
|
||||
reported = 1;
|
||||
}
|
||||
}
|
||||
|
||||
static int hwloc_compare_values(uint64_t a, uint64_t b, float accuracy)
|
||||
{
|
||||
if (accuracy != 0.0f && fabsf((float)a-(float)b) < (float)a * accuracy)
|
||||
|
@ -1086,7 +1036,7 @@ hwloc__groups_by_distances(struct hwloc_topology *topology,
|
|||
hwloc_debug_1arg_bitmap("adding Group object with %u objects and cpuset %s\n",
|
||||
groupsizes[i], group_obj->cpuset);
|
||||
res_obj = hwloc__insert_object_by_cpuset(topology, NULL, group_obj,
|
||||
(kind & HWLOC_DISTANCES_KIND_FROM_USER) ? hwloc_report_user_distance_error : hwloc_report_os_error);
|
||||
(kind & HWLOC_DISTANCES_KIND_FROM_USER) ? "distances:fromuser:group" : "distances:group");
|
||||
/* res_obj may be NULL on failure to insert. */
|
||||
if (!res_obj)
|
||||
failed++;
|
||||
|
|
1197
src/3rdparty/hwloc/src/memattrs.c
vendored
Normal file
1197
src/3rdparty/hwloc/src/memattrs.c
vendored
Normal file
File diff suppressed because it is too large
Load diff
4
src/3rdparty/hwloc/src/misc.c
vendored
4
src/3rdparty/hwloc/src/misc.c
vendored
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
* Copyright © 2009 CNRS
|
||||
* Copyright © 2009-2018 Inria. All rights reserved.
|
||||
* Copyright © 2009-2020 Inria. All rights reserved.
|
||||
* Copyright © 2009-2010 Université Bordeaux
|
||||
* Copyright © 2009-2018 Cisco Systems, Inc. All rights reserved.
|
||||
* See COPYING in top-level directory.
|
||||
|
@ -114,7 +114,7 @@ void hwloc_add_uname_info(struct hwloc_topology *topology __hwloc_attribute_unus
|
|||
char *
|
||||
hwloc_progname(struct hwloc_topology *topology __hwloc_attribute_unused)
|
||||
{
|
||||
#if HAVE_DECL_GETMODULEFILENAME
|
||||
#if (defined HAVE_DECL_GETMODULEFILENAME) && HAVE_DECL_GETMODULEFILENAME
|
||||
char name[256], *local_basename;
|
||||
unsigned res = GetModuleFileName(NULL, name, sizeof(name));
|
||||
if (res == sizeof(name) || !res)
|
||||
|
|
62
src/3rdparty/hwloc/src/pci-common.c
vendored
62
src/3rdparty/hwloc/src/pci-common.c
vendored
|
@ -232,7 +232,8 @@ enum hwloc_pci_busid_comparison_e {
|
|||
HWLOC_PCI_BUSID_LOWER,
|
||||
HWLOC_PCI_BUSID_HIGHER,
|
||||
HWLOC_PCI_BUSID_INCLUDED,
|
||||
HWLOC_PCI_BUSID_SUPERSET
|
||||
HWLOC_PCI_BUSID_SUPERSET,
|
||||
HWLOC_PCI_BUSID_EQUAL
|
||||
};
|
||||
|
||||
static enum hwloc_pci_busid_comparison_e
|
||||
|
@ -274,11 +275,8 @@ hwloc_pci_compare_busids(struct hwloc_obj *a, struct hwloc_obj *b)
|
|||
if (a->attr->pcidev.func > b->attr->pcidev.func)
|
||||
return HWLOC_PCI_BUSID_HIGHER;
|
||||
|
||||
/* Should never reach here. Abort on both debug builds and
|
||||
non-debug builds */
|
||||
assert(0);
|
||||
fprintf(stderr, "Bad assertion in hwloc %s:%d (aborting)\n", __FILE__, __LINE__);
|
||||
exit(1);
|
||||
/* Should never reach here. */
|
||||
return HWLOC_PCI_BUSID_EQUAL;
|
||||
}
|
||||
|
||||
static void
|
||||
|
@ -329,6 +327,23 @@ hwloc_pci_add_object(struct hwloc_obj *parent, struct hwloc_obj **parent_io_firs
|
|||
}
|
||||
return;
|
||||
}
|
||||
case HWLOC_PCI_BUSID_EQUAL: {
|
||||
static int reported = 0;
|
||||
if (!reported && !hwloc_hide_errors()) {
|
||||
fprintf(stderr, "*********************************************************\n");
|
||||
fprintf(stderr, "* hwloc %s received invalid PCI information.\n", HWLOC_VERSION);
|
||||
fprintf(stderr, "*\n");
|
||||
fprintf(stderr, "* Trying to insert PCI object %04x:%02x:%02x.%01x at %04x:%02x:%02x.%01x\n",
|
||||
new->attr->pcidev.domain, new->attr->pcidev.bus, new->attr->pcidev.dev, new->attr->pcidev.func,
|
||||
(*curp)->attr->pcidev.domain, (*curp)->attr->pcidev.bus, (*curp)->attr->pcidev.dev, (*curp)->attr->pcidev.func);
|
||||
fprintf(stderr, "*\n");
|
||||
fprintf(stderr, "* hwloc will now ignore this object and continue.\n");
|
||||
fprintf(stderr, "*********************************************************\n");
|
||||
reported = 1;
|
||||
}
|
||||
hwloc_free_unlinked_object(new);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
/* add to the end of the list if higher than everybody */
|
||||
|
@ -425,39 +440,10 @@ hwloc_pcidisc_add_hostbridges(struct hwloc_topology *topology,
|
|||
|
||||
static struct hwloc_obj *
|
||||
hwloc_pci_fixup_busid_parent(struct hwloc_topology *topology __hwloc_attribute_unused,
|
||||
struct hwloc_pcidev_attr_s *busid,
|
||||
struct hwloc_obj *parent)
|
||||
struct hwloc_pcidev_attr_s *busid __hwloc_attribute_unused,
|
||||
struct hwloc_obj *parent __hwloc_attribute_unused)
|
||||
{
|
||||
/* Xeon E5v3 in cluster-on-die mode only have PCI on the first NUMA node of each package.
|
||||
* but many dual-processor host report the second PCI hierarchy on 2nd NUMA of first package.
|
||||
*/
|
||||
if (parent->depth >= 2
|
||||
&& parent->type == HWLOC_OBJ_NUMANODE
|
||||
&& parent->sibling_rank == 1 && parent->parent->arity == 2
|
||||
&& parent->parent->type == HWLOC_OBJ_PACKAGE
|
||||
&& parent->parent->sibling_rank == 0 && parent->parent->parent->arity == 2) {
|
||||
const char *cpumodel = hwloc_obj_get_info_by_name(parent->parent, "CPUModel");
|
||||
if (cpumodel && strstr(cpumodel, "Xeon")) {
|
||||
if (!hwloc_hide_errors()) {
|
||||
fprintf(stderr, "****************************************************************************\n");
|
||||
fprintf(stderr, "* hwloc %s has encountered an incorrect PCI locality information.\n", HWLOC_VERSION);
|
||||
fprintf(stderr, "* PCI bus %04x:%02x is supposedly close to 2nd NUMA node of 1st package,\n",
|
||||
busid->domain, busid->bus);
|
||||
fprintf(stderr, "* however hwloc believes this is impossible on this architecture.\n");
|
||||
fprintf(stderr, "* Therefore the PCI bus will be moved to 1st NUMA node of 2nd package.\n");
|
||||
fprintf(stderr, "*\n");
|
||||
fprintf(stderr, "* If you feel this fixup is wrong, disable it by setting in your environment\n");
|
||||
fprintf(stderr, "* HWLOC_PCI_%04x_%02x_LOCALCPUS= (empty value), and report the problem\n",
|
||||
busid->domain, busid->bus);
|
||||
fprintf(stderr, "* to the hwloc's user mailing list together with the XML output of lstopo.\n");
|
||||
fprintf(stderr, "*\n");
|
||||
fprintf(stderr, "* You may silence this message by setting HWLOC_HIDE_ERRORS=1 in your environment.\n");
|
||||
fprintf(stderr, "****************************************************************************\n");
|
||||
}
|
||||
return parent->parent->next_sibling->first_child;
|
||||
}
|
||||
}
|
||||
|
||||
/* no quirk for now */
|
||||
return parent;
|
||||
}
|
||||
|
||||
|
|
12
src/3rdparty/hwloc/src/shmem.c
vendored
12
src/3rdparty/hwloc/src/shmem.c
vendored
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright © 2017-2019 Inria. All rights reserved.
|
||||
* Copyright © 2017-2020 Inria. All rights reserved.
|
||||
* See COPYING in top-level directory.
|
||||
*/
|
||||
|
||||
|
@ -97,6 +97,7 @@ hwloc_shmem_topology_write(hwloc_topology_t topology,
|
|||
* without being able to free() them.
|
||||
*/
|
||||
hwloc_internal_distances_refresh(topology);
|
||||
hwloc_internal_memattrs_refresh(topology);
|
||||
|
||||
header.header_version = HWLOC_SHMEM_HEADER_VERSION;
|
||||
header.header_length = sizeof(header);
|
||||
|
@ -134,8 +135,9 @@ hwloc_shmem_topology_write(hwloc_topology_t topology,
|
|||
|
||||
assert((char *)mmap_res <= (char *)mmap_address + length);
|
||||
|
||||
/* now refresh the new distances so that adopters can use them without refreshing the R/O shmem mapping */
|
||||
/* now refresh the new distances/memattrs so that adopters can use them without refreshing the R/O shmem mapping */
|
||||
hwloc_internal_distances_refresh(new);
|
||||
hwloc_internal_memattrs_refresh(topology);
|
||||
|
||||
/* topology is saved, release resources now */
|
||||
munmap(mmap_address, length);
|
||||
|
@ -214,11 +216,13 @@ hwloc_shmem_topology_adopt(hwloc_topology_t *topologyp,
|
|||
new->support.discovery = malloc(sizeof(*new->support.discovery));
|
||||
new->support.cpubind = malloc(sizeof(*new->support.cpubind));
|
||||
new->support.membind = malloc(sizeof(*new->support.membind));
|
||||
if (!new->support.discovery || !new->support.cpubind || !new->support.membind)
|
||||
new->support.misc = malloc(sizeof(*new->support.misc));
|
||||
if (!new->support.discovery || !new->support.cpubind || !new->support.membind || !new->support.misc)
|
||||
goto out_with_support;
|
||||
memcpy(new->support.discovery, old->support.discovery, sizeof(*new->support.discovery));
|
||||
memcpy(new->support.cpubind, old->support.cpubind, sizeof(*new->support.cpubind));
|
||||
memcpy(new->support.membind, old->support.membind, sizeof(*new->support.membind));
|
||||
memcpy(new->support.misc, old->support.misc, sizeof(*new->support.misc));
|
||||
hwloc_set_binding_hooks(new);
|
||||
/* clear userdata callbacks pointing to the writer process' functions */
|
||||
new->userdata_export_cb = NULL;
|
||||
|
@ -236,6 +240,7 @@ hwloc_shmem_topology_adopt(hwloc_topology_t *topologyp,
|
|||
free(new->support.discovery);
|
||||
free(new->support.cpubind);
|
||||
free(new->support.membind);
|
||||
free(new->support.misc);
|
||||
free(new);
|
||||
out_with_components:
|
||||
hwloc_components_fini();
|
||||
|
@ -252,6 +257,7 @@ hwloc__topology_disadopt(hwloc_topology_t topology)
|
|||
free(topology->support.discovery);
|
||||
free(topology->support.cpubind);
|
||||
free(topology->support.membind);
|
||||
free(topology->support.misc);
|
||||
free(topology);
|
||||
}
|
||||
|
||||
|
|
7
src/3rdparty/hwloc/src/static-components.h
vendored
7
src/3rdparty/hwloc/src/static-components.h
vendored
|
@ -1,9 +1,4 @@
|
|||
HWLOC_DECLSPEC extern const struct hwloc_component hwloc_noos_component;
|
||||
HWLOC_DECLSPEC extern const struct hwloc_component hwloc_xml_component;
|
||||
HWLOC_DECLSPEC extern const struct hwloc_component hwloc_synthetic_component;
|
||||
HWLOC_DECLSPEC extern const struct hwloc_component hwloc_xml_nolibxml_component;
|
||||
HWLOC_DECLSPEC extern const struct hwloc_component hwloc_windows_component;
|
||||
HWLOC_DECLSPEC extern const struct hwloc_component hwloc_x86_component;
|
||||
#include <private/internal-components.h>
|
||||
static const struct hwloc_component * hwloc_static_components[] = {
|
||||
&hwloc_noos_component,
|
||||
&hwloc_xml_component,
|
||||
|
|
8
src/3rdparty/hwloc/src/topology-synthetic.c
vendored
8
src/3rdparty/hwloc/src/topology-synthetic.c
vendored
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
* Copyright © 2009 CNRS
|
||||
* Copyright © 2009-2019 Inria. All rights reserved.
|
||||
* Copyright © 2009-2020 Inria. All rights reserved.
|
||||
* Copyright © 2009-2010 Université Bordeaux
|
||||
* Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved.
|
||||
* See COPYING in top-level directory.
|
||||
|
@ -471,7 +471,7 @@ hwloc_backend_synthetic_init(struct hwloc_synthetic_backend_data_s *data,
|
|||
/* initialize parent arity to 0 so that the levels are not infinite */
|
||||
data->level[count-1].arity = 0;
|
||||
|
||||
while (*pos == ' ')
|
||||
while (*pos == ' ' || *pos == '\n')
|
||||
pos++;
|
||||
|
||||
if (!*pos)
|
||||
|
@ -912,7 +912,7 @@ hwloc_synthetic_insert_attached(struct hwloc_topology *topology,
|
|||
|
||||
hwloc_synthetic_set_attr(&attached->attr, child);
|
||||
|
||||
hwloc_insert_object_by_cpuset(topology, child);
|
||||
hwloc__insert_object_by_cpuset(topology, NULL, child, "synthetic:attached");
|
||||
|
||||
hwloc_synthetic_insert_attached(topology, data, attached->next, set);
|
||||
}
|
||||
|
@ -964,7 +964,7 @@ hwloc__look_synthetic(struct hwloc_topology *topology,
|
|||
|
||||
hwloc_synthetic_set_attr(&curlevel->attr, obj);
|
||||
|
||||
hwloc_insert_object_by_cpuset(topology, obj);
|
||||
hwloc__insert_object_by_cpuset(topology, NULL, obj, "synthetic");
|
||||
}
|
||||
|
||||
hwloc_synthetic_insert_attached(topology, data, curlevel->attached, set);
|
||||
|
|
208
src/3rdparty/hwloc/src/topology-windows.c
vendored
208
src/3rdparty/hwloc/src/topology-windows.c
vendored
|
@ -93,9 +93,10 @@ typedef struct _GROUP_AFFINITY {
|
|||
#endif
|
||||
|
||||
#ifndef HAVE_PROCESSOR_RELATIONSHIP
|
||||
typedef struct _PROCESSOR_RELATIONSHIP {
|
||||
typedef struct HWLOC_PROCESSOR_RELATIONSHIP {
|
||||
BYTE Flags;
|
||||
BYTE Reserved[21];
|
||||
BYTE EfficiencyClass; /* for RelationProcessorCore, higher means greater performance but less efficiency, only available in Win10+ */
|
||||
BYTE Reserved[20];
|
||||
WORD GroupCount;
|
||||
GROUP_AFFINITY GroupMask[ANYSIZE_ARRAY];
|
||||
} PROCESSOR_RELATIONSHIP, *PPROCESSOR_RELATIONSHIP;
|
||||
|
@ -228,9 +229,12 @@ static PFN_VIRTUALFREEEX VirtualFreeExProc;
|
|||
typedef BOOL (WINAPI *PFN_QUERYWORKINGSETEX)(HANDLE hProcess, PVOID pv, DWORD cb);
|
||||
static PFN_QUERYWORKINGSETEX QueryWorkingSetExProc;
|
||||
|
||||
typedef NTSTATUS (WINAPI *PFN_RTLGETVERSION)(OSVERSIONINFOEX*);
|
||||
PFN_RTLGETVERSION RtlGetVersionProc;
|
||||
|
||||
static void hwloc_win_get_function_ptrs(void)
|
||||
{
|
||||
HMODULE kernel32;
|
||||
HMODULE kernel32, ntdll;
|
||||
|
||||
#if HWLOC_HAVE_GCC_W_CAST_FUNCTION_TYPE
|
||||
#pragma GCC diagnostic ignored "-Wcast-function-type"
|
||||
|
@ -275,6 +279,9 @@ static void hwloc_win_get_function_ptrs(void)
|
|||
QueryWorkingSetExProc = (PFN_QUERYWORKINGSETEX) GetProcAddress(psapi, "QueryWorkingSetEx");
|
||||
}
|
||||
|
||||
ntdll = GetModuleHandle("ntdll");
|
||||
RtlGetVersionProc = (PFN_RTLGETVERSION) GetProcAddress(ntdll, "RtlGetVersion");
|
||||
|
||||
#if HWLOC_HAVE_GCC_W_CAST_FUNCTION_TYPE
|
||||
#pragma GCC diagnostic warning "-Wcast-function-type"
|
||||
#endif
|
||||
|
@ -734,6 +741,88 @@ hwloc_win_get_area_memlocation(hwloc_topology_t topology __hwloc_attribute_unuse
|
|||
}
|
||||
|
||||
|
||||
|
||||
/*************************
|
||||
* Efficiency classes
|
||||
*/
|
||||
|
||||
struct hwloc_win_efficiency_classes {
|
||||
unsigned nr_classes;
|
||||
unsigned nr_classes_allocated;
|
||||
struct hwloc_win_efficiency_class {
|
||||
unsigned value;
|
||||
hwloc_bitmap_t cpuset;
|
||||
} *classes;
|
||||
};
|
||||
|
||||
static void
|
||||
hwloc_win_efficiency_classes_init(struct hwloc_win_efficiency_classes *classes)
|
||||
{
|
||||
classes->classes = NULL;
|
||||
classes->nr_classes_allocated = 0;
|
||||
classes->nr_classes = 0;
|
||||
}
|
||||
|
||||
static int
|
||||
hwloc_win_efficiency_classes_add(struct hwloc_win_efficiency_classes *classes,
|
||||
hwloc_const_bitmap_t cpuset,
|
||||
unsigned value)
|
||||
{
|
||||
unsigned i;
|
||||
|
||||
/* look for existing class with that efficiency value */
|
||||
for(i=0; i<classes->nr_classes; i++) {
|
||||
if (classes->classes[i].value == value) {
|
||||
hwloc_bitmap_or(classes->classes[i].cpuset, classes->classes[i].cpuset, cpuset);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* extend the array if needed */
|
||||
if (classes->nr_classes == classes->nr_classes_allocated) {
|
||||
struct hwloc_win_efficiency_class *tmp;
|
||||
unsigned new_nr_allocated = 2*classes->nr_classes_allocated;
|
||||
if (!new_nr_allocated) {
|
||||
#define HWLOC_WIN_EFFICIENCY_CLASSES_DEFAULT_MAX 4 /* 2 should be enough is most cases */
|
||||
new_nr_allocated = HWLOC_WIN_EFFICIENCY_CLASSES_DEFAULT_MAX;
|
||||
}
|
||||
tmp = realloc(classes->classes, new_nr_allocated * sizeof(*classes->classes));
|
||||
if (!tmp)
|
||||
return -1;
|
||||
classes->classes = tmp;
|
||||
classes->nr_classes_allocated = new_nr_allocated;
|
||||
}
|
||||
|
||||
/* add new class */
|
||||
classes->classes[classes->nr_classes].cpuset = hwloc_bitmap_alloc();
|
||||
if (!classes->classes[classes->nr_classes].cpuset)
|
||||
return -1;
|
||||
classes->classes[classes->nr_classes].value = value;
|
||||
hwloc_bitmap_copy(classes->classes[classes->nr_classes].cpuset, cpuset);
|
||||
classes->nr_classes++;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void
|
||||
hwloc_win_efficiency_classes_register(hwloc_topology_t topology,
|
||||
struct hwloc_win_efficiency_classes *classes)
|
||||
{
|
||||
unsigned i;
|
||||
for(i=0; i<classes->nr_classes; i++) {
|
||||
hwloc_internal_cpukinds_register(topology, classes->classes[i].cpuset, classes->classes[i].value, NULL, 0, 0);
|
||||
classes->classes[i].cpuset = NULL; /* given to cpukinds */
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
hwloc_win_efficiency_classes_destroy(struct hwloc_win_efficiency_classes *classes)
|
||||
{
|
||||
unsigned i;
|
||||
for(i=0; i<classes->nr_classes; i++)
|
||||
hwloc_bitmap_free(classes->classes[i].cpuset);
|
||||
free(classes->classes);
|
||||
}
|
||||
|
||||
/*************************
|
||||
* discovery
|
||||
*/
|
||||
|
@ -753,6 +842,12 @@ hwloc_look_windows(struct hwloc_backend *backend, struct hwloc_disc_status *dsta
|
|||
DWORD length;
|
||||
int gotnuma = 0;
|
||||
int gotnumamemory = 0;
|
||||
OSVERSIONINFOEX osvi;
|
||||
char versionstr[20];
|
||||
char hostname[122] = "";
|
||||
unsigned hostname_size = sizeof(hostname);
|
||||
int has_efficiencyclass = 0;
|
||||
struct hwloc_win_efficiency_classes eclasses;
|
||||
|
||||
assert(dstatus->phase == HWLOC_DISC_PHASE_CPU);
|
||||
|
||||
|
@ -760,6 +855,25 @@ hwloc_look_windows(struct hwloc_backend *backend, struct hwloc_disc_status *dsta
|
|||
/* somebody discovered things */
|
||||
return -1;
|
||||
|
||||
ZeroMemory(&osvi, sizeof(OSVERSIONINFOEX));
|
||||
osvi.dwOSVersionInfoSize = sizeof(OSVERSIONINFOEX);
|
||||
|
||||
if (RtlGetVersionProc) {
|
||||
/* RtlGetVersion() returns the currently-running Windows version */
|
||||
RtlGetVersionProc(&osvi);
|
||||
} else {
|
||||
/* GetVersionEx() and isWindows10OrGreater() depend on what the manifest says
|
||||
* (manifest of the program, not of libhwloc.dll), they may return old versions
|
||||
* if the currently-running Windows is not listed in the manifest.
|
||||
*/
|
||||
GetVersionEx((LPOSVERSIONINFO)&osvi);
|
||||
}
|
||||
|
||||
if (osvi.dwMajorVersion >= 10) {
|
||||
has_efficiencyclass = 1;
|
||||
hwloc_win_efficiency_classes_init(&eclasses);
|
||||
}
|
||||
|
||||
hwloc_alloc_root_sets(topology->levels[0][0]);
|
||||
|
||||
GetSystemInfo(&SystemInfo);
|
||||
|
@ -887,7 +1001,7 @@ hwloc_look_windows(struct hwloc_backend *backend, struct hwloc_disc_status *dsta
|
|||
default:
|
||||
break;
|
||||
}
|
||||
hwloc_insert_object_by_cpuset(topology, obj);
|
||||
hwloc__insert_object_by_cpuset(topology, NULL, obj, "windows:GetLogicalProcessorInformation");
|
||||
}
|
||||
|
||||
free(procInfo);
|
||||
|
@ -919,6 +1033,7 @@ hwloc_look_windows(struct hwloc_backend *backend, struct hwloc_disc_status *dsta
|
|||
(void*) procInfo < (void*) ((uintptr_t) procInfoTotal + length);
|
||||
procInfo = (void*) ((uintptr_t) procInfo + procInfo->Size)) {
|
||||
unsigned num, i;
|
||||
unsigned efficiency_class = 0;
|
||||
GROUP_AFFINITY *GroupMask;
|
||||
|
||||
/* Ignore unknown caches */
|
||||
|
@ -953,6 +1068,11 @@ hwloc_look_windows(struct hwloc_backend *backend, struct hwloc_disc_status *dsta
|
|||
type = HWLOC_OBJ_CORE;
|
||||
num = procInfo->Processor.GroupCount;
|
||||
GroupMask = procInfo->Processor.GroupMask;
|
||||
if (has_efficiencyclass)
|
||||
/* the EfficiencyClass field didn't exist before Windows10 and recent MSVC headers,
|
||||
* so just access it manually instead of trying to detect it.
|
||||
*/
|
||||
efficiency_class = * ((&procInfo->Processor.Flags) + 1);
|
||||
break;
|
||||
case RelationGroup:
|
||||
/* So strange an interface... */
|
||||
|
@ -981,7 +1101,7 @@ hwloc_look_windows(struct hwloc_backend *backend, struct hwloc_disc_status *dsta
|
|||
obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_GROUP, id);
|
||||
obj->cpuset = set;
|
||||
obj->attr->group.kind = HWLOC_GROUP_KIND_WINDOWS_PROCESSOR_GROUP;
|
||||
hwloc_insert_object_by_cpuset(topology, obj);
|
||||
hwloc__insert_object_by_cpuset(topology, NULL, obj, "windows:GetLogicalProcessorInformation:ProcessorGroup");
|
||||
} else
|
||||
hwloc_bitmap_free(set);
|
||||
}
|
||||
|
@ -1005,6 +1125,11 @@ hwloc_look_windows(struct hwloc_backend *backend, struct hwloc_disc_status *dsta
|
|||
}
|
||||
hwloc_debug_2args_bitmap("%s#%u bitmap %s\n", hwloc_obj_type_string(type), id, obj->cpuset);
|
||||
switch (type) {
|
||||
case HWLOC_OBJ_CORE: {
|
||||
if (has_efficiencyclass)
|
||||
hwloc_win_efficiency_classes_add(&eclasses, obj->cpuset, efficiency_class);
|
||||
break;
|
||||
}
|
||||
case HWLOC_OBJ_NUMANODE:
|
||||
{
|
||||
ULONGLONG avail;
|
||||
|
@ -1055,7 +1180,7 @@ hwloc_look_windows(struct hwloc_backend *backend, struct hwloc_disc_status *dsta
|
|||
default:
|
||||
break;
|
||||
}
|
||||
hwloc_insert_object_by_cpuset(topology, obj);
|
||||
hwloc__insert_object_by_cpuset(topology, NULL, obj, "windows:GetLogicalProcessorInformationEx");
|
||||
}
|
||||
free(procInfoTotal);
|
||||
}
|
||||
|
@ -1076,29 +1201,88 @@ hwloc_look_windows(struct hwloc_backend *backend, struct hwloc_disc_status *dsta
|
|||
hwloc_bitmap_only(obj->cpuset, idx);
|
||||
hwloc_debug_1arg_bitmap("cpu %u has cpuset %s\n",
|
||||
idx, obj->cpuset);
|
||||
hwloc_insert_object_by_cpuset(topology, obj);
|
||||
hwloc__insert_object_by_cpuset(topology, NULL, obj, "windows:ProcessorGroup:pu");
|
||||
} hwloc_bitmap_foreach_end();
|
||||
hwloc_bitmap_free(groups_pu_set);
|
||||
} else {
|
||||
/* no processor groups */
|
||||
SYSTEM_INFO sysinfo;
|
||||
hwloc_obj_t obj;
|
||||
unsigned idx;
|
||||
GetSystemInfo(&sysinfo);
|
||||
for(idx=0; idx<32; idx++)
|
||||
if (sysinfo.dwActiveProcessorMask & (((DWORD_PTR)1)<<idx)) {
|
||||
if (SystemInfo.dwActiveProcessorMask & (((DWORD_PTR)1)<<idx)) {
|
||||
obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_PU, idx);
|
||||
obj->cpuset = hwloc_bitmap_alloc();
|
||||
hwloc_bitmap_only(obj->cpuset, idx);
|
||||
hwloc_debug_1arg_bitmap("cpu %u has cpuset %s\n",
|
||||
idx, obj->cpuset);
|
||||
hwloc_insert_object_by_cpuset(topology, obj);
|
||||
hwloc__insert_object_by_cpuset(topology, NULL, obj, "windows:pu");
|
||||
}
|
||||
}
|
||||
|
||||
if (has_efficiencyclass) {
|
||||
topology->support.discovery->cpukind_efficiency = 1;
|
||||
hwloc_win_efficiency_classes_register(topology, &eclasses);
|
||||
}
|
||||
|
||||
out:
|
||||
if (has_efficiencyclass)
|
||||
hwloc_win_efficiency_classes_destroy(&eclasses);
|
||||
|
||||
/* emulate uname instead of calling hwloc_add_uname_info() */
|
||||
hwloc_obj_add_info(topology->levels[0][0], "Backend", "Windows");
|
||||
hwloc_add_uname_info(topology, NULL);
|
||||
hwloc_obj_add_info(topology->levels[0][0], "OSName", "Windows");
|
||||
|
||||
#if defined(__CYGWIN__)
|
||||
hwloc_obj_add_info(topology->levels[0][0], "WindowsBuildEnvironment", "Cygwin");
|
||||
#elif defined(__MINGW32__)
|
||||
hwloc_obj_add_info(topology->levels[0][0], "WindowsBuildEnvironment", "MinGW");
|
||||
#endif
|
||||
|
||||
/* see https://docs.microsoft.com/en-us/windows/win32/api/winnt/ns-winnt-osversioninfoexa */
|
||||
if (osvi.dwMajorVersion == 10) {
|
||||
if (osvi.dwMinorVersion == 0)
|
||||
hwloc_obj_add_info(topology->levels[0][0], "OSRelease", "10");
|
||||
} else if (osvi.dwMajorVersion == 6) {
|
||||
if (osvi.dwMinorVersion == 3)
|
||||
hwloc_obj_add_info(topology->levels[0][0], "OSRelease", "8.1"); /* or "Server 2012 R2" */
|
||||
else if (osvi.dwMinorVersion == 2)
|
||||
hwloc_obj_add_info(topology->levels[0][0], "OSRelease", "8"); /* or "Server 2012" */
|
||||
else if (osvi.dwMinorVersion == 1)
|
||||
hwloc_obj_add_info(topology->levels[0][0], "OSRelease", "7"); /* or "Server 2008 R2" */
|
||||
else if (osvi.dwMinorVersion == 0)
|
||||
hwloc_obj_add_info(topology->levels[0][0], "OSRelease", "Vista"); /* or "Server 2008" */
|
||||
} /* earlier versions are ignored */
|
||||
|
||||
snprintf(versionstr, sizeof(versionstr), "%u.%u.%u", osvi.dwMajorVersion, osvi.dwMinorVersion, osvi.dwBuildNumber);
|
||||
hwloc_obj_add_info(topology->levels[0][0], "OSVersion", versionstr);
|
||||
|
||||
#if !defined(__CYGWIN__)
|
||||
GetComputerName(hostname, &hostname_size);
|
||||
#else
|
||||
gethostname(hostname, hostname_size);
|
||||
#endif
|
||||
if (*hostname)
|
||||
hwloc_obj_add_info(topology->levels[0][0], "Hostname", hostname);
|
||||
|
||||
/* convert to unix-like architecture strings */
|
||||
switch (SystemInfo.wProcessorArchitecture) {
|
||||
case 0:
|
||||
hwloc_obj_add_info(topology->levels[0][0], "Architecture", "i686");
|
||||
break;
|
||||
case 9:
|
||||
hwloc_obj_add_info(topology->levels[0][0], "Architecture", "x86_64");
|
||||
break;
|
||||
case 5:
|
||||
hwloc_obj_add_info(topology->levels[0][0], "Architecture", "arm");
|
||||
break;
|
||||
case 12:
|
||||
hwloc_obj_add_info(topology->levels[0][0], "Architecture", "arm64");
|
||||
break;
|
||||
case 6:
|
||||
hwloc_obj_add_info(topology->levels[0][0], "Architecture", "ia64");
|
||||
break;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
80
src/3rdparty/hwloc/src/topology-x86.c
vendored
80
src/3rdparty/hwloc/src/topology-x86.c
vendored
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright © 2010-2019 Inria. All rights reserved.
|
||||
* Copyright © 2010-2020 Inria. All rights reserved.
|
||||
* Copyright © 2010-2013 Université Bordeaux
|
||||
* Copyright © 2010-2011 Cisco Systems, Inc. All rights reserved.
|
||||
* See COPYING in top-level directory.
|
||||
|
@ -181,6 +181,7 @@ enum hwloc_x86_disc_flags {
|
|||
|
||||
#define has_topoext(features) ((features)[6] & (1 << 22))
|
||||
#define has_x2apic(features) ((features)[4] & (1 << 21))
|
||||
#define has_hybrid(features) ((features)[18] & (1 << 15))
|
||||
|
||||
struct cacheinfo {
|
||||
hwloc_obj_cache_type_t type;
|
||||
|
@ -217,6 +218,9 @@ struct procinfo {
|
|||
unsigned cpustepping;
|
||||
unsigned cpumodelnumber;
|
||||
unsigned cpufamilynumber;
|
||||
|
||||
unsigned hybridcoretype;
|
||||
unsigned hybridnativemodel;
|
||||
};
|
||||
|
||||
enum cpuid_type {
|
||||
|
@ -681,6 +685,15 @@ static void look_proc(struct hwloc_backend *backend, struct procinfo *infos, uns
|
|||
}
|
||||
}
|
||||
|
||||
if (highest_cpuid >= 0x1a && has_hybrid(features)) {
|
||||
/* Get hybrid cpu information from cpuid 0x1a */
|
||||
eax = 0x1a;
|
||||
ecx = 0;
|
||||
cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump);
|
||||
infos->hybridcoretype = eax >> 24;
|
||||
infos->hybridnativemodel = eax & 0xffffff;
|
||||
}
|
||||
|
||||
/*********************************************************************************
|
||||
* Get the hierarchy of thread, core, die, package, etc. from CPU-specific leaves
|
||||
*/
|
||||
|
@ -751,7 +764,13 @@ static void look_proc(struct hwloc_backend *backend, struct procinfo *infos, uns
|
|||
/* default cacheid value */
|
||||
cache->cacheid = infos->apicid / cache->nbthreads_sharing;
|
||||
|
||||
if (cpuid_type == amd) {
|
||||
if (cpuid_type == intel) {
|
||||
/* round nbthreads_sharing to nearest power of two to build a mask (for clearing lower bits) */
|
||||
unsigned bits = hwloc_flsl(cache->nbthreads_sharing-1);
|
||||
unsigned mask = ~((1U<<bits) - 1);
|
||||
cache->cacheid = infos->apicid & mask;
|
||||
|
||||
} else if (cpuid_type == amd) {
|
||||
/* AMD quirks */
|
||||
if (infos->cpufamilynumber == 0x17
|
||||
&& cache->level == 3 && cache->nbthreads_sharing == 6) {
|
||||
|
@ -872,7 +891,7 @@ hwloc_x86_add_groups(hwloc_topology_t topology,
|
|||
obj->attr->group.dont_merge = dont_merge;
|
||||
hwloc_debug_2args_bitmap("os %s %u has cpuset %s\n",
|
||||
subtype, id, obj_cpuset);
|
||||
hwloc_insert_object_by_cpuset(topology, obj);
|
||||
hwloc__insert_object_by_cpuset(topology, NULL, obj, "x86:group");
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -930,7 +949,7 @@ static void summarize(struct hwloc_backend *backend, struct procinfo *infos, uns
|
|||
|
||||
hwloc_debug_1arg_bitmap("os package %u has cpuset %s\n",
|
||||
packageid, package_cpuset);
|
||||
hwloc_insert_object_by_cpuset(topology, package);
|
||||
hwloc__insert_object_by_cpuset(topology, NULL, package, "x86:package");
|
||||
|
||||
} else {
|
||||
/* Annotate packages previously-existing packages */
|
||||
|
@ -986,7 +1005,7 @@ static void summarize(struct hwloc_backend *backend, struct procinfo *infos, uns
|
|||
hwloc_bitmap_set(node->nodeset, nodeid);
|
||||
hwloc_debug_1arg_bitmap("os node %u has cpuset %s\n",
|
||||
nodeid, node_cpuset);
|
||||
hwloc_insert_object_by_cpuset(topology, node);
|
||||
hwloc__insert_object_by_cpuset(topology, NULL, node, "x86:numa");
|
||||
gotnuma++;
|
||||
}
|
||||
}
|
||||
|
@ -1033,7 +1052,7 @@ static void summarize(struct hwloc_backend *backend, struct procinfo *infos, uns
|
|||
unknown_obj->attr->group.subkind = level;
|
||||
hwloc_debug_2args_bitmap("os unknown%u %u has cpuset %s\n",
|
||||
level, unknownid, unknown_cpuset);
|
||||
hwloc_insert_object_by_cpuset(topology, unknown_obj);
|
||||
hwloc__insert_object_by_cpuset(topology, NULL, unknown_obj, "x86:group:unknown");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1073,7 +1092,7 @@ static void summarize(struct hwloc_backend *backend, struct procinfo *infos, uns
|
|||
die->cpuset = die_cpuset;
|
||||
hwloc_debug_1arg_bitmap("os die %u has cpuset %s\n",
|
||||
dieid, die_cpuset);
|
||||
hwloc_insert_object_by_cpuset(topology, die);
|
||||
hwloc__insert_object_by_cpuset(topology, NULL, die, "x86:die");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1111,7 +1130,7 @@ static void summarize(struct hwloc_backend *backend, struct procinfo *infos, uns
|
|||
core->cpuset = core_cpuset;
|
||||
hwloc_debug_1arg_bitmap("os core %u has cpuset %s\n",
|
||||
coreid, core_cpuset);
|
||||
hwloc_insert_object_by_cpuset(topology, core);
|
||||
hwloc__insert_object_by_cpuset(topology, NULL, core, "x86:core");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1125,7 +1144,7 @@ static void summarize(struct hwloc_backend *backend, struct procinfo *infos, uns
|
|||
obj->cpuset = hwloc_bitmap_alloc();
|
||||
hwloc_bitmap_only(obj->cpuset, i);
|
||||
hwloc_debug_1arg_bitmap("PU %u has cpuset %s\n", i, obj->cpuset);
|
||||
hwloc_insert_object_by_cpuset(topology, obj);
|
||||
hwloc__insert_object_by_cpuset(topology, NULL, obj, "x86:pu");
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1208,7 +1227,7 @@ static void summarize(struct hwloc_backend *backend, struct procinfo *infos, uns
|
|||
hwloc_obj_add_info(cache, "Inclusive", infos[i].cache[l].inclusive ? "1" : "0");
|
||||
hwloc_debug_2args_bitmap("os L%u cache %u has cpuset %s\n",
|
||||
level, cacheid, cache_cpuset);
|
||||
hwloc_insert_object_by_cpuset(topology, cache);
|
||||
hwloc__insert_object_by_cpuset(topology, NULL, cache, "x86:cache");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1274,8 +1293,41 @@ look_procs(struct hwloc_backend *backend, struct procinfo *infos, unsigned long
|
|||
hwloc_bitmap_free(orig_cpuset);
|
||||
}
|
||||
|
||||
if (data->apicid_unique)
|
||||
if (data->apicid_unique) {
|
||||
summarize(backend, infos, flags);
|
||||
|
||||
if (has_hybrid(features)) {
|
||||
/* use hybrid info for cpukinds */
|
||||
hwloc_bitmap_t atomset = hwloc_bitmap_alloc();
|
||||
hwloc_bitmap_t coreset = hwloc_bitmap_alloc();
|
||||
for(i=0; i<nbprocs; i++) {
|
||||
if (infos[i].hybridcoretype == 0x20)
|
||||
hwloc_bitmap_set(atomset, i);
|
||||
else if (infos[i].hybridcoretype == 0x40)
|
||||
hwloc_bitmap_set(coreset, i);
|
||||
}
|
||||
/* register IntelAtom set if any */
|
||||
if (!hwloc_bitmap_iszero(atomset)) {
|
||||
struct hwloc_info_s infoattr;
|
||||
infoattr.name = (char *) "CoreType";
|
||||
infoattr.value = (char *) "IntelAtom";
|
||||
hwloc_internal_cpukinds_register(topology, atomset, HWLOC_CPUKIND_EFFICIENCY_UNKNOWN, &infoattr, 1, 0);
|
||||
/* the cpuset is given to the callee */
|
||||
} else {
|
||||
hwloc_bitmap_free(atomset);
|
||||
}
|
||||
/* register IntelCore set if any */
|
||||
if (!hwloc_bitmap_iszero(coreset)) {
|
||||
struct hwloc_info_s infoattr;
|
||||
infoattr.name = (char *) "CoreType";
|
||||
infoattr.value = (char *) "IntelCore";
|
||||
hwloc_internal_cpukinds_register(topology, coreset, HWLOC_CPUKIND_EFFICIENCY_UNKNOWN, &infoattr, 1, 0);
|
||||
/* the cpuset is given to the callee */
|
||||
} else {
|
||||
hwloc_bitmap_free(coreset);
|
||||
}
|
||||
}
|
||||
}
|
||||
/* if !data->apicid_unique, do nothing and return success, so that the caller does nothing either */
|
||||
|
||||
return 0;
|
||||
|
@ -1354,7 +1406,7 @@ int hwloc_look_x86(struct hwloc_backend *backend, unsigned long flags)
|
|||
unsigned highest_cpuid;
|
||||
unsigned highest_ext_cpuid;
|
||||
/* This stores cpuid features with the same indexing as Linux */
|
||||
unsigned features[10] = { 0 };
|
||||
unsigned features[19] = { 0 };
|
||||
struct procinfo *infos = NULL;
|
||||
enum cpuid_type cpuid_type = unknown;
|
||||
hwloc_x86_os_state_t os_state;
|
||||
|
@ -1381,6 +1433,9 @@ int hwloc_look_x86(struct hwloc_backend *backend, unsigned long flags)
|
|||
/* check if binding works */
|
||||
memset(&hooks, 0, sizeof(hooks));
|
||||
support.membind = &memsupport;
|
||||
/* We could just copy the main hooks (except in some corner cases),
|
||||
* but the current overhead is negligible, so just always reget them.
|
||||
*/
|
||||
hwloc_set_native_binding_hooks(&hooks, &support);
|
||||
if (hooks.get_thisthread_cpubind && hooks.set_thisthread_cpubind) {
|
||||
get_cpubind = hooks.get_thisthread_cpubind;
|
||||
|
@ -1451,6 +1506,7 @@ int hwloc_look_x86(struct hwloc_backend *backend, unsigned long flags)
|
|||
ecx = 0;
|
||||
cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump);
|
||||
features[9] = ebx;
|
||||
features[18] = edx;
|
||||
}
|
||||
|
||||
if (cpuid_type != intel && highest_ext_cpuid >= 0x80000001) {
|
||||
|
|
|
@ -213,7 +213,7 @@ hwloc__nolibxml_import_close_child(hwloc__xml_import_state_t state)
|
|||
|
||||
static int
|
||||
hwloc__nolibxml_import_get_content(hwloc__xml_import_state_t state,
|
||||
char **beginp, size_t expected_length)
|
||||
const char **beginp, size_t expected_length)
|
||||
{
|
||||
hwloc__nolibxml_import_state_data_t nstate = (void*) state->data;
|
||||
char *buffer = nstate->tagbuffer;
|
||||
|
@ -224,7 +224,7 @@ hwloc__nolibxml_import_get_content(hwloc__xml_import_state_t state,
|
|||
if (nstate->closed) {
|
||||
if (expected_length)
|
||||
return -1;
|
||||
*beginp = (char *) "";
|
||||
*beginp = "";
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
595
src/3rdparty/hwloc/src/topology-xml.c
vendored
595
src/3rdparty/hwloc/src/topology-xml.c
vendored
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
* Copyright © 2009 CNRS
|
||||
* Copyright © 2009-2020 Inria. All rights reserved.
|
||||
* Copyright © 2009-2011 Université Bordeaux
|
||||
* Copyright © 2009-2011, 2020 Université Bordeaux
|
||||
* Copyright © 2009-2018 Cisco Systems, Inc. All rights reserved.
|
||||
* See COPYING in top-level directory.
|
||||
*/
|
||||
|
@ -481,11 +481,9 @@ hwloc__xml_import_object_attr(struct hwloc_topology *topology,
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
static int
|
||||
hwloc__xml_import_info(struct hwloc_xml_backend_data_s *data,
|
||||
hwloc_obj_t obj,
|
||||
hwloc__xml_import_state_t state)
|
||||
hwloc___xml_import_info(char **infonamep, char **infovaluep,
|
||||
hwloc__xml_import_state_t state)
|
||||
{
|
||||
char *infoname = NULL;
|
||||
char *infovalue = NULL;
|
||||
|
@ -502,6 +500,25 @@ hwloc__xml_import_info(struct hwloc_xml_backend_data_s *data,
|
|||
return -1;
|
||||
}
|
||||
|
||||
*infonamep = infoname;
|
||||
*infovaluep = infovalue;
|
||||
|
||||
return state->global->close_tag(state);
|
||||
}
|
||||
|
||||
static int
|
||||
hwloc__xml_import_obj_info(struct hwloc_xml_backend_data_s *data,
|
||||
hwloc_obj_t obj,
|
||||
hwloc__xml_import_state_t state)
|
||||
{
|
||||
char *infoname = NULL;
|
||||
char *infovalue = NULL;
|
||||
int err;
|
||||
|
||||
err = hwloc___xml_import_info(&infoname, &infovalue, state);
|
||||
if (err < 0)
|
||||
return err;
|
||||
|
||||
if (infoname) {
|
||||
/* empty strings are ignored by libxml */
|
||||
if (data->version_major < 2 &&
|
||||
|
@ -518,7 +535,7 @@ hwloc__xml_import_info(struct hwloc_xml_backend_data_s *data,
|
|||
}
|
||||
}
|
||||
|
||||
return state->global->close_tag(state);
|
||||
return err;
|
||||
}
|
||||
|
||||
static int
|
||||
|
@ -694,14 +711,15 @@ hwloc__xml_import_userdata(hwloc_topology_t topology __hwloc_attribute_unused, h
|
|||
}
|
||||
|
||||
if (!topology->userdata_import_cb) {
|
||||
char *buffer;
|
||||
const char *buffer;
|
||||
size_t reallength = encoded ? BASE64_ENCODED_LENGTH(length) : length;
|
||||
ret = state->global->get_content(state, &buffer, reallength);
|
||||
if (ret < 0)
|
||||
return -1;
|
||||
|
||||
} else if (topology->userdata_not_decoded) {
|
||||
char *buffer, *fakename;
|
||||
const char *buffer;
|
||||
char *fakename;
|
||||
size_t reallength = encoded ? BASE64_ENCODED_LENGTH(length) : length;
|
||||
ret = state->global->get_content(state, &buffer, reallength);
|
||||
if (ret < 0)
|
||||
|
@ -714,7 +732,7 @@ hwloc__xml_import_userdata(hwloc_topology_t topology __hwloc_attribute_unused, h
|
|||
free(fakename);
|
||||
|
||||
} else if (encoded && length) {
|
||||
char *encoded_buffer;
|
||||
const char *encoded_buffer;
|
||||
size_t encoded_length = BASE64_ENCODED_LENGTH(length);
|
||||
ret = state->global->get_content(state, &encoded_buffer, encoded_length);
|
||||
if (ret < 0)
|
||||
|
@ -734,7 +752,7 @@ hwloc__xml_import_userdata(hwloc_topology_t topology __hwloc_attribute_unused, h
|
|||
}
|
||||
|
||||
} else { /* always handle length==0 in the non-encoded case */
|
||||
char *buffer = (char *) "";
|
||||
const char *buffer = "";
|
||||
if (length) {
|
||||
ret = state->global->get_content(state, &buffer, length);
|
||||
if (ret < 0)
|
||||
|
@ -888,7 +906,7 @@ hwloc__xml_import_object(hwloc_topology_t topology,
|
|||
}
|
||||
|
||||
} else if (!strcmp(tag, "info")) {
|
||||
ret = hwloc__xml_import_info(data, obj, &childstate);
|
||||
ret = hwloc__xml_import_obj_info(data, obj, &childstate);
|
||||
} else if (data->version_major < 2 && !strcmp(tag, "distances")) {
|
||||
ret = hwloc__xml_v1import_distances(data, obj, &childstate);
|
||||
} else if (!strcmp(tag, "userdata")) {
|
||||
|
@ -1238,6 +1256,80 @@ hwloc__xml_import_object(hwloc_topology_t topology,
|
|||
return -1;
|
||||
}
|
||||
|
||||
static int
|
||||
hwloc__xml_v2import_support(hwloc_topology_t topology,
|
||||
hwloc__xml_import_state_t state)
|
||||
{
|
||||
char *name = NULL;
|
||||
int value = 1; /* value is optional */
|
||||
while (1) {
|
||||
char *attrname, *attrvalue;
|
||||
if (state->global->next_attr(state, &attrname, &attrvalue) < 0)
|
||||
break;
|
||||
if (!strcmp(attrname, "name"))
|
||||
name = attrvalue;
|
||||
else if (!strcmp(attrname, "value"))
|
||||
value = atoi(attrvalue);
|
||||
else {
|
||||
if (hwloc__xml_verbose())
|
||||
fprintf(stderr, "%s: ignoring unknown support attribute %s\n",
|
||||
state->global->msgprefix, attrname);
|
||||
}
|
||||
}
|
||||
|
||||
if (name && topology->flags & HWLOC_TOPOLOGY_FLAG_IMPORT_SUPPORT) {
|
||||
#ifdef HWLOC_DEBUG
|
||||
HWLOC_BUILD_ASSERT(sizeof(struct hwloc_topology_support) == 4*sizeof(void*));
|
||||
HWLOC_BUILD_ASSERT(sizeof(struct hwloc_topology_discovery_support) == 6);
|
||||
HWLOC_BUILD_ASSERT(sizeof(struct hwloc_topology_cpubind_support) == 11);
|
||||
HWLOC_BUILD_ASSERT(sizeof(struct hwloc_topology_membind_support) == 15);
|
||||
HWLOC_BUILD_ASSERT(sizeof(struct hwloc_topology_misc_support) == 1);
|
||||
#endif
|
||||
|
||||
#define DO(_cat,_name) if (!strcmp(#_cat "." #_name, name)) topology->support._cat->_name = value
|
||||
DO(discovery,pu);
|
||||
else DO(discovery,numa);
|
||||
else DO(discovery,numa_memory);
|
||||
else DO(discovery,disallowed_pu);
|
||||
else DO(discovery,disallowed_numa);
|
||||
else DO(discovery,cpukind_efficiency);
|
||||
else DO(cpubind,set_thisproc_cpubind);
|
||||
else DO(cpubind,get_thisproc_cpubind);
|
||||
else DO(cpubind,set_proc_cpubind);
|
||||
else DO(cpubind,get_proc_cpubind);
|
||||
else DO(cpubind,set_thisthread_cpubind);
|
||||
else DO(cpubind,get_thisthread_cpubind);
|
||||
else DO(cpubind,set_thread_cpubind);
|
||||
else DO(cpubind,get_thread_cpubind);
|
||||
else DO(cpubind,get_thisproc_last_cpu_location);
|
||||
else DO(cpubind,get_proc_last_cpu_location);
|
||||
else DO(cpubind,get_thisthread_last_cpu_location);
|
||||
else DO(membind,set_thisproc_membind);
|
||||
else DO(membind,get_thisproc_membind);
|
||||
else DO(membind,set_proc_membind);
|
||||
else DO(membind,get_proc_membind);
|
||||
else DO(membind,set_thisthread_membind);
|
||||
else DO(membind,get_thisthread_membind);
|
||||
else DO(membind,set_area_membind);
|
||||
else DO(membind,get_area_membind);
|
||||
else DO(membind,alloc_membind);
|
||||
else DO(membind,firsttouch_membind);
|
||||
else DO(membind,bind_membind);
|
||||
else DO(membind,interleave_membind);
|
||||
else DO(membind,nexttouch_membind);
|
||||
else DO(membind,migrate_membind);
|
||||
else DO(membind,get_area_memlocation);
|
||||
|
||||
else if (!strcmp("custom.exported_support", name))
|
||||
/* support was exported in a custom/fake field, mark it as imported here */
|
||||
topology->support.misc->imported_support = 1;
|
||||
|
||||
#undef DO
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
hwloc__xml_v2import_distances(hwloc_topology_t topology,
|
||||
hwloc__xml_import_state_t state,
|
||||
|
@ -1317,7 +1409,8 @@ hwloc__xml_v2import_distances(hwloc_topology_t topology,
|
|||
nr_u64values = 0;
|
||||
while (1) {
|
||||
struct hwloc__xml_import_state_s childstate;
|
||||
char *attrname, *attrvalue, *tag, *buffer;
|
||||
char *attrname, *attrvalue, *tag;
|
||||
const char *buffer;
|
||||
int length;
|
||||
int is_index = 0;
|
||||
int is_u64values = 0;
|
||||
|
@ -1356,7 +1449,7 @@ hwloc__xml_v2import_distances(hwloc_topology_t topology,
|
|||
|
||||
if (is_index) {
|
||||
/* get indexes */
|
||||
char *tmp, *tmp2;
|
||||
const char *tmp, *tmp2;
|
||||
if (nr_indexes >= nbobjs) {
|
||||
if (hwloc__xml_verbose())
|
||||
fprintf(stderr, "%s: %s with more than %u indexes\n",
|
||||
|
@ -1398,7 +1491,7 @@ hwloc__xml_v2import_distances(hwloc_topology_t topology,
|
|||
|
||||
} else if (is_u64values) {
|
||||
/* get uint64_t values */
|
||||
char *tmp;
|
||||
const char *tmp;
|
||||
if (nr_u64values >= nbobjs*nbobjs) {
|
||||
if (hwloc__xml_verbose())
|
||||
fprintf(stderr, "%s: %s with more than %u u64values\n",
|
||||
|
@ -1491,6 +1584,259 @@ hwloc__xml_v2import_distances(hwloc_topology_t topology,
|
|||
#undef _TAG_NAME
|
||||
}
|
||||
|
||||
static int
|
||||
hwloc__xml_import_memattr_value(hwloc_topology_t topology,
|
||||
hwloc_memattr_id_t id,
|
||||
unsigned long flags,
|
||||
hwloc__xml_import_state_t state)
|
||||
{
|
||||
char *target_obj_gp_index_s = NULL;
|
||||
char *target_obj_type_s = NULL;
|
||||
hwloc_uint64_t target_obj_gp_index;
|
||||
char *value_s = NULL;
|
||||
hwloc_uint64_t value;
|
||||
char *initiator_cpuset_s = NULL;
|
||||
char *initiator_obj_gp_index_s = NULL;
|
||||
char *initiator_obj_type_s = NULL;
|
||||
hwloc_obj_type_t target_obj_type = HWLOC_OBJ_TYPE_NONE;
|
||||
|
||||
while (1) {
|
||||
char *attrname, *attrvalue;
|
||||
if (state->global->next_attr(state, &attrname, &attrvalue) < 0)
|
||||
break;
|
||||
if (!strcmp(attrname, "target_obj_gp_index"))
|
||||
target_obj_gp_index_s = attrvalue;
|
||||
else if (!strcmp(attrname, "target_obj_type"))
|
||||
target_obj_type_s = attrvalue;
|
||||
else if (!strcmp(attrname, "value"))
|
||||
value_s = attrvalue;
|
||||
else if (!strcmp(attrname, "initiator_cpuset"))
|
||||
initiator_cpuset_s = attrvalue;
|
||||
else if (!strcmp(attrname, "initiator_obj_gp_index"))
|
||||
initiator_obj_gp_index_s = attrvalue;
|
||||
else if (!strcmp(attrname, "initiator_obj_type"))
|
||||
initiator_obj_type_s = attrvalue;
|
||||
else {
|
||||
if (hwloc__xml_verbose())
|
||||
fprintf(stderr, "%s: ignoring unknown memattr_value attribute %s\n",
|
||||
state->global->msgprefix, attrname);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
if (!target_obj_type_s) {
|
||||
if (hwloc__xml_verbose())
|
||||
fprintf(stderr, "%s: ignoring memattr_value without target_obj_type.\n",
|
||||
state->global->msgprefix);
|
||||
return -1;
|
||||
}
|
||||
if (hwloc_type_sscanf(target_obj_type_s, &target_obj_type, NULL, 0) < 0) {
|
||||
if (hwloc__xml_verbose())
|
||||
fprintf(stderr, "%s: failed to identify memattr_value target object type %s\n",
|
||||
state->global->msgprefix, target_obj_type_s);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (!value_s || !target_obj_gp_index_s) {
|
||||
if (hwloc__xml_verbose())
|
||||
fprintf(stderr, "%s: ignoring memattr_value without value and target_obj_gp_index\n",
|
||||
state->global->msgprefix);
|
||||
return -1;
|
||||
}
|
||||
target_obj_gp_index = strtoull(target_obj_gp_index_s, NULL, 10);
|
||||
value = strtoull(value_s, NULL, 10);
|
||||
|
||||
if (flags & HWLOC_MEMATTR_FLAG_NEED_INITIATOR) {
|
||||
/* add a value with initiator */
|
||||
struct hwloc_internal_location_s loc;
|
||||
if (!initiator_cpuset_s && (!initiator_obj_gp_index_s || !initiator_obj_type_s)) {
|
||||
if (hwloc__xml_verbose())
|
||||
fprintf(stderr, "%s: ignoring memattr_value without initiator attributes\n",
|
||||
state->global->msgprefix);
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* setup the initiator */
|
||||
if (initiator_cpuset_s) {
|
||||
loc.type = HWLOC_LOCATION_TYPE_CPUSET;
|
||||
loc.location.cpuset = hwloc_bitmap_alloc();
|
||||
if (!loc.location.cpuset) {
|
||||
if (hwloc__xml_verbose())
|
||||
fprintf(stderr, "%s: failed to allocated memattr_value initiator cpuset\n",
|
||||
state->global->msgprefix);
|
||||
return -1;
|
||||
}
|
||||
hwloc_bitmap_sscanf(loc.location.cpuset, initiator_cpuset_s);
|
||||
} else {
|
||||
loc.type = HWLOC_LOCATION_TYPE_OBJECT;
|
||||
loc.location.object.gp_index = strtoull(initiator_obj_gp_index_s, NULL, 10);
|
||||
if (hwloc_type_sscanf(initiator_obj_type_s, &loc.location.object.type, NULL, 0) < 0) {
|
||||
if (hwloc__xml_verbose())
|
||||
fprintf(stderr, "%s: failed to identify memattr_value initiator object type %s\n",
|
||||
state->global->msgprefix, initiator_obj_type_s);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
hwloc_internal_memattr_set_value(topology, id, target_obj_type, target_obj_gp_index, (unsigned)-1, &loc, value);
|
||||
|
||||
if (loc.type == HWLOC_LOCATION_TYPE_CPUSET)
|
||||
hwloc_bitmap_free(loc.location.cpuset);
|
||||
|
||||
} else {
|
||||
/* add a value without initiator */
|
||||
hwloc_internal_memattr_set_value(topology, id, target_obj_type, target_obj_gp_index, (unsigned)-1, NULL, value);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
hwloc__xml_import_memattr(hwloc_topology_t topology,
|
||||
hwloc__xml_import_state_t state)
|
||||
{
|
||||
char *name = NULL;
|
||||
unsigned long flags = (unsigned long) -1;
|
||||
hwloc_memattr_id_t id = (hwloc_memattr_id_t) -1;
|
||||
int ret;
|
||||
|
||||
while (1) {
|
||||
char *attrname, *attrvalue;
|
||||
if (state->global->next_attr(state, &attrname, &attrvalue) < 0)
|
||||
break;
|
||||
if (!strcmp(attrname, "name"))
|
||||
name = attrvalue;
|
||||
else if (!strcmp(attrname, "flags"))
|
||||
flags = strtoul(attrvalue, NULL, 10);
|
||||
else {
|
||||
if (hwloc__xml_verbose())
|
||||
fprintf(stderr, "%s: ignoring unknown memattr attribute %s\n",
|
||||
state->global->msgprefix, attrname);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
if (name && flags != (unsigned long) -1) {
|
||||
hwloc_memattr_id_t _id;
|
||||
|
||||
ret = hwloc_memattr_get_by_name(topology, name, &_id);
|
||||
if (ret < 0) {
|
||||
/* register a new attribute */
|
||||
ret = hwloc_memattr_register(topology, name, flags, &_id);
|
||||
if (!ret)
|
||||
id = _id;
|
||||
} else {
|
||||
/* check the flags of the existing attribute */
|
||||
unsigned long mflags;
|
||||
ret = hwloc_memattr_get_flags(topology, _id, &mflags);
|
||||
if (!ret && mflags == flags)
|
||||
id = _id;
|
||||
}
|
||||
/* if there's no matching attribute, id is -1 and values will be ignored below */
|
||||
}
|
||||
|
||||
while (1) {
|
||||
struct hwloc__xml_import_state_s childstate;
|
||||
char *tag;
|
||||
|
||||
ret = state->global->find_child(state, &childstate, &tag);
|
||||
if (ret <= 0)
|
||||
break;
|
||||
|
||||
if (!strcmp(tag, "memattr_value")) {
|
||||
ret = hwloc__xml_import_memattr_value(topology, id, flags, &childstate);
|
||||
} else {
|
||||
if (hwloc__xml_verbose())
|
||||
fprintf(stderr, "%s: memattr with unrecognized child %s\n",
|
||||
state->global->msgprefix, tag);
|
||||
ret = -1;
|
||||
}
|
||||
|
||||
if (ret < 0)
|
||||
goto error;
|
||||
|
||||
state->global->close_child(&childstate);
|
||||
}
|
||||
|
||||
return state->global->close_tag(state);
|
||||
|
||||
error:
|
||||
return -1;
|
||||
}
|
||||
|
||||
static int
|
||||
hwloc__xml_import_cpukind(hwloc_topology_t topology,
|
||||
hwloc__xml_import_state_t state)
|
||||
{
|
||||
hwloc_bitmap_t cpuset = NULL;
|
||||
int forced_efficiency = HWLOC_CPUKIND_EFFICIENCY_UNKNOWN;
|
||||
unsigned nr_infos = 0;
|
||||
struct hwloc_info_s *infos = NULL;
|
||||
int ret;
|
||||
|
||||
while (1) {
|
||||
char *attrname, *attrvalue;
|
||||
if (state->global->next_attr(state, &attrname, &attrvalue) < 0)
|
||||
break;
|
||||
if (!strcmp(attrname, "cpuset")) {
|
||||
if (!cpuset)
|
||||
cpuset = hwloc_bitmap_alloc();
|
||||
hwloc_bitmap_sscanf(cpuset, attrvalue);
|
||||
} else if (!strcmp(attrname, "forced_efficiency")) {
|
||||
forced_efficiency = atoi(attrvalue);
|
||||
} else {
|
||||
if (hwloc__xml_verbose())
|
||||
fprintf(stderr, "%s: ignoring unknown cpukind attribute %s\n",
|
||||
state->global->msgprefix, attrname);
|
||||
hwloc_bitmap_free(cpuset);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
while (1) {
|
||||
struct hwloc__xml_import_state_s childstate;
|
||||
char *tag;
|
||||
|
||||
ret = state->global->find_child(state, &childstate, &tag);
|
||||
if (ret <= 0)
|
||||
break;
|
||||
|
||||
if (!strcmp(tag, "info")) {
|
||||
char *infoname = NULL;
|
||||
char *infovalue = NULL;
|
||||
ret = hwloc___xml_import_info(&infoname, &infovalue, &childstate);
|
||||
if (!ret && infoname && infovalue)
|
||||
hwloc__add_info(&infos, &nr_infos, infoname, infovalue);
|
||||
} else {
|
||||
if (hwloc__xml_verbose())
|
||||
fprintf(stderr, "%s: cpukind with unrecognized child %s\n",
|
||||
state->global->msgprefix, tag);
|
||||
ret = -1;
|
||||
}
|
||||
|
||||
if (ret < 0)
|
||||
goto error;
|
||||
|
||||
state->global->close_child(&childstate);
|
||||
}
|
||||
|
||||
if (!cpuset) {
|
||||
if (hwloc__xml_verbose())
|
||||
fprintf(stderr, "%s: ignoring cpukind without cpuset\n",
|
||||
state->global->msgprefix);
|
||||
goto error;
|
||||
}
|
||||
|
||||
hwloc_internal_cpukinds_register(topology, cpuset, forced_efficiency, infos, nr_infos, HWLOC_CPUKINDS_REGISTER_FLAG_OVERWRITE_FORCED_EFFICIENCY);
|
||||
|
||||
return state->global->close_tag(state);
|
||||
|
||||
error:
|
||||
hwloc__free_infos(infos, nr_infos);
|
||||
hwloc_bitmap_free(cpuset);
|
||||
return -1;
|
||||
}
|
||||
|
||||
static int
|
||||
hwloc__xml_import_diff_one(hwloc__xml_import_state_t state,
|
||||
hwloc_topology_diff_t *firstdiffp,
|
||||
|
@ -1759,6 +2105,18 @@ hwloc_look_xml(struct hwloc_backend *backend, struct hwloc_disc_status *dstatus)
|
|||
ret = hwloc__xml_v2import_distances(topology, &childstate, 1);
|
||||
if (ret < 0)
|
||||
goto failed;
|
||||
} else if (!strcmp(tag, "support")) {
|
||||
ret = hwloc__xml_v2import_support(topology, &childstate);
|
||||
if (ret < 0)
|
||||
goto failed;
|
||||
} else if (!strcmp(tag, "memattr")) {
|
||||
ret = hwloc__xml_import_memattr(topology, &childstate);
|
||||
if (ret < 0)
|
||||
goto failed;
|
||||
} else if (!strcmp(tag, "cpukind")) {
|
||||
ret = hwloc__xml_import_cpukind(topology, &childstate);
|
||||
if (ret < 0)
|
||||
goto failed;
|
||||
} else {
|
||||
if (hwloc__xml_verbose())
|
||||
fprintf(stderr, "%s: ignoring unknown tag `%s' after root object.\n",
|
||||
|
@ -1864,12 +2222,14 @@ done:
|
|||
/* keep the "Backend" information intact */
|
||||
/* we could add "BackendSource=XML" to notify that XML was used between the actual backend and here */
|
||||
|
||||
topology->support.discovery->pu = 1;
|
||||
topology->support.discovery->disallowed_pu = 1;
|
||||
if (data->nbnumanodes) {
|
||||
topology->support.discovery->numa = 1;
|
||||
topology->support.discovery->numa_memory = 1; // FIXME
|
||||
topology->support.discovery->disallowed_numa = 1;
|
||||
if (!(topology->flags & HWLOC_TOPOLOGY_FLAG_IMPORT_SUPPORT)) {
|
||||
topology->support.discovery->pu = 1;
|
||||
topology->support.discovery->disallowed_pu = 1;
|
||||
if (data->nbnumanodes) {
|
||||
topology->support.discovery->numa = 1;
|
||||
topology->support.discovery->numa_memory = 1; // FIXME
|
||||
topology->support.discovery->disallowed_numa = 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (data->look_done)
|
||||
|
@ -2620,9 +2980,199 @@ hwloc__xml_v2export_distances(hwloc__xml_export_state_t parentstate, hwloc_topol
|
|||
hwloc___xml_v2export_distances(parentstate, dist);
|
||||
}
|
||||
|
||||
static void
|
||||
hwloc__xml_v2export_support(hwloc__xml_export_state_t parentstate, hwloc_topology_t topology)
|
||||
{
|
||||
struct hwloc__xml_export_state_s state;
|
||||
char tmp[11];
|
||||
|
||||
#ifdef HWLOC_DEBUG
|
||||
HWLOC_BUILD_ASSERT(sizeof(struct hwloc_topology_support) == 4*sizeof(void*));
|
||||
HWLOC_BUILD_ASSERT(sizeof(struct hwloc_topology_discovery_support) == 6);
|
||||
HWLOC_BUILD_ASSERT(sizeof(struct hwloc_topology_cpubind_support) == 11);
|
||||
HWLOC_BUILD_ASSERT(sizeof(struct hwloc_topology_membind_support) == 15);
|
||||
HWLOC_BUILD_ASSERT(sizeof(struct hwloc_topology_misc_support) == 1);
|
||||
#endif
|
||||
|
||||
#define DO(_cat,_name) do { \
|
||||
if (topology->support._cat->_name) { \
|
||||
parentstate->new_child(parentstate, &state, "support"); \
|
||||
state.new_prop(&state, "name", #_cat "." #_name); \
|
||||
if (topology->support._cat->_name != 1) { \
|
||||
sprintf(tmp, "%u", topology->support._cat->_name); \
|
||||
state.new_prop(&state, "value", tmp); \
|
||||
} \
|
||||
state.end_object(&state, "support"); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
DO(discovery,pu);
|
||||
DO(discovery,numa);
|
||||
DO(discovery,numa_memory);
|
||||
DO(discovery,disallowed_pu);
|
||||
DO(discovery,disallowed_numa);
|
||||
DO(discovery,cpukind_efficiency);
|
||||
DO(cpubind,set_thisproc_cpubind);
|
||||
DO(cpubind,get_thisproc_cpubind);
|
||||
DO(cpubind,set_proc_cpubind);
|
||||
DO(cpubind,get_proc_cpubind);
|
||||
DO(cpubind,set_thisthread_cpubind);
|
||||
DO(cpubind,get_thisthread_cpubind);
|
||||
DO(cpubind,set_thread_cpubind);
|
||||
DO(cpubind,get_thread_cpubind);
|
||||
DO(cpubind,get_thisproc_last_cpu_location);
|
||||
DO(cpubind,get_proc_last_cpu_location);
|
||||
DO(cpubind,get_thisthread_last_cpu_location);
|
||||
DO(membind,set_thisproc_membind);
|
||||
DO(membind,get_thisproc_membind);
|
||||
DO(membind,set_proc_membind);
|
||||
DO(membind,get_proc_membind);
|
||||
DO(membind,set_thisthread_membind);
|
||||
DO(membind,get_thisthread_membind);
|
||||
DO(membind,set_area_membind);
|
||||
DO(membind,get_area_membind);
|
||||
DO(membind,alloc_membind);
|
||||
DO(membind,firsttouch_membind);
|
||||
DO(membind,bind_membind);
|
||||
DO(membind,interleave_membind);
|
||||
DO(membind,nexttouch_membind);
|
||||
DO(membind,migrate_membind);
|
||||
DO(membind,get_area_memlocation);
|
||||
|
||||
/* misc.imported_support would be meaningless in the remote importer,
|
||||
* but the importer needs to know whether we exported support or not
|
||||
* (in case there are no support bit set at all),
|
||||
* use a custom/fake field to do so.
|
||||
*/
|
||||
parentstate->new_child(parentstate, &state, "support");
|
||||
state.new_prop(&state, "name", "custom.exported_support");
|
||||
state.end_object(&state, "support");
|
||||
|
||||
#undef DO
|
||||
}
|
||||
|
||||
static void
|
||||
hwloc__xml_export_memattr_target(hwloc__xml_export_state_t state,
|
||||
struct hwloc_internal_memattr_s *imattr,
|
||||
struct hwloc_internal_memattr_target_s *imtg)
|
||||
{
|
||||
struct hwloc__xml_export_state_s vstate;
|
||||
char tmp[255];
|
||||
|
||||
if (imattr->flags & HWLOC_MEMATTR_FLAG_NEED_INITIATOR) {
|
||||
/* export all initiators */
|
||||
unsigned k;
|
||||
for(k=0; k<imtg->nr_initiators; k++) {
|
||||
struct hwloc_internal_memattr_initiator_s *imi = &imtg->initiators[k];
|
||||
state->new_child(state, &vstate, "memattr_value");
|
||||
vstate.new_prop(&vstate, "target_obj_type", hwloc_obj_type_string(imtg->type));
|
||||
snprintf(tmp, sizeof(tmp), "%llu", (unsigned long long) imtg->gp_index);
|
||||
vstate.new_prop(&vstate, "target_obj_gp_index", tmp);
|
||||
snprintf(tmp, sizeof(tmp), "%llu", (unsigned long long) imi->value);
|
||||
vstate.new_prop(&vstate, "value", tmp);
|
||||
switch (imi->initiator.type) {
|
||||
case HWLOC_LOCATION_TYPE_OBJECT:
|
||||
snprintf(tmp, sizeof(tmp), "%llu", (unsigned long long) imi->initiator.location.object.gp_index);
|
||||
vstate.new_prop(&vstate, "initiator_obj_gp_index", tmp);
|
||||
vstate.new_prop(&vstate, "initiator_obj_type", hwloc_obj_type_string(imi->initiator.location.object.type));
|
||||
break;
|
||||
case HWLOC_LOCATION_TYPE_CPUSET: {
|
||||
char *setstring;
|
||||
hwloc_bitmap_asprintf(&setstring, imi->initiator.location.cpuset);
|
||||
if (setstring)
|
||||
vstate.new_prop(&vstate, "initiator_cpuset", setstring);
|
||||
free(setstring);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
assert(0);
|
||||
}
|
||||
vstate.end_object(&vstate, "memattr_value");
|
||||
}
|
||||
} else {
|
||||
/* just export the global value */
|
||||
state->new_child(state, &vstate, "memattr_value");
|
||||
vstate.new_prop(&vstate, "target_obj_type", hwloc_obj_type_string(imtg->type));
|
||||
snprintf(tmp, sizeof(tmp), "%llu", (unsigned long long) imtg->gp_index);
|
||||
vstate.new_prop(&vstate, "target_obj_gp_index", tmp);
|
||||
snprintf(tmp, sizeof(tmp), "%llu", (unsigned long long) imtg->noinitiator_value);
|
||||
vstate.new_prop(&vstate, "value", tmp);
|
||||
vstate.end_object(&vstate, "memattr_value");
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
hwloc__xml_export_memattrs(hwloc__xml_export_state_t state, hwloc_topology_t topology)
|
||||
{
|
||||
unsigned id;
|
||||
for(id=0; id<topology->nr_memattrs; id++) {
|
||||
struct hwloc_internal_memattr_s *imattr;
|
||||
struct hwloc__xml_export_state_s mstate;
|
||||
char tmp[255];
|
||||
unsigned j;
|
||||
|
||||
if (id == HWLOC_MEMATTR_ID_CAPACITY || id == HWLOC_MEMATTR_ID_LOCALITY)
|
||||
/* no need to export virtual memattrs */
|
||||
continue;
|
||||
|
||||
imattr = &topology->memattrs[id];
|
||||
if ((id == HWLOC_MEMATTR_ID_LATENCY || id == HWLOC_MEMATTR_ID_BANDWIDTH)
|
||||
&& !imattr->nr_targets)
|
||||
/* no need to export target-less attributes for initial attributes, no release support attributes without those definitions */
|
||||
continue;
|
||||
|
||||
state->new_child(state, &mstate, "memattr");
|
||||
mstate.new_prop(&mstate, "name", imattr->name);
|
||||
snprintf(tmp, sizeof(tmp), "%lu", imattr->flags);
|
||||
mstate.new_prop(&mstate, "flags", tmp);
|
||||
|
||||
for(j=0; j<imattr->nr_targets; j++)
|
||||
hwloc__xml_export_memattr_target(&mstate, imattr, &imattr->targets[j]);
|
||||
|
||||
mstate.end_object(&mstate, "memattr");
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
hwloc__xml_export_cpukinds(hwloc__xml_export_state_t state, hwloc_topology_t topology)
|
||||
{
|
||||
unsigned i;
|
||||
for(i=0; i<topology->nr_cpukinds; i++) {
|
||||
struct hwloc_internal_cpukind_s *kind = &topology->cpukinds[i];
|
||||
struct hwloc__xml_export_state_s cstate;
|
||||
char *setstring;
|
||||
unsigned j;
|
||||
|
||||
state->new_child(state, &cstate, "cpukind");
|
||||
hwloc_bitmap_asprintf(&setstring, kind->cpuset);
|
||||
cstate.new_prop(&cstate, "cpuset", setstring);
|
||||
free(setstring);
|
||||
if (kind->forced_efficiency != HWLOC_CPUKIND_EFFICIENCY_UNKNOWN) {
|
||||
char tmp[11];
|
||||
snprintf(tmp, sizeof(tmp), "%d", kind->forced_efficiency);
|
||||
cstate.new_prop(&cstate, "forced_efficiency", tmp);
|
||||
}
|
||||
|
||||
for(j=0; j<kind->nr_infos; j++) {
|
||||
char *name = hwloc__xml_export_safestrdup(kind->infos[j].name);
|
||||
char *value = hwloc__xml_export_safestrdup(kind->infos[j].value);
|
||||
struct hwloc__xml_export_state_s istate;
|
||||
cstate.new_child(&cstate, &istate, "info");
|
||||
istate.new_prop(&istate, "name", name);
|
||||
istate.new_prop(&istate, "value", value);
|
||||
istate.end_object(&istate, "info");
|
||||
free(name);
|
||||
free(value);
|
||||
}
|
||||
|
||||
cstate.end_object(&cstate, "cpukind");
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
hwloc__xml_export_topology(hwloc__xml_export_state_t state, hwloc_topology_t topology, unsigned long flags)
|
||||
{
|
||||
char *env;
|
||||
hwloc_obj_t root = hwloc_get_root_obj(topology);
|
||||
|
||||
if (flags & HWLOC_TOPOLOGY_EXPORT_XML_FLAG_V1) {
|
||||
|
@ -2665,6 +3215,11 @@ hwloc__xml_export_topology(hwloc__xml_export_state_t state, hwloc_topology_t top
|
|||
} else {
|
||||
hwloc__xml_v2export_object (state, topology, root, flags);
|
||||
hwloc__xml_v2export_distances (state, topology);
|
||||
env = getenv("HWLOC_XML_EXPORT_SUPPORT");
|
||||
if (!env || atoi(env))
|
||||
hwloc__xml_v2export_support(state, topology);
|
||||
hwloc__xml_export_memattrs(state, topology);
|
||||
hwloc__xml_export_cpukinds(state, topology);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
234
src/3rdparty/hwloc/src/topology.c
vendored
234
src/3rdparty/hwloc/src/topology.c
vendored
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
* Copyright © 2009 CNRS
|
||||
* Copyright © 2009-2019 Inria. All rights reserved.
|
||||
* Copyright © 2009-2020 Inria. All rights reserved.
|
||||
* Copyright © 2009-2012, 2020 Université Bordeaux
|
||||
* Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved.
|
||||
* See COPYING in top-level directory.
|
||||
|
@ -75,16 +75,49 @@ int hwloc_hide_errors(void)
|
|||
return hide;
|
||||
}
|
||||
|
||||
void hwloc_report_os_error(const char *msg, int line)
|
||||
|
||||
/* format the obj info to print in error messages */
|
||||
static void
|
||||
report_insert_error_format_obj(char *buf, size_t buflen, hwloc_obj_t obj)
|
||||
{
|
||||
char typestr[64];
|
||||
char *cpusetstr;
|
||||
char *nodesetstr = NULL;
|
||||
|
||||
hwloc_obj_type_snprintf(typestr, sizeof(typestr), obj, 0);
|
||||
hwloc_bitmap_asprintf(&cpusetstr, obj->cpuset);
|
||||
if (obj->nodeset) /* may be missing during insert */
|
||||
hwloc_bitmap_asprintf(&nodesetstr, obj->nodeset);
|
||||
if (obj->os_index != HWLOC_UNKNOWN_INDEX)
|
||||
snprintf(buf, buflen, "%s (P#%u cpuset %s%s%s)",
|
||||
typestr, obj->os_index, cpusetstr,
|
||||
nodesetstr ? " nodeset " : "",
|
||||
nodesetstr ? nodesetstr : "");
|
||||
else
|
||||
snprintf(buf, buflen, "%s (cpuset %s%s%s)",
|
||||
typestr, cpusetstr,
|
||||
nodesetstr ? " nodeset " : "",
|
||||
nodesetstr ? nodesetstr : "");
|
||||
free(cpusetstr);
|
||||
free(nodesetstr);
|
||||
}
|
||||
|
||||
static void report_insert_error(hwloc_obj_t new, hwloc_obj_t old, const char *msg, const char *reason)
|
||||
{
|
||||
static int reported = 0;
|
||||
|
||||
if (!reported && !hwloc_hide_errors()) {
|
||||
if (reason && !reported && !hwloc_hide_errors()) {
|
||||
char newstr[512];
|
||||
char oldstr[512];
|
||||
report_insert_error_format_obj(newstr, sizeof(newstr), new);
|
||||
report_insert_error_format_obj(oldstr, sizeof(oldstr), old);
|
||||
|
||||
fprintf(stderr, "****************************************************************************\n");
|
||||
fprintf(stderr, "* hwloc %s received invalid information from the operating system.\n", HWLOC_VERSION);
|
||||
fprintf(stderr, "*\n");
|
||||
fprintf(stderr, "* %s\n", msg);
|
||||
fprintf(stderr, "* Error occurred in topology.c line %d\n", line);
|
||||
fprintf(stderr, "* Failed with: %s\n", msg);
|
||||
fprintf(stderr, "* while inserting %s at %s\n", newstr, oldstr);
|
||||
fprintf(stderr, "* coming from: %s\n", reason);
|
||||
fprintf(stderr, "*\n");
|
||||
fprintf(stderr, "* The following FAQ entry in the hwloc documentation may help:\n");
|
||||
fprintf(stderr, "* What should I do when hwloc reports \"operating system\" warnings?\n");
|
||||
|
@ -264,7 +297,7 @@ hwloc_setup_pu_level(struct hwloc_topology *topology,
|
|||
|
||||
hwloc_debug_2args_bitmap("cpu %u (os %u) has cpuset %s\n",
|
||||
cpu, oscpu, obj->cpuset);
|
||||
hwloc_insert_object_by_cpuset(topology, obj);
|
||||
hwloc__insert_object_by_cpuset(topology, NULL, obj, "core:pulevel");
|
||||
|
||||
cpu++;
|
||||
}
|
||||
|
@ -347,16 +380,18 @@ hwloc_debug_print_object(int indent __hwloc_attribute_unused, hwloc_obj_t obj)
|
|||
static void
|
||||
hwloc_debug_print_objects(int indent __hwloc_attribute_unused, hwloc_obj_t obj)
|
||||
{
|
||||
hwloc_obj_t child;
|
||||
hwloc_debug_print_object(indent, obj);
|
||||
for_each_child (child, obj)
|
||||
hwloc_debug_print_objects(indent + 1, child);
|
||||
for_each_memory_child (child, obj)
|
||||
hwloc_debug_print_objects(indent + 1, child);
|
||||
for_each_io_child (child, obj)
|
||||
hwloc_debug_print_objects(indent + 1, child);
|
||||
for_each_misc_child (child, obj)
|
||||
hwloc_debug_print_objects(indent + 1, child);
|
||||
if (hwloc_debug_enabled() >= 2) {
|
||||
hwloc_obj_t child;
|
||||
hwloc_debug_print_object(indent, obj);
|
||||
for_each_child (child, obj)
|
||||
hwloc_debug_print_objects(indent + 1, child);
|
||||
for_each_memory_child (child, obj)
|
||||
hwloc_debug_print_objects(indent + 1, child);
|
||||
for_each_io_child (child, obj)
|
||||
hwloc_debug_print_objects(indent + 1, child);
|
||||
for_each_misc_child (child, obj)
|
||||
hwloc_debug_print_objects(indent + 1, child);
|
||||
}
|
||||
}
|
||||
#else /* !HWLOC_DEBUG */
|
||||
#define hwloc_debug_print_object(indent, obj) do { /* nothing */ } while (0)
|
||||
|
@ -472,29 +507,33 @@ int hwloc_obj_add_info(hwloc_obj_t obj, const char *name, const char *value)
|
|||
}
|
||||
|
||||
/* This function may be called with topology->tma set, it cannot free() or realloc() */
|
||||
static int hwloc__tma_dup_infos(struct hwloc_tma *tma, hwloc_obj_t new, hwloc_obj_t src)
|
||||
int hwloc__tma_dup_infos(struct hwloc_tma *tma,
|
||||
struct hwloc_info_s **newip, unsigned *newcp,
|
||||
struct hwloc_info_s *oldi, unsigned oldc)
|
||||
{
|
||||
struct hwloc_info_s *newi;
|
||||
unsigned i, j;
|
||||
new->infos = hwloc_tma_calloc(tma, src->infos_count * sizeof(*src->infos));
|
||||
if (!new->infos)
|
||||
newi = hwloc_tma_calloc(tma, oldc * sizeof(*newi));
|
||||
if (!newi)
|
||||
return -1;
|
||||
for(i=0; i<src->infos_count; i++) {
|
||||
new->infos[i].name = hwloc_tma_strdup(tma, src->infos[i].name);
|
||||
new->infos[i].value = hwloc_tma_strdup(tma, src->infos[i].value);
|
||||
if (!new->infos[i].name || !new->infos[i].value)
|
||||
for(i=0; i<oldc; i++) {
|
||||
newi[i].name = hwloc_tma_strdup(tma, oldi[i].name);
|
||||
newi[i].value = hwloc_tma_strdup(tma, oldi[i].value);
|
||||
if (!newi[i].name || !newi[i].value)
|
||||
goto failed;
|
||||
}
|
||||
new->infos_count = src->infos_count;
|
||||
*newip = newi;
|
||||
*newcp = oldc;
|
||||
return 0;
|
||||
|
||||
failed:
|
||||
assert(!tma || !tma->dontfree); /* this tma cannot fail to allocate */
|
||||
for(j=0; j<=i; j++) {
|
||||
free(new->infos[i].name);
|
||||
free(new->infos[i].value);
|
||||
free(newi[i].name);
|
||||
free(newi[i].value);
|
||||
}
|
||||
free(new->infos);
|
||||
new->infos = NULL;
|
||||
free(newi);
|
||||
*newip = NULL;
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
@ -812,7 +851,7 @@ hwloc__duplicate_object(struct hwloc_topology *newtopology,
|
|||
newobj->nodeset = hwloc_bitmap_tma_dup(tma, src->nodeset);
|
||||
newobj->complete_nodeset = hwloc_bitmap_tma_dup(tma, src->complete_nodeset);
|
||||
|
||||
hwloc__tma_dup_infos(tma, newobj, src);
|
||||
hwloc__tma_dup_infos(tma, &newobj->infos, &newobj->infos_count, src->infos, src->infos_count);
|
||||
|
||||
/* find our level */
|
||||
if (src->depth < 0) {
|
||||
|
@ -970,6 +1009,7 @@ hwloc__topology_dup(hwloc_topology_t *newp,
|
|||
memcpy(new->support.discovery, old->support.discovery, sizeof(*old->support.discovery));
|
||||
memcpy(new->support.cpubind, old->support.cpubind, sizeof(*old->support.cpubind));
|
||||
memcpy(new->support.membind, old->support.membind, sizeof(*old->support.membind));
|
||||
memcpy(new->support.misc, old->support.misc, sizeof(*old->support.misc));
|
||||
|
||||
new->allowed_cpuset = hwloc_bitmap_tma_dup(tma, old->allowed_cpuset);
|
||||
new->allowed_nodeset = hwloc_bitmap_tma_dup(tma, old->allowed_nodeset);
|
||||
|
@ -1008,6 +1048,14 @@ hwloc__topology_dup(hwloc_topology_t *newp,
|
|||
if (err < 0)
|
||||
goto out_with_topology;
|
||||
|
||||
err = hwloc_internal_memattrs_dup(new, old);
|
||||
if (err < 0)
|
||||
goto out_with_topology;
|
||||
|
||||
err = hwloc_internal_cpukinds_dup(new, old);
|
||||
if (err < 0)
|
||||
goto out_with_topology;
|
||||
|
||||
/* we connected everything during duplication */
|
||||
new->modified = 0;
|
||||
|
||||
|
@ -1229,31 +1277,6 @@ hwloc__object_cpusets_compare_first(hwloc_obj_t obj1, hwloc_obj_t obj2)
|
|||
return 0;
|
||||
}
|
||||
|
||||
/* format the obj info to print in error messages */
|
||||
static void
|
||||
hwloc__report_error_format_obj(char *buf, size_t buflen, hwloc_obj_t obj)
|
||||
{
|
||||
char typestr[64];
|
||||
char *cpusetstr;
|
||||
char *nodesetstr = NULL;
|
||||
hwloc_obj_type_snprintf(typestr, sizeof(typestr), obj, 0);
|
||||
hwloc_bitmap_asprintf(&cpusetstr, obj->cpuset);
|
||||
if (obj->nodeset) /* may be missing during insert */
|
||||
hwloc_bitmap_asprintf(&nodesetstr, obj->nodeset);
|
||||
if (obj->os_index != HWLOC_UNKNOWN_INDEX)
|
||||
snprintf(buf, buflen, "%s (P#%u cpuset %s%s%s)",
|
||||
typestr, obj->os_index, cpusetstr,
|
||||
nodesetstr ? " nodeset " : "",
|
||||
nodesetstr ? nodesetstr : "");
|
||||
else
|
||||
snprintf(buf, buflen, "%s (cpuset %s%s%s)",
|
||||
typestr, cpusetstr,
|
||||
nodesetstr ? " nodeset " : "",
|
||||
nodesetstr ? nodesetstr : "");
|
||||
free(cpusetstr);
|
||||
free(nodesetstr);
|
||||
}
|
||||
|
||||
/*
|
||||
* How to insert objects into the topology.
|
||||
*
|
||||
|
@ -1390,9 +1413,9 @@ hwloc__insert_try_merge_group(hwloc_obj_t old, hwloc_obj_t new)
|
|||
*/
|
||||
static struct hwloc_obj *
|
||||
hwloc___insert_object_by_cpuset(struct hwloc_topology *topology, hwloc_obj_t cur, hwloc_obj_t obj,
|
||||
hwloc_report_error_t report_error)
|
||||
const char *reason)
|
||||
{
|
||||
hwloc_obj_t child, next_child = NULL;
|
||||
hwloc_obj_t child, next_child = NULL, tmp;
|
||||
/* These will always point to the pointer to their next last child. */
|
||||
hwloc_obj_t *cur_children = &cur->first_child;
|
||||
hwloc_obj_t *obj_children = &obj->first_child;
|
||||
|
@ -1430,18 +1453,10 @@ hwloc___insert_object_by_cpuset(struct hwloc_topology *topology, hwloc_obj_t cur
|
|||
|
||||
case HWLOC_OBJ_INCLUDED:
|
||||
/* OBJ is strictly contained is some child of CUR, go deeper. */
|
||||
return hwloc___insert_object_by_cpuset(topology, child, obj, report_error);
|
||||
return hwloc___insert_object_by_cpuset(topology, child, obj, reason);
|
||||
|
||||
case HWLOC_OBJ_INTERSECTS:
|
||||
if (report_error) {
|
||||
char childstr[512];
|
||||
char objstr[512];
|
||||
char msg[1100];
|
||||
hwloc__report_error_format_obj(objstr, sizeof(objstr), obj);
|
||||
hwloc__report_error_format_obj(childstr, sizeof(childstr), child);
|
||||
snprintf(msg, sizeof(msg), "%s intersects with %s without inclusion!", objstr, childstr);
|
||||
report_error(msg, __LINE__);
|
||||
}
|
||||
report_insert_error(obj, child, "intersection without inclusion", reason);
|
||||
goto putback;
|
||||
|
||||
case HWLOC_OBJ_DIFFERENT:
|
||||
|
@ -1464,6 +1479,8 @@ hwloc___insert_object_by_cpuset(struct hwloc_topology *topology, hwloc_obj_t cur
|
|||
if (setres == HWLOC_OBJ_EQUAL) {
|
||||
obj->memory_first_child = child->memory_first_child;
|
||||
child->memory_first_child = NULL;
|
||||
for(tmp=obj->memory_first_child; tmp; tmp = tmp->next_sibling)
|
||||
tmp->parent = obj;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
@ -1483,7 +1500,9 @@ hwloc___insert_object_by_cpuset(struct hwloc_topology *topology, hwloc_obj_t cur
|
|||
return obj;
|
||||
|
||||
putback:
|
||||
/* Put-back OBJ children in CUR and return an error. */
|
||||
/* OBJ cannot be inserted.
|
||||
* Put-back OBJ children in CUR and return an error.
|
||||
*/
|
||||
if (putp)
|
||||
cur_children = putp; /* No need to try to insert before where OBJ was supposed to go */
|
||||
else
|
||||
|
@ -1492,12 +1511,12 @@ hwloc___insert_object_by_cpuset(struct hwloc_topology *topology, hwloc_obj_t cur
|
|||
while ((child = obj->first_child) != NULL) {
|
||||
/* Remove from OBJ */
|
||||
obj->first_child = child->next_sibling;
|
||||
obj->parent = cur;
|
||||
/* Find child position in CUR, and insert. */
|
||||
/* Find child position in CUR, and reinsert it. */
|
||||
while (*cur_children && hwloc__object_cpusets_compare_first(*cur_children, child) < 0)
|
||||
cur_children = &(*cur_children)->next_sibling;
|
||||
child->next_sibling = *cur_children;
|
||||
*cur_children = child;
|
||||
child->parent = cur;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
@ -1521,7 +1540,7 @@ hwloc__find_obj_covering_memory_cpuset(struct hwloc_topology *topology, hwloc_ob
|
|||
|
||||
static struct hwloc_obj *
|
||||
hwloc__find_insert_memory_parent(struct hwloc_topology *topology, hwloc_obj_t obj,
|
||||
hwloc_report_error_t report_error)
|
||||
const char *reason)
|
||||
{
|
||||
hwloc_obj_t parent, group, result;
|
||||
|
||||
|
@ -1573,7 +1592,7 @@ hwloc__find_insert_memory_parent(struct hwloc_topology *topology, hwloc_obj_t ob
|
|||
return parent;
|
||||
}
|
||||
|
||||
result = hwloc__insert_object_by_cpuset(topology, parent, group, report_error);
|
||||
result = hwloc__insert_object_by_cpuset(topology, parent, group, reason);
|
||||
if (!result) {
|
||||
/* failed to insert, fallback to larger parent */
|
||||
return parent;
|
||||
|
@ -1586,8 +1605,7 @@ hwloc__find_insert_memory_parent(struct hwloc_topology *topology, hwloc_obj_t ob
|
|||
/* only works for MEMCACHE and NUMAnode with a single bit in nodeset */
|
||||
static hwloc_obj_t
|
||||
hwloc___attach_memory_object_by_nodeset(struct hwloc_topology *topology, hwloc_obj_t parent,
|
||||
hwloc_obj_t obj,
|
||||
hwloc_report_error_t report_error)
|
||||
hwloc_obj_t obj, const char *reason)
|
||||
{
|
||||
hwloc_obj_t *curp = &parent->memory_first_child;
|
||||
unsigned first = hwloc_bitmap_first(obj->nodeset);
|
||||
|
@ -1611,20 +1629,12 @@ hwloc___attach_memory_object_by_nodeset(struct hwloc_topology *topology, hwloc_o
|
|||
if (obj->type == HWLOC_OBJ_NUMANODE) {
|
||||
if (cur->type == HWLOC_OBJ_NUMANODE) {
|
||||
/* identical NUMA nodes? ignore the new one */
|
||||
if (report_error) {
|
||||
char curstr[512];
|
||||
char objstr[512];
|
||||
char msg[1100];
|
||||
hwloc__report_error_format_obj(curstr, sizeof(curstr), cur);
|
||||
hwloc__report_error_format_obj(objstr, sizeof(objstr), obj);
|
||||
snprintf(msg, sizeof(msg), "%s and %s have identical nodesets!", objstr, curstr);
|
||||
report_error(msg, __LINE__);
|
||||
}
|
||||
report_insert_error(obj, cur, "NUMAnodes with identical nodesets", reason);
|
||||
return NULL;
|
||||
}
|
||||
assert(cur->type == HWLOC_OBJ_MEMCACHE);
|
||||
/* insert the new NUMA node below that existing memcache */
|
||||
return hwloc___attach_memory_object_by_nodeset(topology, cur, obj, report_error);
|
||||
return hwloc___attach_memory_object_by_nodeset(topology, cur, obj, reason);
|
||||
|
||||
} else {
|
||||
assert(obj->type == HWLOC_OBJ_MEMCACHE);
|
||||
|
@ -1637,7 +1647,7 @@ hwloc___attach_memory_object_by_nodeset(struct hwloc_topology *topology, hwloc_o
|
|||
* (depth starts from the NUMA node).
|
||||
* insert the new memcache below the existing one
|
||||
*/
|
||||
return hwloc___attach_memory_object_by_nodeset(topology, cur, obj, report_error);
|
||||
return hwloc___attach_memory_object_by_nodeset(topology, cur, obj, reason);
|
||||
}
|
||||
/* insert the memcache above the existing memcache or numa node */
|
||||
obj->next_sibling = cur->next_sibling;
|
||||
|
@ -1673,8 +1683,7 @@ hwloc___attach_memory_object_by_nodeset(struct hwloc_topology *topology, hwloc_o
|
|||
*/
|
||||
struct hwloc_obj *
|
||||
hwloc__attach_memory_object(struct hwloc_topology *topology, hwloc_obj_t parent,
|
||||
hwloc_obj_t obj,
|
||||
hwloc_report_error_t report_error)
|
||||
hwloc_obj_t obj, const char *reason)
|
||||
{
|
||||
hwloc_obj_t result;
|
||||
|
||||
|
@ -1704,7 +1713,7 @@ hwloc__attach_memory_object(struct hwloc_topology *topology, hwloc_obj_t parent,
|
|||
hwloc_bitmap_copy(obj->complete_cpuset, parent->complete_cpuset);
|
||||
#endif
|
||||
|
||||
result = hwloc___attach_memory_object_by_nodeset(topology, parent, obj, report_error);
|
||||
result = hwloc___attach_memory_object_by_nodeset(topology, parent, obj, reason);
|
||||
if (result == obj) {
|
||||
/* Add the bit to the top sets, and to the parent CPU-side object */
|
||||
if (obj->type == HWLOC_OBJ_NUMANODE) {
|
||||
|
@ -1722,8 +1731,7 @@ hwloc__attach_memory_object(struct hwloc_topology *topology, hwloc_obj_t parent,
|
|||
/* insertion routine that lets you change the error reporting callback */
|
||||
struct hwloc_obj *
|
||||
hwloc__insert_object_by_cpuset(struct hwloc_topology *topology, hwloc_obj_t root,
|
||||
hwloc_obj_t obj,
|
||||
hwloc_report_error_t report_error)
|
||||
hwloc_obj_t obj, const char *reason)
|
||||
{
|
||||
struct hwloc_obj *result;
|
||||
|
||||
|
@ -1740,20 +1748,20 @@ hwloc__insert_object_by_cpuset(struct hwloc_topology *topology, hwloc_obj_t root
|
|||
|
||||
if (hwloc__obj_type_is_memory(obj->type)) {
|
||||
if (!root) {
|
||||
root = hwloc__find_insert_memory_parent(topology, obj, report_error);
|
||||
root = hwloc__find_insert_memory_parent(topology, obj, reason);
|
||||
if (!root) {
|
||||
hwloc_free_unlinked_object(obj);
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
return hwloc__attach_memory_object(topology, root, obj, report_error);
|
||||
return hwloc__attach_memory_object(topology, root, obj, reason);
|
||||
}
|
||||
|
||||
if (!root)
|
||||
/* Start at the top. */
|
||||
root = topology->levels[0][0];
|
||||
|
||||
result = hwloc___insert_object_by_cpuset(topology, root, obj, report_error);
|
||||
result = hwloc___insert_object_by_cpuset(topology, root, obj, reason);
|
||||
if (result && result->type == HWLOC_OBJ_PU) {
|
||||
/* Add the bit to the top sets */
|
||||
if (hwloc_bitmap_isset(result->cpuset, result->os_index))
|
||||
|
@ -1769,12 +1777,6 @@ hwloc__insert_object_by_cpuset(struct hwloc_topology *topology, hwloc_obj_t root
|
|||
|
||||
/* the default insertion routine warns in case of error.
|
||||
* it's used by most backends */
|
||||
struct hwloc_obj *
|
||||
hwloc_insert_object_by_cpuset(struct hwloc_topology *topology, hwloc_obj_t obj)
|
||||
{
|
||||
return hwloc__insert_object_by_cpuset(topology, NULL, obj, hwloc_report_os_error);
|
||||
}
|
||||
|
||||
void
|
||||
hwloc_insert_object_by_parent(struct hwloc_topology *topology, hwloc_obj_t parent, hwloc_obj_t obj)
|
||||
{
|
||||
|
@ -1917,6 +1919,7 @@ hwloc_topology_insert_group_object(struct hwloc_topology *topology, hwloc_obj_t
|
|||
if (hwloc_bitmap_isset(nodeset, numa->os_index))
|
||||
hwloc_bitmap_or(obj->cpuset, obj->cpuset, numa->cpuset);
|
||||
}
|
||||
/* FIXME insert by nodeset to group NUMAs even if CPUless? */
|
||||
|
||||
cmp = hwloc_obj_cmp_sets(obj, root);
|
||||
if (cmp == HWLOC_OBJ_INCLUDED) {
|
||||
|
@ -2047,7 +2050,7 @@ hwloc_find_insert_io_parent_by_complete_cpuset(struct hwloc_topology *topology,
|
|||
hwloc_bitmap_and(cpuset, cpuset, hwloc_topology_get_topology_cpuset(topology));
|
||||
group_obj->cpuset = hwloc_bitmap_dup(cpuset);
|
||||
group_obj->attr->group.kind = HWLOC_GROUP_KIND_IO;
|
||||
parent = hwloc__insert_object_by_cpuset(topology, largeparent, group_obj, hwloc_report_os_error);
|
||||
parent = hwloc__insert_object_by_cpuset(topology, largeparent, group_obj, "topology:io_parent");
|
||||
if (!parent)
|
||||
/* Failed to insert the Group, maybe a conflicting cpuset */
|
||||
return largeparent;
|
||||
|
@ -3251,7 +3254,7 @@ hwloc_discover(struct hwloc_topology *topology,
|
|||
* produced by hwloc_setup_pu_level()
|
||||
*/
|
||||
|
||||
/* To be able to just use hwloc_insert_object_by_cpuset to insert the object
|
||||
/* To be able to just use hwloc__insert_object_by_cpuset to insert the object
|
||||
* in the topology according to the cpuset, the cpuset field must be
|
||||
* initialized.
|
||||
*/
|
||||
|
@ -3356,7 +3359,7 @@ hwloc_discover(struct hwloc_topology *topology,
|
|||
hwloc_bitmap_set(node->nodeset, 0);
|
||||
memcpy(&node->attr->numanode, &topology->machine_memory, sizeof(topology->machine_memory));
|
||||
memset(&topology->machine_memory, 0, sizeof(topology->machine_memory));
|
||||
hwloc_insert_object_by_cpuset(topology, node);
|
||||
hwloc__insert_object_by_cpuset(topology, NULL, node, "core:defaultnumanode");
|
||||
} else {
|
||||
/* if we're sure we found all NUMA nodes without their sizes (x86 backend?),
|
||||
* we could split topology->total_memory in all of them.
|
||||
|
@ -3514,6 +3517,7 @@ hwloc_topology_setup_defaults(struct hwloc_topology *topology)
|
|||
memset(topology->support.discovery, 0, sizeof(*topology->support.discovery));
|
||||
memset(topology->support.cpubind, 0, sizeof(*topology->support.cpubind));
|
||||
memset(topology->support.membind, 0, sizeof(*topology->support.membind));
|
||||
memset(topology->support.misc, 0, sizeof(*topology->support.misc));
|
||||
|
||||
/* Only the System object on top by default */
|
||||
topology->next_gp_index = 1; /* keep 0 as an invalid value */
|
||||
|
@ -3590,6 +3594,7 @@ hwloc__topology_init (struct hwloc_topology **topologyp,
|
|||
topology->support.discovery = hwloc_tma_malloc(tma, sizeof(*topology->support.discovery));
|
||||
topology->support.cpubind = hwloc_tma_malloc(tma, sizeof(*topology->support.cpubind));
|
||||
topology->support.membind = hwloc_tma_malloc(tma, sizeof(*topology->support.membind));
|
||||
topology->support.misc = hwloc_tma_malloc(tma, sizeof(*topology->support.misc));
|
||||
|
||||
topology->nb_levels_allocated = nblevels; /* enough for default 10 levels = Mach+Pack+Die+NUMA+L3+L2+L1d+L1i+Co+PU */
|
||||
topology->levels = hwloc_tma_calloc(tma, topology->nb_levels_allocated * sizeof(*topology->levels));
|
||||
|
@ -3598,6 +3603,8 @@ hwloc__topology_init (struct hwloc_topology **topologyp,
|
|||
hwloc__topology_filter_init(topology);
|
||||
|
||||
hwloc_internal_distances_init(topology);
|
||||
hwloc_internal_memattrs_init(topology);
|
||||
hwloc_internal_cpukinds_init(topology);
|
||||
|
||||
topology->userdata_export_cb = NULL;
|
||||
topology->userdata_import_cb = NULL;
|
||||
|
@ -3691,7 +3698,7 @@ hwloc_topology_set_flags (struct hwloc_topology *topology, unsigned long flags)
|
|||
return -1;
|
||||
}
|
||||
|
||||
if (flags & ~(HWLOC_TOPOLOGY_FLAG_INCLUDE_DISALLOWED|HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM|HWLOC_TOPOLOGY_FLAG_THISSYSTEM_ALLOWED_RESOURCES)) {
|
||||
if (flags & ~(HWLOC_TOPOLOGY_FLAG_INCLUDE_DISALLOWED|HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM|HWLOC_TOPOLOGY_FLAG_THISSYSTEM_ALLOWED_RESOURCES|HWLOC_TOPOLOGY_FLAG_IMPORT_SUPPORT)) {
|
||||
errno = EINVAL;
|
||||
return -1;
|
||||
}
|
||||
|
@ -3827,7 +3834,9 @@ hwloc_topology_clear (struct hwloc_topology *topology)
|
|||
{
|
||||
/* no need to set to NULL after free() since callers will call setup_defaults() or just destroy the rest of the topology */
|
||||
unsigned l;
|
||||
hwloc_internal_cpukinds_destroy(topology);
|
||||
hwloc_internal_distances_destroy(topology);
|
||||
hwloc_internal_memattrs_destroy(topology);
|
||||
hwloc_free_object_and_children(topology->levels[0][0]);
|
||||
hwloc_bitmap_free(topology->allowed_cpuset);
|
||||
hwloc_bitmap_free(topology->allowed_nodeset);
|
||||
|
@ -3858,6 +3867,7 @@ hwloc_topology_destroy (struct hwloc_topology *topology)
|
|||
free(topology->support.discovery);
|
||||
free(topology->support.cpubind);
|
||||
free(topology->support.membind);
|
||||
free(topology->support.misc);
|
||||
free(topology);
|
||||
}
|
||||
|
||||
|
@ -3873,7 +3883,9 @@ hwloc_topology_load (struct hwloc_topology *topology)
|
|||
return -1;
|
||||
}
|
||||
|
||||
/* initialize envvar-related things */
|
||||
hwloc_internal_distances_prepare(topology);
|
||||
hwloc_internal_memattrs_prepare(topology);
|
||||
|
||||
if (getenv("HWLOC_XML_USERDATA_NOT_DECODED"))
|
||||
topology->userdata_not_decoded = 1;
|
||||
|
@ -3954,6 +3966,9 @@ hwloc_topology_load (struct hwloc_topology *topology)
|
|||
#endif
|
||||
hwloc_topology_check(topology);
|
||||
|
||||
/* Rank cpukinds */
|
||||
hwloc_internal_cpukinds_rank(topology);
|
||||
|
||||
/* Mark distances objs arrays as invalid since we may have removed objects
|
||||
* from the topology after adding the distances (remove_empty, etc).
|
||||
* It would be hard to actually verify whether it's needed.
|
||||
|
@ -3964,6 +3979,10 @@ hwloc_topology_load (struct hwloc_topology *topology)
|
|||
*/
|
||||
hwloc_internal_distances_refresh(topology);
|
||||
|
||||
/* Same for memattrs */
|
||||
hwloc_internal_memattrs_need_refresh(topology);
|
||||
hwloc_internal_memattrs_refresh(topology);
|
||||
|
||||
topology->is_loaded = 1;
|
||||
|
||||
if (topology->backend_phases & HWLOC_DISC_PHASE_TWEAK) {
|
||||
|
@ -4246,10 +4265,12 @@ hwloc_topology_restrict(struct hwloc_topology *topology, hwloc_const_bitmap_t se
|
|||
|
||||
/* some objects may have disappeared, we need to update distances objs arrays */
|
||||
hwloc_internal_distances_invalidate_cached_objs(topology);
|
||||
hwloc_internal_memattrs_need_refresh(topology);
|
||||
|
||||
hwloc_filter_levels_keep_structure(topology);
|
||||
hwloc_propagate_symmetric_subtree(topology, topology->levels[0][0]);
|
||||
propagate_total_memory(topology->levels[0][0]);
|
||||
hwloc_internal_cpukinds_restrict(topology);
|
||||
|
||||
#ifndef HWLOC_DEBUG
|
||||
if (getenv("HWLOC_DEBUG_CHECK"))
|
||||
|
@ -4334,6 +4355,15 @@ hwloc_topology_allow(struct hwloc_topology *topology,
|
|||
return -1;
|
||||
}
|
||||
|
||||
int
|
||||
hwloc_topology_refresh(struct hwloc_topology *topology)
|
||||
{
|
||||
hwloc_internal_cpukinds_rank(topology);
|
||||
hwloc_internal_distances_refresh(topology);
|
||||
hwloc_internal_memattrs_refresh(topology);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
hwloc_topology_is_thissystem(struct hwloc_topology *topology)
|
||||
{
|
||||
|
|
69
src/3rdparty/hwloc/src/traversal.c
vendored
69
src/3rdparty/hwloc/src/traversal.c
vendored
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
* Copyright © 2009 CNRS
|
||||
* Copyright © 2009-2019 Inria. All rights reserved.
|
||||
* Copyright © 2009-2010 Université Bordeaux
|
||||
* Copyright © 2009-2020 Inria. All rights reserved.
|
||||
* Copyright © 2009-2010, 2020 Université Bordeaux
|
||||
* Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved.
|
||||
* See COPYING in top-level directory.
|
||||
*/
|
||||
|
@ -138,6 +138,37 @@ hwloc_obj_type_is_icache(hwloc_obj_type_t type)
|
|||
return hwloc__obj_type_is_icache(type);
|
||||
}
|
||||
|
||||
static hwloc_obj_t hwloc_get_obj_by_depth_and_gp_index(hwloc_topology_t topology, unsigned depth, uint64_t gp_index)
|
||||
{
|
||||
hwloc_obj_t obj = hwloc_get_obj_by_depth(topology, depth, 0);
|
||||
while (obj) {
|
||||
if (obj->gp_index == gp_index)
|
||||
return obj;
|
||||
obj = obj->next_cousin;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
hwloc_obj_t hwloc_get_obj_by_type_and_gp_index(hwloc_topology_t topology, hwloc_obj_type_t type, uint64_t gp_index)
|
||||
{
|
||||
int depth = hwloc_get_type_depth(topology, type);
|
||||
if (depth == HWLOC_TYPE_DEPTH_UNKNOWN)
|
||||
return NULL;
|
||||
if (depth == HWLOC_TYPE_DEPTH_MULTIPLE) {
|
||||
for(depth=1 /* no multiple machine levels */;
|
||||
(unsigned) depth < topology->nb_levels-1 /* no multiple PU levels */;
|
||||
depth++) {
|
||||
if (hwloc_get_depth_type(topology, depth) == type) {
|
||||
hwloc_obj_t obj = hwloc_get_obj_by_depth_and_gp_index(topology, depth, gp_index);
|
||||
if (obj)
|
||||
return obj;
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
return hwloc_get_obj_by_depth_and_gp_index(topology, depth, gp_index);
|
||||
}
|
||||
|
||||
unsigned hwloc_get_closest_objs (struct hwloc_topology *topology, struct hwloc_obj *src, struct hwloc_obj **objs, unsigned max)
|
||||
{
|
||||
struct hwloc_obj *parent, *nextparent, **src_objs;
|
||||
|
@ -654,7 +685,11 @@ hwloc_obj_attr_snprintf(char * __hwloc_restrict string, size_t size, hwloc_obj_t
|
|||
unsigned i;
|
||||
for(i=0; i<obj->infos_count; i++) {
|
||||
struct hwloc_info_s *info = &obj->infos[i];
|
||||
const char *quote = strchr(info->value, ' ') ? "\"" : "";
|
||||
const char *quote;
|
||||
if (strchr(info->value, ' '))
|
||||
quote = "\"";
|
||||
else
|
||||
quote = "";
|
||||
res = hwloc_snprintf(tmp, tmplen, "%s%s=%s%s%s",
|
||||
prefix,
|
||||
info->name,
|
||||
|
@ -673,3 +708,31 @@ hwloc_obj_attr_snprintf(char * __hwloc_restrict string, size_t size, hwloc_obj_t
|
|||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int hwloc_bitmap_singlify_per_core(hwloc_topology_t topology, hwloc_bitmap_t cpuset, unsigned which)
|
||||
{
|
||||
hwloc_obj_t core = NULL;
|
||||
while ((core = hwloc_get_next_obj_covering_cpuset_by_type(topology, cpuset, HWLOC_OBJ_CORE, core)) != NULL) {
|
||||
/* this core has some PUs in the cpuset, find the index-th one */
|
||||
unsigned i = 0;
|
||||
int pu = -1;
|
||||
do {
|
||||
pu = hwloc_bitmap_next(core->cpuset, pu);
|
||||
if (pu == -1) {
|
||||
/* no which-th PU in cpuset and core, remove the entire core */
|
||||
hwloc_bitmap_andnot(cpuset, cpuset, core->cpuset);
|
||||
break;
|
||||
}
|
||||
if (hwloc_bitmap_isset(cpuset, pu)) {
|
||||
if (i == which) {
|
||||
/* remove the entire core except that exact pu */
|
||||
hwloc_bitmap_andnot(cpuset, cpuset, core->cpuset);
|
||||
hwloc_bitmap_set(cpuset, pu);
|
||||
break;
|
||||
}
|
||||
i++;
|
||||
}
|
||||
} while (1);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
|
38
src/3rdparty/libcpuid/CMakeLists.txt
vendored
38
src/3rdparty/libcpuid/CMakeLists.txt
vendored
|
@ -1,38 +0,0 @@
|
|||
cmake_minimum_required (VERSION 2.8)
|
||||
project (cpuid C)
|
||||
|
||||
add_definitions(/DVERSION="0.4.0")
|
||||
|
||||
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -Os")
|
||||
|
||||
set(HEADERS
|
||||
libcpuid.h
|
||||
libcpuid_types.h
|
||||
libcpuid_constants.h
|
||||
libcpuid_internal.h
|
||||
amd_code_t.h
|
||||
intel_code_t.h
|
||||
recog_amd.h
|
||||
recog_intel.h
|
||||
asm-bits.h
|
||||
libcpuid_util.h
|
||||
)
|
||||
|
||||
set(SOURCES
|
||||
cpuid_main.c
|
||||
asm-bits.c
|
||||
recog_amd.c
|
||||
recog_intel.c
|
||||
libcpuid_util.c
|
||||
)
|
||||
|
||||
if (CMAKE_CL_64)
|
||||
enable_language(ASM_MASM)
|
||||
set(SOURCES_ASM masm-x64.asm)
|
||||
endif()
|
||||
|
||||
add_library(cpuid STATIC
|
||||
${HEADERS}
|
||||
${SOURCES}
|
||||
${SOURCES_ASM}
|
||||
)
|
39
src/3rdparty/libcpuid/amd_code_t.h
vendored
39
src/3rdparty/libcpuid/amd_code_t.h
vendored
|
@ -1,39 +0,0 @@
|
|||
/*
|
||||
* Copyright 2016 Veselin Georgiev,
|
||||
* anrieffNOSPAM @ mgail_DOT.com (convert to gmail)
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
||||
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* This file contains a list of internal codes we use in detection. It is
|
||||
* of no external use and isn't a complete list of AMD products.
|
||||
*/
|
||||
CODE2(OPTERON_800, 1000),
|
||||
CODE(PHENOM),
|
||||
CODE(PHENOM2),
|
||||
CODE(FUSION_C),
|
||||
CODE(FUSION_E),
|
||||
CODE(FUSION_EA),
|
||||
CODE(FUSION_Z),
|
||||
CODE(FUSION_A),
|
||||
|
836
src/3rdparty/libcpuid/asm-bits.c
vendored
836
src/3rdparty/libcpuid/asm-bits.c
vendored
|
@ -1,836 +0,0 @@
|
|||
/*
|
||||
* Copyright 2008 Veselin Georgiev,
|
||||
* anrieffNOSPAM @ mgail_DOT.com (convert to gmail)
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
||||
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "libcpuid.h"
|
||||
#include "asm-bits.h"
|
||||
|
||||
int cpuid_exists_by_eflags(void)
|
||||
{
|
||||
#if defined(PLATFORM_X64)
|
||||
return 1; /* CPUID is always present on the x86_64 */
|
||||
#elif defined(PLATFORM_X86)
|
||||
# if defined(COMPILER_GCC) || defined(COMPILER_CLANG)
|
||||
int result;
|
||||
__asm __volatile(
|
||||
" pushfl\n"
|
||||
" pop %%eax\n"
|
||||
" mov %%eax, %%ecx\n"
|
||||
" xor $0x200000, %%eax\n"
|
||||
" push %%eax\n"
|
||||
" popfl\n"
|
||||
" pushfl\n"
|
||||
" pop %%eax\n"
|
||||
" xor %%ecx, %%eax\n"
|
||||
" mov %%eax, %0\n"
|
||||
" push %%ecx\n"
|
||||
" popfl\n"
|
||||
: "=m"(result)
|
||||
: :"eax", "ecx", "memory");
|
||||
return (result != 0);
|
||||
# elif defined(COMPILER_MICROSOFT)
|
||||
int result;
|
||||
__asm {
|
||||
pushfd
|
||||
pop eax
|
||||
mov ecx, eax
|
||||
xor eax, 0x200000
|
||||
push eax
|
||||
popfd
|
||||
pushfd
|
||||
pop eax
|
||||
xor eax, ecx
|
||||
mov result, eax
|
||||
push ecx
|
||||
popfd
|
||||
};
|
||||
return (result != 0);
|
||||
# else
|
||||
return 0;
|
||||
# endif /* COMPILER_MICROSOFT */
|
||||
#elif defined(PLATFORM_ARM)
|
||||
return 0;
|
||||
#else
|
||||
return 0;
|
||||
#endif /* PLATFORM_X86 */
|
||||
}
|
||||
|
||||
#ifdef INLINE_ASM_SUPPORTED
|
||||
/*
|
||||
* with MSVC/AMD64, the exec_cpuid() and cpu_rdtsc() functions
|
||||
* are implemented in separate .asm files. Otherwise, use inline assembly
|
||||
*/
|
||||
void exec_cpuid(uint32_t *regs)
|
||||
{
|
||||
# if defined(COMPILER_GCC) || defined(COMPILER_CLANG)
|
||||
# ifdef PLATFORM_X64
|
||||
__asm __volatile(
|
||||
" mov %0, %%rdi\n"
|
||||
|
||||
" push %%rbx\n"
|
||||
" push %%rcx\n"
|
||||
" push %%rdx\n"
|
||||
|
||||
" mov (%%rdi), %%eax\n"
|
||||
" mov 4(%%rdi), %%ebx\n"
|
||||
" mov 8(%%rdi), %%ecx\n"
|
||||
" mov 12(%%rdi), %%edx\n"
|
||||
|
||||
" cpuid\n"
|
||||
|
||||
" movl %%eax, (%%rdi)\n"
|
||||
" movl %%ebx, 4(%%rdi)\n"
|
||||
" movl %%ecx, 8(%%rdi)\n"
|
||||
" movl %%edx, 12(%%rdi)\n"
|
||||
" pop %%rdx\n"
|
||||
" pop %%rcx\n"
|
||||
" pop %%rbx\n"
|
||||
:
|
||||
:"m"(regs)
|
||||
:"memory", "eax", "rdi"
|
||||
);
|
||||
# elif defined(PLATFORM_X86)
|
||||
__asm __volatile(
|
||||
" mov %0, %%edi\n"
|
||||
|
||||
" push %%ebx\n"
|
||||
" push %%ecx\n"
|
||||
" push %%edx\n"
|
||||
|
||||
" mov (%%edi), %%eax\n"
|
||||
" mov 4(%%edi), %%ebx\n"
|
||||
" mov 8(%%edi), %%ecx\n"
|
||||
" mov 12(%%edi), %%edx\n"
|
||||
|
||||
" cpuid\n"
|
||||
|
||||
" mov %%eax, (%%edi)\n"
|
||||
" mov %%ebx, 4(%%edi)\n"
|
||||
" mov %%ecx, 8(%%edi)\n"
|
||||
" mov %%edx, 12(%%edi)\n"
|
||||
" pop %%edx\n"
|
||||
" pop %%ecx\n"
|
||||
" pop %%ebx\n"
|
||||
:
|
||||
:"m"(regs)
|
||||
:"memory", "eax", "edi"
|
||||
);
|
||||
# elif defined(PLATFORM_ARM)
|
||||
# endif /* COMPILER_GCC */
|
||||
#else
|
||||
# ifdef COMPILER_MICROSOFT
|
||||
__asm {
|
||||
push ebx
|
||||
push ecx
|
||||
push edx
|
||||
push edi
|
||||
mov edi, regs
|
||||
|
||||
mov eax, [edi]
|
||||
mov ebx, [edi+4]
|
||||
mov ecx, [edi+8]
|
||||
mov edx, [edi+12]
|
||||
|
||||
cpuid
|
||||
|
||||
mov [edi], eax
|
||||
mov [edi+4], ebx
|
||||
mov [edi+8], ecx
|
||||
mov [edi+12], edx
|
||||
|
||||
pop edi
|
||||
pop edx
|
||||
pop ecx
|
||||
pop ebx
|
||||
}
|
||||
# else
|
||||
# error "Unsupported compiler"
|
||||
# endif /* COMPILER_MICROSOFT */
|
||||
#endif
|
||||
}
|
||||
#endif /* INLINE_ASSEMBLY_SUPPORTED */
|
||||
|
||||
#ifdef INLINE_ASM_SUPPORTED
|
||||
void cpu_rdtsc(uint64_t* result)
|
||||
{
|
||||
uint32_t low_part, hi_part;
|
||||
#if defined(COMPILER_GCC) || defined(COMPILER_CLANG)
|
||||
#ifdef PLATFORM_ARM
|
||||
low_part = 0;
|
||||
hi_part = 0;
|
||||
#else
|
||||
__asm __volatile (
|
||||
" rdtsc\n"
|
||||
" mov %%eax, %0\n"
|
||||
" mov %%edx, %1\n"
|
||||
:"=m"(low_part), "=m"(hi_part)::"memory", "eax", "edx"
|
||||
);
|
||||
#endif
|
||||
#else
|
||||
# ifdef COMPILER_MICROSOFT
|
||||
__asm {
|
||||
rdtsc
|
||||
mov low_part, eax
|
||||
mov hi_part, edx
|
||||
};
|
||||
# else
|
||||
# error "Unsupported compiler"
|
||||
# endif /* COMPILER_MICROSOFT */
|
||||
#endif /* COMPILER_GCC */
|
||||
*result = (uint64_t)low_part + (((uint64_t) hi_part) << 32);
|
||||
}
|
||||
#endif /* INLINE_ASM_SUPPORTED */
|
||||
|
||||
#ifdef INLINE_ASM_SUPPORTED
|
||||
void busy_sse_loop(int cycles)
|
||||
{
|
||||
# if defined(COMPILER_GCC) || defined(COMPILER_CLANG)
|
||||
#ifndef __APPLE__
|
||||
# define XALIGN ".balign 16\n"
|
||||
#else
|
||||
# define XALIGN ".align 4\n"
|
||||
#endif
|
||||
#ifdef PLATFORM_ARM
|
||||
#else
|
||||
__asm __volatile (
|
||||
" xorps %%xmm0, %%xmm0\n"
|
||||
" xorps %%xmm1, %%xmm1\n"
|
||||
" xorps %%xmm2, %%xmm2\n"
|
||||
" xorps %%xmm3, %%xmm3\n"
|
||||
" xorps %%xmm4, %%xmm4\n"
|
||||
" xorps %%xmm5, %%xmm5\n"
|
||||
" xorps %%xmm6, %%xmm6\n"
|
||||
" xorps %%xmm7, %%xmm7\n"
|
||||
XALIGN
|
||||
/* ".bsLoop:\n" */
|
||||
"1:\n"
|
||||
// 0:
|
||||
" addps %%xmm1, %%xmm0\n"
|
||||
" addps %%xmm2, %%xmm1\n"
|
||||
" addps %%xmm3, %%xmm2\n"
|
||||
" addps %%xmm4, %%xmm3\n"
|
||||
" addps %%xmm5, %%xmm4\n"
|
||||
" addps %%xmm6, %%xmm5\n"
|
||||
" addps %%xmm7, %%xmm6\n"
|
||||
" addps %%xmm0, %%xmm7\n"
|
||||
// 1:
|
||||
" addps %%xmm1, %%xmm0\n"
|
||||
" addps %%xmm2, %%xmm1\n"
|
||||
" addps %%xmm3, %%xmm2\n"
|
||||
" addps %%xmm4, %%xmm3\n"
|
||||
" addps %%xmm5, %%xmm4\n"
|
||||
" addps %%xmm6, %%xmm5\n"
|
||||
" addps %%xmm7, %%xmm6\n"
|
||||
" addps %%xmm0, %%xmm7\n"
|
||||
// 2:
|
||||
" addps %%xmm1, %%xmm0\n"
|
||||
" addps %%xmm2, %%xmm1\n"
|
||||
" addps %%xmm3, %%xmm2\n"
|
||||
" addps %%xmm4, %%xmm3\n"
|
||||
" addps %%xmm5, %%xmm4\n"
|
||||
" addps %%xmm6, %%xmm5\n"
|
||||
" addps %%xmm7, %%xmm6\n"
|
||||
" addps %%xmm0, %%xmm7\n"
|
||||
// 3:
|
||||
" addps %%xmm1, %%xmm0\n"
|
||||
" addps %%xmm2, %%xmm1\n"
|
||||
" addps %%xmm3, %%xmm2\n"
|
||||
" addps %%xmm4, %%xmm3\n"
|
||||
" addps %%xmm5, %%xmm4\n"
|
||||
" addps %%xmm6, %%xmm5\n"
|
||||
" addps %%xmm7, %%xmm6\n"
|
||||
" addps %%xmm0, %%xmm7\n"
|
||||
// 4:
|
||||
" addps %%xmm1, %%xmm0\n"
|
||||
" addps %%xmm2, %%xmm1\n"
|
||||
" addps %%xmm3, %%xmm2\n"
|
||||
" addps %%xmm4, %%xmm3\n"
|
||||
" addps %%xmm5, %%xmm4\n"
|
||||
" addps %%xmm6, %%xmm5\n"
|
||||
" addps %%xmm7, %%xmm6\n"
|
||||
" addps %%xmm0, %%xmm7\n"
|
||||
// 5:
|
||||
" addps %%xmm1, %%xmm0\n"
|
||||
" addps %%xmm2, %%xmm1\n"
|
||||
" addps %%xmm3, %%xmm2\n"
|
||||
" addps %%xmm4, %%xmm3\n"
|
||||
" addps %%xmm5, %%xmm4\n"
|
||||
" addps %%xmm6, %%xmm5\n"
|
||||
" addps %%xmm7, %%xmm6\n"
|
||||
" addps %%xmm0, %%xmm7\n"
|
||||
// 6:
|
||||
" addps %%xmm1, %%xmm0\n"
|
||||
" addps %%xmm2, %%xmm1\n"
|
||||
" addps %%xmm3, %%xmm2\n"
|
||||
" addps %%xmm4, %%xmm3\n"
|
||||
" addps %%xmm5, %%xmm4\n"
|
||||
" addps %%xmm6, %%xmm5\n"
|
||||
" addps %%xmm7, %%xmm6\n"
|
||||
" addps %%xmm0, %%xmm7\n"
|
||||
// 7:
|
||||
" addps %%xmm1, %%xmm0\n"
|
||||
" addps %%xmm2, %%xmm1\n"
|
||||
" addps %%xmm3, %%xmm2\n"
|
||||
" addps %%xmm4, %%xmm3\n"
|
||||
" addps %%xmm5, %%xmm4\n"
|
||||
" addps %%xmm6, %%xmm5\n"
|
||||
" addps %%xmm7, %%xmm6\n"
|
||||
" addps %%xmm0, %%xmm7\n"
|
||||
// 8:
|
||||
" addps %%xmm1, %%xmm0\n"
|
||||
" addps %%xmm2, %%xmm1\n"
|
||||
" addps %%xmm3, %%xmm2\n"
|
||||
" addps %%xmm4, %%xmm3\n"
|
||||
" addps %%xmm5, %%xmm4\n"
|
||||
" addps %%xmm6, %%xmm5\n"
|
||||
" addps %%xmm7, %%xmm6\n"
|
||||
" addps %%xmm0, %%xmm7\n"
|
||||
// 9:
|
||||
" addps %%xmm1, %%xmm0\n"
|
||||
" addps %%xmm2, %%xmm1\n"
|
||||
" addps %%xmm3, %%xmm2\n"
|
||||
" addps %%xmm4, %%xmm3\n"
|
||||
" addps %%xmm5, %%xmm4\n"
|
||||
" addps %%xmm6, %%xmm5\n"
|
||||
" addps %%xmm7, %%xmm6\n"
|
||||
" addps %%xmm0, %%xmm7\n"
|
||||
//10:
|
||||
" addps %%xmm1, %%xmm0\n"
|
||||
" addps %%xmm2, %%xmm1\n"
|
||||
" addps %%xmm3, %%xmm2\n"
|
||||
" addps %%xmm4, %%xmm3\n"
|
||||
" addps %%xmm5, %%xmm4\n"
|
||||
" addps %%xmm6, %%xmm5\n"
|
||||
" addps %%xmm7, %%xmm6\n"
|
||||
" addps %%xmm0, %%xmm7\n"
|
||||
//11:
|
||||
" addps %%xmm1, %%xmm0\n"
|
||||
" addps %%xmm2, %%xmm1\n"
|
||||
" addps %%xmm3, %%xmm2\n"
|
||||
" addps %%xmm4, %%xmm3\n"
|
||||
" addps %%xmm5, %%xmm4\n"
|
||||
" addps %%xmm6, %%xmm5\n"
|
||||
" addps %%xmm7, %%xmm6\n"
|
||||
" addps %%xmm0, %%xmm7\n"
|
||||
//12:
|
||||
" addps %%xmm1, %%xmm0\n"
|
||||
" addps %%xmm2, %%xmm1\n"
|
||||
" addps %%xmm3, %%xmm2\n"
|
||||
" addps %%xmm4, %%xmm3\n"
|
||||
" addps %%xmm5, %%xmm4\n"
|
||||
" addps %%xmm6, %%xmm5\n"
|
||||
" addps %%xmm7, %%xmm6\n"
|
||||
" addps %%xmm0, %%xmm7\n"
|
||||
//13:
|
||||
" addps %%xmm1, %%xmm0\n"
|
||||
" addps %%xmm2, %%xmm1\n"
|
||||
" addps %%xmm3, %%xmm2\n"
|
||||
" addps %%xmm4, %%xmm3\n"
|
||||
" addps %%xmm5, %%xmm4\n"
|
||||
" addps %%xmm6, %%xmm5\n"
|
||||
" addps %%xmm7, %%xmm6\n"
|
||||
" addps %%xmm0, %%xmm7\n"
|
||||
//14:
|
||||
" addps %%xmm1, %%xmm0\n"
|
||||
" addps %%xmm2, %%xmm1\n"
|
||||
" addps %%xmm3, %%xmm2\n"
|
||||
" addps %%xmm4, %%xmm3\n"
|
||||
" addps %%xmm5, %%xmm4\n"
|
||||
" addps %%xmm6, %%xmm5\n"
|
||||
" addps %%xmm7, %%xmm6\n"
|
||||
" addps %%xmm0, %%xmm7\n"
|
||||
//15:
|
||||
" addps %%xmm1, %%xmm0\n"
|
||||
" addps %%xmm2, %%xmm1\n"
|
||||
" addps %%xmm3, %%xmm2\n"
|
||||
" addps %%xmm4, %%xmm3\n"
|
||||
" addps %%xmm5, %%xmm4\n"
|
||||
" addps %%xmm6, %%xmm5\n"
|
||||
" addps %%xmm7, %%xmm6\n"
|
||||
" addps %%xmm0, %%xmm7\n"
|
||||
//16:
|
||||
" addps %%xmm1, %%xmm0\n"
|
||||
" addps %%xmm2, %%xmm1\n"
|
||||
" addps %%xmm3, %%xmm2\n"
|
||||
" addps %%xmm4, %%xmm3\n"
|
||||
" addps %%xmm5, %%xmm4\n"
|
||||
" addps %%xmm6, %%xmm5\n"
|
||||
" addps %%xmm7, %%xmm6\n"
|
||||
" addps %%xmm0, %%xmm7\n"
|
||||
//17:
|
||||
" addps %%xmm1, %%xmm0\n"
|
||||
" addps %%xmm2, %%xmm1\n"
|
||||
" addps %%xmm3, %%xmm2\n"
|
||||
" addps %%xmm4, %%xmm3\n"
|
||||
" addps %%xmm5, %%xmm4\n"
|
||||
" addps %%xmm6, %%xmm5\n"
|
||||
" addps %%xmm7, %%xmm6\n"
|
||||
" addps %%xmm0, %%xmm7\n"
|
||||
//18:
|
||||
" addps %%xmm1, %%xmm0\n"
|
||||
" addps %%xmm2, %%xmm1\n"
|
||||
" addps %%xmm3, %%xmm2\n"
|
||||
" addps %%xmm4, %%xmm3\n"
|
||||
" addps %%xmm5, %%xmm4\n"
|
||||
" addps %%xmm6, %%xmm5\n"
|
||||
" addps %%xmm7, %%xmm6\n"
|
||||
" addps %%xmm0, %%xmm7\n"
|
||||
//19:
|
||||
" addps %%xmm1, %%xmm0\n"
|
||||
" addps %%xmm2, %%xmm1\n"
|
||||
" addps %%xmm3, %%xmm2\n"
|
||||
" addps %%xmm4, %%xmm3\n"
|
||||
" addps %%xmm5, %%xmm4\n"
|
||||
" addps %%xmm6, %%xmm5\n"
|
||||
" addps %%xmm7, %%xmm6\n"
|
||||
" addps %%xmm0, %%xmm7\n"
|
||||
//20:
|
||||
" addps %%xmm1, %%xmm0\n"
|
||||
" addps %%xmm2, %%xmm1\n"
|
||||
" addps %%xmm3, %%xmm2\n"
|
||||
" addps %%xmm4, %%xmm3\n"
|
||||
" addps %%xmm5, %%xmm4\n"
|
||||
" addps %%xmm6, %%xmm5\n"
|
||||
" addps %%xmm7, %%xmm6\n"
|
||||
" addps %%xmm0, %%xmm7\n"
|
||||
//21:
|
||||
" addps %%xmm1, %%xmm0\n"
|
||||
" addps %%xmm2, %%xmm1\n"
|
||||
" addps %%xmm3, %%xmm2\n"
|
||||
" addps %%xmm4, %%xmm3\n"
|
||||
" addps %%xmm5, %%xmm4\n"
|
||||
" addps %%xmm6, %%xmm5\n"
|
||||
" addps %%xmm7, %%xmm6\n"
|
||||
" addps %%xmm0, %%xmm7\n"
|
||||
//22:
|
||||
" addps %%xmm1, %%xmm0\n"
|
||||
" addps %%xmm2, %%xmm1\n"
|
||||
" addps %%xmm3, %%xmm2\n"
|
||||
" addps %%xmm4, %%xmm3\n"
|
||||
" addps %%xmm5, %%xmm4\n"
|
||||
" addps %%xmm6, %%xmm5\n"
|
||||
" addps %%xmm7, %%xmm6\n"
|
||||
" addps %%xmm0, %%xmm7\n"
|
||||
//23:
|
||||
" addps %%xmm1, %%xmm0\n"
|
||||
" addps %%xmm2, %%xmm1\n"
|
||||
" addps %%xmm3, %%xmm2\n"
|
||||
" addps %%xmm4, %%xmm3\n"
|
||||
" addps %%xmm5, %%xmm4\n"
|
||||
" addps %%xmm6, %%xmm5\n"
|
||||
" addps %%xmm7, %%xmm6\n"
|
||||
" addps %%xmm0, %%xmm7\n"
|
||||
//24:
|
||||
" addps %%xmm1, %%xmm0\n"
|
||||
" addps %%xmm2, %%xmm1\n"
|
||||
" addps %%xmm3, %%xmm2\n"
|
||||
" addps %%xmm4, %%xmm3\n"
|
||||
" addps %%xmm5, %%xmm4\n"
|
||||
" addps %%xmm6, %%xmm5\n"
|
||||
" addps %%xmm7, %%xmm6\n"
|
||||
" addps %%xmm0, %%xmm7\n"
|
||||
//25:
|
||||
" addps %%xmm1, %%xmm0\n"
|
||||
" addps %%xmm2, %%xmm1\n"
|
||||
" addps %%xmm3, %%xmm2\n"
|
||||
" addps %%xmm4, %%xmm3\n"
|
||||
" addps %%xmm5, %%xmm4\n"
|
||||
" addps %%xmm6, %%xmm5\n"
|
||||
" addps %%xmm7, %%xmm6\n"
|
||||
" addps %%xmm0, %%xmm7\n"
|
||||
//26:
|
||||
" addps %%xmm1, %%xmm0\n"
|
||||
" addps %%xmm2, %%xmm1\n"
|
||||
" addps %%xmm3, %%xmm2\n"
|
||||
" addps %%xmm4, %%xmm3\n"
|
||||
" addps %%xmm5, %%xmm4\n"
|
||||
" addps %%xmm6, %%xmm5\n"
|
||||
" addps %%xmm7, %%xmm6\n"
|
||||
" addps %%xmm0, %%xmm7\n"
|
||||
//27:
|
||||
" addps %%xmm1, %%xmm0\n"
|
||||
" addps %%xmm2, %%xmm1\n"
|
||||
" addps %%xmm3, %%xmm2\n"
|
||||
" addps %%xmm4, %%xmm3\n"
|
||||
" addps %%xmm5, %%xmm4\n"
|
||||
" addps %%xmm6, %%xmm5\n"
|
||||
" addps %%xmm7, %%xmm6\n"
|
||||
" addps %%xmm0, %%xmm7\n"
|
||||
//28:
|
||||
" addps %%xmm1, %%xmm0\n"
|
||||
" addps %%xmm2, %%xmm1\n"
|
||||
" addps %%xmm3, %%xmm2\n"
|
||||
" addps %%xmm4, %%xmm3\n"
|
||||
" addps %%xmm5, %%xmm4\n"
|
||||
" addps %%xmm6, %%xmm5\n"
|
||||
" addps %%xmm7, %%xmm6\n"
|
||||
" addps %%xmm0, %%xmm7\n"
|
||||
//29:
|
||||
" addps %%xmm1, %%xmm0\n"
|
||||
" addps %%xmm2, %%xmm1\n"
|
||||
" addps %%xmm3, %%xmm2\n"
|
||||
" addps %%xmm4, %%xmm3\n"
|
||||
" addps %%xmm5, %%xmm4\n"
|
||||
" addps %%xmm6, %%xmm5\n"
|
||||
" addps %%xmm7, %%xmm6\n"
|
||||
" addps %%xmm0, %%xmm7\n"
|
||||
//30:
|
||||
" addps %%xmm1, %%xmm0\n"
|
||||
" addps %%xmm2, %%xmm1\n"
|
||||
" addps %%xmm3, %%xmm2\n"
|
||||
" addps %%xmm4, %%xmm3\n"
|
||||
" addps %%xmm5, %%xmm4\n"
|
||||
" addps %%xmm6, %%xmm5\n"
|
||||
" addps %%xmm7, %%xmm6\n"
|
||||
" addps %%xmm0, %%xmm7\n"
|
||||
//31:
|
||||
" addps %%xmm1, %%xmm0\n"
|
||||
" addps %%xmm2, %%xmm1\n"
|
||||
" addps %%xmm3, %%xmm2\n"
|
||||
" addps %%xmm4, %%xmm3\n"
|
||||
" addps %%xmm5, %%xmm4\n"
|
||||
" addps %%xmm6, %%xmm5\n"
|
||||
" addps %%xmm7, %%xmm6\n"
|
||||
" addps %%xmm0, %%xmm7\n"
|
||||
|
||||
" dec %%eax\n"
|
||||
/* "jnz .bsLoop\n" */
|
||||
" jnz 1b\n"
|
||||
::"a"(cycles)
|
||||
);
|
||||
#endif
|
||||
#else
|
||||
# ifdef COMPILER_MICROSOFT
|
||||
__asm {
|
||||
mov eax, cycles
|
||||
xorps xmm0, xmm0
|
||||
xorps xmm1, xmm1
|
||||
xorps xmm2, xmm2
|
||||
xorps xmm3, xmm3
|
||||
xorps xmm4, xmm4
|
||||
xorps xmm5, xmm5
|
||||
xorps xmm6, xmm6
|
||||
xorps xmm7, xmm7
|
||||
//--
|
||||
align 16
|
||||
bsLoop:
|
||||
// 0:
|
||||
addps xmm0, xmm1
|
||||
addps xmm1, xmm2
|
||||
addps xmm2, xmm3
|
||||
addps xmm3, xmm4
|
||||
addps xmm4, xmm5
|
||||
addps xmm5, xmm6
|
||||
addps xmm6, xmm7
|
||||
addps xmm7, xmm0
|
||||
// 1:
|
||||
addps xmm0, xmm1
|
||||
addps xmm1, xmm2
|
||||
addps xmm2, xmm3
|
||||
addps xmm3, xmm4
|
||||
addps xmm4, xmm5
|
||||
addps xmm5, xmm6
|
||||
addps xmm6, xmm7
|
||||
addps xmm7, xmm0
|
||||
// 2:
|
||||
addps xmm0, xmm1
|
||||
addps xmm1, xmm2
|
||||
addps xmm2, xmm3
|
||||
addps xmm3, xmm4
|
||||
addps xmm4, xmm5
|
||||
addps xmm5, xmm6
|
||||
addps xmm6, xmm7
|
||||
addps xmm7, xmm0
|
||||
// 3:
|
||||
addps xmm0, xmm1
|
||||
addps xmm1, xmm2
|
||||
addps xmm2, xmm3
|
||||
addps xmm3, xmm4
|
||||
addps xmm4, xmm5
|
||||
addps xmm5, xmm6
|
||||
addps xmm6, xmm7
|
||||
addps xmm7, xmm0
|
||||
// 4:
|
||||
addps xmm0, xmm1
|
||||
addps xmm1, xmm2
|
||||
addps xmm2, xmm3
|
||||
addps xmm3, xmm4
|
||||
addps xmm4, xmm5
|
||||
addps xmm5, xmm6
|
||||
addps xmm6, xmm7
|
||||
addps xmm7, xmm0
|
||||
// 5:
|
||||
addps xmm0, xmm1
|
||||
addps xmm1, xmm2
|
||||
addps xmm2, xmm3
|
||||
addps xmm3, xmm4
|
||||
addps xmm4, xmm5
|
||||
addps xmm5, xmm6
|
||||
addps xmm6, xmm7
|
||||
addps xmm7, xmm0
|
||||
// 6:
|
||||
addps xmm0, xmm1
|
||||
addps xmm1, xmm2
|
||||
addps xmm2, xmm3
|
||||
addps xmm3, xmm4
|
||||
addps xmm4, xmm5
|
||||
addps xmm5, xmm6
|
||||
addps xmm6, xmm7
|
||||
addps xmm7, xmm0
|
||||
// 7:
|
||||
addps xmm0, xmm1
|
||||
addps xmm1, xmm2
|
||||
addps xmm2, xmm3
|
||||
addps xmm3, xmm4
|
||||
addps xmm4, xmm5
|
||||
addps xmm5, xmm6
|
||||
addps xmm6, xmm7
|
||||
addps xmm7, xmm0
|
||||
// 8:
|
||||
addps xmm0, xmm1
|
||||
addps xmm1, xmm2
|
||||
addps xmm2, xmm3
|
||||
addps xmm3, xmm4
|
||||
addps xmm4, xmm5
|
||||
addps xmm5, xmm6
|
||||
addps xmm6, xmm7
|
||||
addps xmm7, xmm0
|
||||
// 9:
|
||||
addps xmm0, xmm1
|
||||
addps xmm1, xmm2
|
||||
addps xmm2, xmm3
|
||||
addps xmm3, xmm4
|
||||
addps xmm4, xmm5
|
||||
addps xmm5, xmm6
|
||||
addps xmm6, xmm7
|
||||
addps xmm7, xmm0
|
||||
// 10:
|
||||
addps xmm0, xmm1
|
||||
addps xmm1, xmm2
|
||||
addps xmm2, xmm3
|
||||
addps xmm3, xmm4
|
||||
addps xmm4, xmm5
|
||||
addps xmm5, xmm6
|
||||
addps xmm6, xmm7
|
||||
addps xmm7, xmm0
|
||||
// 11:
|
||||
addps xmm0, xmm1
|
||||
addps xmm1, xmm2
|
||||
addps xmm2, xmm3
|
||||
addps xmm3, xmm4
|
||||
addps xmm4, xmm5
|
||||
addps xmm5, xmm6
|
||||
addps xmm6, xmm7
|
||||
addps xmm7, xmm0
|
||||
// 12:
|
||||
addps xmm0, xmm1
|
||||
addps xmm1, xmm2
|
||||
addps xmm2, xmm3
|
||||
addps xmm3, xmm4
|
||||
addps xmm4, xmm5
|
||||
addps xmm5, xmm6
|
||||
addps xmm6, xmm7
|
||||
addps xmm7, xmm0
|
||||
// 13:
|
||||
addps xmm0, xmm1
|
||||
addps xmm1, xmm2
|
||||
addps xmm2, xmm3
|
||||
addps xmm3, xmm4
|
||||
addps xmm4, xmm5
|
||||
addps xmm5, xmm6
|
||||
addps xmm6, xmm7
|
||||
addps xmm7, xmm0
|
||||
// 14:
|
||||
addps xmm0, xmm1
|
||||
addps xmm1, xmm2
|
||||
addps xmm2, xmm3
|
||||
addps xmm3, xmm4
|
||||
addps xmm4, xmm5
|
||||
addps xmm5, xmm6
|
||||
addps xmm6, xmm7
|
||||
addps xmm7, xmm0
|
||||
// 15:
|
||||
addps xmm0, xmm1
|
||||
addps xmm1, xmm2
|
||||
addps xmm2, xmm3
|
||||
addps xmm3, xmm4
|
||||
addps xmm4, xmm5
|
||||
addps xmm5, xmm6
|
||||
addps xmm6, xmm7
|
||||
addps xmm7, xmm0
|
||||
// 16:
|
||||
addps xmm0, xmm1
|
||||
addps xmm1, xmm2
|
||||
addps xmm2, xmm3
|
||||
addps xmm3, xmm4
|
||||
addps xmm4, xmm5
|
||||
addps xmm5, xmm6
|
||||
addps xmm6, xmm7
|
||||
addps xmm7, xmm0
|
||||
// 17:
|
||||
addps xmm0, xmm1
|
||||
addps xmm1, xmm2
|
||||
addps xmm2, xmm3
|
||||
addps xmm3, xmm4
|
||||
addps xmm4, xmm5
|
||||
addps xmm5, xmm6
|
||||
addps xmm6, xmm7
|
||||
addps xmm7, xmm0
|
||||
// 18:
|
||||
addps xmm0, xmm1
|
||||
addps xmm1, xmm2
|
||||
addps xmm2, xmm3
|
||||
addps xmm3, xmm4
|
||||
addps xmm4, xmm5
|
||||
addps xmm5, xmm6
|
||||
addps xmm6, xmm7
|
||||
addps xmm7, xmm0
|
||||
// 19:
|
||||
addps xmm0, xmm1
|
||||
addps xmm1, xmm2
|
||||
addps xmm2, xmm3
|
||||
addps xmm3, xmm4
|
||||
addps xmm4, xmm5
|
||||
addps xmm5, xmm6
|
||||
addps xmm6, xmm7
|
||||
addps xmm7, xmm0
|
||||
// 20:
|
||||
addps xmm0, xmm1
|
||||
addps xmm1, xmm2
|
||||
addps xmm2, xmm3
|
||||
addps xmm3, xmm4
|
||||
addps xmm4, xmm5
|
||||
addps xmm5, xmm6
|
||||
addps xmm6, xmm7
|
||||
addps xmm7, xmm0
|
||||
// 21:
|
||||
addps xmm0, xmm1
|
||||
addps xmm1, xmm2
|
||||
addps xmm2, xmm3
|
||||
addps xmm3, xmm4
|
||||
addps xmm4, xmm5
|
||||
addps xmm5, xmm6
|
||||
addps xmm6, xmm7
|
||||
addps xmm7, xmm0
|
||||
// 22:
|
||||
addps xmm0, xmm1
|
||||
addps xmm1, xmm2
|
||||
addps xmm2, xmm3
|
||||
addps xmm3, xmm4
|
||||
addps xmm4, xmm5
|
||||
addps xmm5, xmm6
|
||||
addps xmm6, xmm7
|
||||
addps xmm7, xmm0
|
||||
// 23:
|
||||
addps xmm0, xmm1
|
||||
addps xmm1, xmm2
|
||||
addps xmm2, xmm3
|
||||
addps xmm3, xmm4
|
||||
addps xmm4, xmm5
|
||||
addps xmm5, xmm6
|
||||
addps xmm6, xmm7
|
||||
addps xmm7, xmm0
|
||||
// 24:
|
||||
addps xmm0, xmm1
|
||||
addps xmm1, xmm2
|
||||
addps xmm2, xmm3
|
||||
addps xmm3, xmm4
|
||||
addps xmm4, xmm5
|
||||
addps xmm5, xmm6
|
||||
addps xmm6, xmm7
|
||||
addps xmm7, xmm0
|
||||
// 25:
|
||||
addps xmm0, xmm1
|
||||
addps xmm1, xmm2
|
||||
addps xmm2, xmm3
|
||||
addps xmm3, xmm4
|
||||
addps xmm4, xmm5
|
||||
addps xmm5, xmm6
|
||||
addps xmm6, xmm7
|
||||
addps xmm7, xmm0
|
||||
// 26:
|
||||
addps xmm0, xmm1
|
||||
addps xmm1, xmm2
|
||||
addps xmm2, xmm3
|
||||
addps xmm3, xmm4
|
||||
addps xmm4, xmm5
|
||||
addps xmm5, xmm6
|
||||
addps xmm6, xmm7
|
||||
addps xmm7, xmm0
|
||||
// 27:
|
||||
addps xmm0, xmm1
|
||||
addps xmm1, xmm2
|
||||
addps xmm2, xmm3
|
||||
addps xmm3, xmm4
|
||||
addps xmm4, xmm5
|
||||
addps xmm5, xmm6
|
||||
addps xmm6, xmm7
|
||||
addps xmm7, xmm0
|
||||
// 28:
|
||||
addps xmm0, xmm1
|
||||
addps xmm1, xmm2
|
||||
addps xmm2, xmm3
|
||||
addps xmm3, xmm4
|
||||
addps xmm4, xmm5
|
||||
addps xmm5, xmm6
|
||||
addps xmm6, xmm7
|
||||
addps xmm7, xmm0
|
||||
// 29:
|
||||
addps xmm0, xmm1
|
||||
addps xmm1, xmm2
|
||||
addps xmm2, xmm3
|
||||
addps xmm3, xmm4
|
||||
addps xmm4, xmm5
|
||||
addps xmm5, xmm6
|
||||
addps xmm6, xmm7
|
||||
addps xmm7, xmm0
|
||||
// 30:
|
||||
addps xmm0, xmm1
|
||||
addps xmm1, xmm2
|
||||
addps xmm2, xmm3
|
||||
addps xmm3, xmm4
|
||||
addps xmm4, xmm5
|
||||
addps xmm5, xmm6
|
||||
addps xmm6, xmm7
|
||||
addps xmm7, xmm0
|
||||
// 31:
|
||||
addps xmm0, xmm1
|
||||
addps xmm1, xmm2
|
||||
addps xmm2, xmm3
|
||||
addps xmm3, xmm4
|
||||
addps xmm4, xmm5
|
||||
addps xmm5, xmm6
|
||||
addps xmm6, xmm7
|
||||
addps xmm7, xmm0
|
||||
//----------------------
|
||||
dec eax
|
||||
jnz bsLoop
|
||||
}
|
||||
# else
|
||||
# error "Unsupported compiler"
|
||||
# endif /* COMPILER_MICROSOFT */
|
||||
#endif /* COMPILER_GCC */
|
||||
}
|
||||
#endif /* INLINE_ASSEMBLY_SUPPORTED */
|
71
src/3rdparty/libcpuid/asm-bits.h
vendored
71
src/3rdparty/libcpuid/asm-bits.h
vendored
|
@ -1,71 +0,0 @@
|
|||
/*
|
||||
* Copyright 2008 Veselin Georgiev,
|
||||
* anrieffNOSPAM @ mgail_DOT.com (convert to gmail)
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
||||
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
#ifndef __ASM_BITS_H__
|
||||
#define __ASM_BITS_H__
|
||||
#include "libcpuid.h"
|
||||
|
||||
/* Determine Compiler: */
|
||||
#if defined(_MSC_VER)
|
||||
#if !defined(COMPILER_MICROSOFT)
|
||||
# define COMPILER_MICROSOFT
|
||||
#endif
|
||||
#elif defined(__GNUC__)
|
||||
#if !defined(COMPILER_GCC)
|
||||
# define COMPILER_GCC
|
||||
#endif
|
||||
#elif defined(__clang__)
|
||||
#if !defined(COMPILER_CLANG)
|
||||
# define COMPILER_CLANG
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* Determine Platform */
|
||||
#if defined(__x86_64__) || defined(_M_AMD64)
|
||||
#if !defined(PLATFORM_X64)
|
||||
# define PLATFORM_X64
|
||||
#endif
|
||||
#elif defined(__i386__) || defined(_M_IX86)
|
||||
#if !defined(PLATFORM_X86)
|
||||
# define PLATFORM_X86
|
||||
#endif
|
||||
#elif defined(__ARMEL__)
|
||||
#if !defined(PLATFORM_ARM)
|
||||
# define PLATFORM_ARM
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* Under Windows/AMD64 with MSVC, inline assembly isn't supported */
|
||||
#if (((defined(COMPILER_GCC) || defined(COMPILER_CLANG))) && \
|
||||
(defined(PLATFORM_X64) || defined(PLATFORM_X86) || defined(PLATFORM_ARM))) || \
|
||||
(defined(COMPILER_MICROSOFT) && defined(PLATFORM_X86))
|
||||
# define INLINE_ASM_SUPPORTED
|
||||
#endif
|
||||
|
||||
int cpuid_exists_by_eflags(void);
|
||||
void exec_cpuid(uint32_t *regs);
|
||||
void busy_sse_loop(int cycles);
|
||||
|
||||
#endif /* __ASM_BITS_H__ */
|
389
src/3rdparty/libcpuid/cpuid_main.c
vendored
389
src/3rdparty/libcpuid/cpuid_main.c
vendored
|
@ -1,389 +0,0 @@
|
|||
/*
|
||||
* Copyright 2008 Veselin Georgiev,
|
||||
* anrieffNOSPAM @ mgail_DOT.com (convert to gmail)
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
||||
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
#include "libcpuid.h"
|
||||
#include "libcpuid_internal.h"
|
||||
#include "recog_intel.h"
|
||||
#include "recog_amd.h"
|
||||
#include "asm-bits.h"
|
||||
#include "libcpuid_util.h"
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
/* Implementation: */
|
||||
|
||||
static int _libcpiud_errno = ERR_OK;
|
||||
|
||||
int set_error(cpu_error_t err)
|
||||
{
|
||||
_libcpiud_errno = (int) err;
|
||||
return (int) err;
|
||||
}
|
||||
|
||||
static void cpu_id_t_constructor(struct cpu_id_t* id)
|
||||
{
|
||||
memset(id, 0, sizeof(struct cpu_id_t));
|
||||
id->l1_data_cache = id->l1_instruction_cache = id->l2_cache = id->l3_cache = id->l4_cache = -1;
|
||||
id->l1_assoc = id->l2_assoc = id->l3_assoc = id->l4_assoc = -1;
|
||||
id->l1_cacheline = id->l2_cacheline = id->l3_cacheline = id->l4_cacheline = -1;
|
||||
id->sse_size = -1;
|
||||
}
|
||||
|
||||
/* get_total_cpus() system specific code: uses OS routines to determine total number of CPUs */
|
||||
#ifdef __APPLE__
|
||||
#include <unistd.h>
|
||||
#include <mach/clock_types.h>
|
||||
#include <mach/clock.h>
|
||||
#include <mach/mach.h>
|
||||
static int get_total_cpus(void)
|
||||
{
|
||||
kern_return_t kr;
|
||||
host_basic_info_data_t basic_info;
|
||||
host_info_t info = (host_info_t)&basic_info;
|
||||
host_flavor_t flavor = HOST_BASIC_INFO;
|
||||
mach_msg_type_number_t count = HOST_BASIC_INFO_COUNT;
|
||||
kr = host_info(mach_host_self(), flavor, info, &count);
|
||||
if (kr != KERN_SUCCESS) return 1;
|
||||
return basic_info.avail_cpus;
|
||||
}
|
||||
#define GET_TOTAL_CPUS_DEFINED
|
||||
#endif
|
||||
|
||||
#ifdef _WIN32
|
||||
#include <windows.h>
|
||||
static int get_total_cpus(void)
|
||||
{
|
||||
SYSTEM_INFO system_info;
|
||||
GetSystemInfo(&system_info);
|
||||
return system_info.dwNumberOfProcessors;
|
||||
}
|
||||
#define GET_TOTAL_CPUS_DEFINED
|
||||
#endif
|
||||
|
||||
#if defined linux || defined __linux__ || defined __sun
|
||||
#include <sys/sysinfo.h>
|
||||
#include <unistd.h>
|
||||
|
||||
static int get_total_cpus(void)
|
||||
{
|
||||
return sysconf(_SC_NPROCESSORS_ONLN);
|
||||
}
|
||||
#define GET_TOTAL_CPUS_DEFINED
|
||||
#endif
|
||||
|
||||
#if defined __FreeBSD__ || defined __OpenBSD__ || defined __NetBSD__ || defined __bsdi__ || defined __QNX__
|
||||
#include <sys/types.h>
|
||||
#include <sys/sysctl.h>
|
||||
|
||||
static int get_total_cpus(void)
|
||||
{
|
||||
int mib[2] = { CTL_HW, HW_NCPU };
|
||||
int ncpus;
|
||||
size_t len = sizeof(ncpus);
|
||||
if (sysctl(mib, 2, &ncpus, &len, (void *) 0, 0) != 0) return 1;
|
||||
return ncpus;
|
||||
}
|
||||
#define GET_TOTAL_CPUS_DEFINED
|
||||
#endif
|
||||
|
||||
#ifndef GET_TOTAL_CPUS_DEFINED
|
||||
static int get_total_cpus(void)
|
||||
{
|
||||
static int warning_printed = 0;
|
||||
if (!warning_printed) {
|
||||
warning_printed = 1;
|
||||
warnf("Your system is not supported by libcpuid -- don't know how to detect the\n");
|
||||
warnf("total number of CPUs on your system. It will be reported as 1.\n");
|
||||
printf("Please use cpu_id_t.logical_cpus field instead.\n");
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
#endif /* GET_TOTAL_CPUS_DEFINED */
|
||||
|
||||
|
||||
static void load_features_common(struct cpu_raw_data_t* raw, struct cpu_id_t* data)
|
||||
{
|
||||
const struct feature_map_t matchtable_edx1[] = {
|
||||
{ 0, CPU_FEATURE_FPU },
|
||||
{ 1, CPU_FEATURE_VME },
|
||||
{ 2, CPU_FEATURE_DE },
|
||||
{ 3, CPU_FEATURE_PSE },
|
||||
{ 4, CPU_FEATURE_TSC },
|
||||
{ 5, CPU_FEATURE_MSR },
|
||||
{ 6, CPU_FEATURE_PAE },
|
||||
{ 7, CPU_FEATURE_MCE },
|
||||
{ 8, CPU_FEATURE_CX8 },
|
||||
{ 9, CPU_FEATURE_APIC },
|
||||
{ 11, CPU_FEATURE_SEP },
|
||||
{ 12, CPU_FEATURE_MTRR },
|
||||
{ 13, CPU_FEATURE_PGE },
|
||||
{ 14, CPU_FEATURE_MCA },
|
||||
{ 15, CPU_FEATURE_CMOV },
|
||||
{ 16, CPU_FEATURE_PAT },
|
||||
{ 17, CPU_FEATURE_PSE36 },
|
||||
{ 19, CPU_FEATURE_CLFLUSH },
|
||||
{ 23, CPU_FEATURE_MMX },
|
||||
{ 24, CPU_FEATURE_FXSR },
|
||||
{ 25, CPU_FEATURE_SSE },
|
||||
{ 26, CPU_FEATURE_SSE2 },
|
||||
{ 28, CPU_FEATURE_HT },
|
||||
};
|
||||
const struct feature_map_t matchtable_ecx1[] = {
|
||||
{ 0, CPU_FEATURE_PNI },
|
||||
{ 1, CPU_FEATURE_PCLMUL },
|
||||
{ 3, CPU_FEATURE_MONITOR },
|
||||
{ 9, CPU_FEATURE_SSSE3 },
|
||||
{ 12, CPU_FEATURE_FMA3 },
|
||||
{ 13, CPU_FEATURE_CX16 },
|
||||
{ 19, CPU_FEATURE_SSE4_1 },
|
||||
{ 20, CPU_FEATURE_SSE4_2 },
|
||||
{ 22, CPU_FEATURE_MOVBE },
|
||||
{ 23, CPU_FEATURE_POPCNT },
|
||||
{ 25, CPU_FEATURE_AES },
|
||||
{ 26, CPU_FEATURE_XSAVE },
|
||||
{ 27, CPU_FEATURE_OSXSAVE },
|
||||
{ 28, CPU_FEATURE_AVX },
|
||||
{ 29, CPU_FEATURE_F16C },
|
||||
{ 30, CPU_FEATURE_RDRAND },
|
||||
};
|
||||
const struct feature_map_t matchtable_ebx7[] = {
|
||||
{ 3, CPU_FEATURE_BMI1 },
|
||||
{ 5, CPU_FEATURE_AVX2 },
|
||||
{ 8, CPU_FEATURE_BMI2 },
|
||||
};
|
||||
const struct feature_map_t matchtable_edx81[] = {
|
||||
{ 11, CPU_FEATURE_SYSCALL },
|
||||
{ 27, CPU_FEATURE_RDTSCP },
|
||||
{ 29, CPU_FEATURE_LM },
|
||||
};
|
||||
const struct feature_map_t matchtable_ecx81[] = {
|
||||
{ 0, CPU_FEATURE_LAHF_LM },
|
||||
};
|
||||
const struct feature_map_t matchtable_edx87[] = {
|
||||
{ 8, CPU_FEATURE_CONSTANT_TSC },
|
||||
};
|
||||
if (raw->basic_cpuid[0][0] >= 1) {
|
||||
match_features(matchtable_edx1, COUNT_OF(matchtable_edx1), raw->basic_cpuid[1][3], data);
|
||||
match_features(matchtable_ecx1, COUNT_OF(matchtable_ecx1), raw->basic_cpuid[1][2], data);
|
||||
}
|
||||
if (raw->basic_cpuid[0][0] >= 7) {
|
||||
match_features(matchtable_ebx7, COUNT_OF(matchtable_ebx7), raw->basic_cpuid[7][1], data);
|
||||
}
|
||||
if (raw->ext_cpuid[0][0] >= 0x80000001) {
|
||||
match_features(matchtable_edx81, COUNT_OF(matchtable_edx81), raw->ext_cpuid[1][3], data);
|
||||
match_features(matchtable_ecx81, COUNT_OF(matchtable_ecx81), raw->ext_cpuid[1][2], data);
|
||||
}
|
||||
if (raw->ext_cpuid[0][0] >= 0x80000007) {
|
||||
match_features(matchtable_edx87, COUNT_OF(matchtable_edx87), raw->ext_cpuid[7][3], data);
|
||||
}
|
||||
if (data->flags[CPU_FEATURE_SSE]) {
|
||||
/* apply guesswork to check if the SSE unit width is 128 bit */
|
||||
switch (data->vendor) {
|
||||
case VENDOR_AMD:
|
||||
data->sse_size = (data->ext_family >= 16 && data->ext_family != 17) ? 128 : 64;
|
||||
break;
|
||||
case VENDOR_INTEL:
|
||||
data->sse_size = (data->family == 6 && data->ext_model >= 15) ? 128 : 64;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
/* leave the CPU_FEATURE_128BIT_SSE_AUTH 0; the advanced per-vendor detection routines
|
||||
* will set it accordingly if they detect the needed bit */
|
||||
}
|
||||
}
|
||||
|
||||
static cpu_vendor_t cpuid_vendor_identify(const uint32_t *raw_vendor, char *vendor_str)
|
||||
{
|
||||
int i;
|
||||
cpu_vendor_t vendor = VENDOR_UNKNOWN;
|
||||
const struct { cpu_vendor_t vendor; char match[16]; }
|
||||
matchtable[NUM_CPU_VENDORS] = {
|
||||
/* source: http://www.sandpile.org/ia32/cpuid.htm */
|
||||
{ VENDOR_INTEL , "GenuineIntel" },
|
||||
{ VENDOR_AMD , "AuthenticAMD" },
|
||||
{ VENDOR_CYRIX , "CyrixInstead" },
|
||||
{ VENDOR_NEXGEN , "NexGenDriven" },
|
||||
{ VENDOR_TRANSMETA , "GenuineTMx86" },
|
||||
{ VENDOR_UMC , "UMC UMC UMC " },
|
||||
{ VENDOR_CENTAUR , "CentaurHauls" },
|
||||
{ VENDOR_RISE , "RiseRiseRise" },
|
||||
{ VENDOR_SIS , "SiS SiS SiS " },
|
||||
{ VENDOR_NSC , "Geode by NSC" },
|
||||
};
|
||||
|
||||
memcpy(vendor_str + 0, &raw_vendor[1], 4);
|
||||
memcpy(vendor_str + 4, &raw_vendor[3], 4);
|
||||
memcpy(vendor_str + 8, &raw_vendor[2], 4);
|
||||
vendor_str[12] = 0;
|
||||
|
||||
/* Determine vendor: */
|
||||
for (i = 0; i < NUM_CPU_VENDORS; i++)
|
||||
if (!strcmp(vendor_str, matchtable[i].match)) {
|
||||
vendor = matchtable[i].vendor;
|
||||
break;
|
||||
}
|
||||
return vendor;
|
||||
}
|
||||
|
||||
static int cpuid_basic_identify(struct cpu_raw_data_t* raw, struct cpu_id_t* data)
|
||||
{
|
||||
int i, j, basic, xmodel, xfamily, ext;
|
||||
char brandstr[64] = {0};
|
||||
data->vendor = cpuid_vendor_identify(raw->basic_cpuid[0], data->vendor_str);
|
||||
|
||||
if (data->vendor == VENDOR_UNKNOWN)
|
||||
return set_error(ERR_CPU_UNKN);
|
||||
basic = raw->basic_cpuid[0][0];
|
||||
if (basic >= 1) {
|
||||
data->family = (raw->basic_cpuid[1][0] >> 8) & 0xf;
|
||||
data->model = (raw->basic_cpuid[1][0] >> 4) & 0xf;
|
||||
data->stepping = raw->basic_cpuid[1][0] & 0xf;
|
||||
xmodel = (raw->basic_cpuid[1][0] >> 16) & 0xf;
|
||||
xfamily = (raw->basic_cpuid[1][0] >> 20) & 0xff;
|
||||
if (data->vendor == VENDOR_AMD && data->family < 0xf)
|
||||
data->ext_family = data->family;
|
||||
else
|
||||
data->ext_family = data->family + xfamily;
|
||||
data->ext_model = data->model + (xmodel << 4);
|
||||
}
|
||||
ext = raw->ext_cpuid[0][0] - 0x8000000;
|
||||
|
||||
/* obtain the brand string, if present: */
|
||||
if (ext >= 4) {
|
||||
for (i = 0; i < 3; i++)
|
||||
for (j = 0; j < 4; j++)
|
||||
memcpy(brandstr + i * 16 + j * 4,
|
||||
&raw->ext_cpuid[2 + i][j], 4);
|
||||
brandstr[48] = 0;
|
||||
i = 0;
|
||||
while (brandstr[i] == ' ') i++;
|
||||
strncpy(data->brand_str, brandstr + i, sizeof(data->brand_str));
|
||||
data->brand_str[48] = 0;
|
||||
}
|
||||
load_features_common(raw, data);
|
||||
data->total_logical_cpus = get_total_cpus();
|
||||
return set_error(ERR_OK);
|
||||
}
|
||||
|
||||
/* Interface: */
|
||||
|
||||
int cpuid_get_total_cpus(void)
|
||||
{
|
||||
return get_total_cpus();
|
||||
}
|
||||
|
||||
int cpuid_present(void)
|
||||
{
|
||||
return cpuid_exists_by_eflags();
|
||||
}
|
||||
|
||||
void cpu_exec_cpuid(uint32_t eax, uint32_t* regs)
|
||||
{
|
||||
regs[0] = eax;
|
||||
regs[1] = regs[2] = regs[3] = 0;
|
||||
exec_cpuid(regs);
|
||||
}
|
||||
|
||||
void cpu_exec_cpuid_ext(uint32_t* regs)
|
||||
{
|
||||
exec_cpuid(regs);
|
||||
}
|
||||
|
||||
int cpuid_get_raw_data(struct cpu_raw_data_t* data)
|
||||
{
|
||||
unsigned i;
|
||||
if (!cpuid_present())
|
||||
return set_error(ERR_NO_CPUID);
|
||||
for (i = 0; i < 32; i++)
|
||||
cpu_exec_cpuid(i, data->basic_cpuid[i]);
|
||||
for (i = 0; i < 32; i++)
|
||||
cpu_exec_cpuid(0x80000000 + i, data->ext_cpuid[i]);
|
||||
for (i = 0; i < MAX_INTELFN4_LEVEL; i++) {
|
||||
memset(data->intel_fn4[i], 0, sizeof(data->intel_fn4[i]));
|
||||
data->intel_fn4[i][0] = 4;
|
||||
data->intel_fn4[i][2] = i;
|
||||
cpu_exec_cpuid_ext(data->intel_fn4[i]);
|
||||
}
|
||||
for (i = 0; i < MAX_INTELFN11_LEVEL; i++) {
|
||||
memset(data->intel_fn11[i], 0, sizeof(data->intel_fn11[i]));
|
||||
data->intel_fn11[i][0] = 11;
|
||||
data->intel_fn11[i][2] = i;
|
||||
cpu_exec_cpuid_ext(data->intel_fn11[i]);
|
||||
}
|
||||
for (i = 0; i < MAX_INTELFN12H_LEVEL; i++) {
|
||||
memset(data->intel_fn12h[i], 0, sizeof(data->intel_fn12h[i]));
|
||||
data->intel_fn12h[i][0] = 0x12;
|
||||
data->intel_fn12h[i][2] = i;
|
||||
cpu_exec_cpuid_ext(data->intel_fn12h[i]);
|
||||
}
|
||||
for (i = 0; i < MAX_INTELFN14H_LEVEL; i++) {
|
||||
memset(data->intel_fn14h[i], 0, sizeof(data->intel_fn14h[i]));
|
||||
data->intel_fn14h[i][0] = 0x14;
|
||||
data->intel_fn14h[i][2] = i;
|
||||
cpu_exec_cpuid_ext(data->intel_fn14h[i]);
|
||||
}
|
||||
return set_error(ERR_OK);
|
||||
}
|
||||
|
||||
int cpu_ident_internal(struct cpu_raw_data_t* raw, struct cpu_id_t* data, struct internal_id_info_t* internal)
|
||||
{
|
||||
int r;
|
||||
struct cpu_raw_data_t myraw;
|
||||
if (!raw) {
|
||||
if ((r = cpuid_get_raw_data(&myraw)) < 0)
|
||||
return set_error(r);
|
||||
raw = &myraw;
|
||||
}
|
||||
cpu_id_t_constructor(data);
|
||||
if ((r = cpuid_basic_identify(raw, data)) < 0)
|
||||
return set_error(r);
|
||||
switch (data->vendor) {
|
||||
case VENDOR_INTEL:
|
||||
r = cpuid_identify_intel(raw, data, internal);
|
||||
break;
|
||||
case VENDOR_AMD:
|
||||
r = cpuid_identify_amd(raw, data, internal);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
return set_error(r);
|
||||
}
|
||||
|
||||
int cpu_identify(struct cpu_raw_data_t* raw, struct cpu_id_t* data)
|
||||
{
|
||||
struct internal_id_info_t throwaway;
|
||||
return cpu_ident_internal(raw, data, &throwaway);
|
||||
}
|
||||
|
||||
const char* cpuid_lib_version(void)
|
||||
{
|
||||
return VERSION;
|
||||
}
|
58
src/3rdparty/libcpuid/intel_code_t.h
vendored
58
src/3rdparty/libcpuid/intel_code_t.h
vendored
|
@ -1,58 +0,0 @@
|
|||
/*
|
||||
* Copyright 2016 Veselin Georgiev,
|
||||
* anrieffNOSPAM @ mgail_DOT.com (convert to gmail)
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
||||
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* This file contains a list of internal codes we use in detection. It is
|
||||
* of no external use and isn't a complete list of intel products.
|
||||
*/
|
||||
CODE2(PENTIUM, 2000),
|
||||
|
||||
CODE(IRWIN),
|
||||
CODE(POTOMAC),
|
||||
CODE(GAINESTOWN),
|
||||
CODE(WESTMERE),
|
||||
|
||||
CODE(PENTIUM_M),
|
||||
CODE(NOT_CELERON),
|
||||
|
||||
CODE(CORE_SOLO),
|
||||
CODE(MOBILE_CORE_SOLO),
|
||||
CODE(CORE_DUO),
|
||||
CODE(MOBILE_CORE_DUO),
|
||||
|
||||
CODE(WOLFDALE),
|
||||
CODE(MEROM),
|
||||
CODE(PENRYN),
|
||||
CODE(QUAD_CORE),
|
||||
CODE(DUAL_CORE_HT),
|
||||
CODE(QUAD_CORE_HT),
|
||||
CODE(MORE_THAN_QUADCORE),
|
||||
CODE(PENTIUM_D),
|
||||
|
||||
CODE(SILVERTHORNE),
|
||||
CODE(DIAMONDVILLE),
|
||||
CODE(PINEVIEW),
|
||||
CODE(CEDARVIEW),
|
678
src/3rdparty/libcpuid/libcpuid.h
vendored
678
src/3rdparty/libcpuid/libcpuid.h
vendored
|
@ -1,678 +0,0 @@
|
|||
/*
|
||||
* Copyright 2008 Veselin Georgiev,
|
||||
* anrieffNOSPAM @ mgail_DOT.com (convert to gmail)
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
||||
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
#ifndef __LIBCPUID_H__
|
||||
#define __LIBCPUID_H__
|
||||
/**
|
||||
* \file libcpuid.h
|
||||
* \author Veselin Georgiev
|
||||
* \date Oct 2008
|
||||
* \version 0.4.0
|
||||
*
|
||||
* Version history:
|
||||
*
|
||||
* * 0.1.0 (2008-10-15): initial adaptation from wxfractgui sources
|
||||
* * 0.1.1 (2009-07-06): Added intel_fn11 fields to cpu_raw_data_t to handle
|
||||
* new processor topology enumeration required on Core i7
|
||||
* * 0.1.2 (2009-09-26): Added support for MSR reading through self-extracting
|
||||
* kernel driver on Win32.
|
||||
* * 0.1.3 (2010-04-20): Added support for greater more accurate CPU clock
|
||||
* measurements with cpu_clock_by_ic()
|
||||
* * 0.2.0 (2011-10-11): Support for AMD Bulldozer CPUs, 128-bit SSE unit size
|
||||
* checking. A backwards-incompatible change, since the
|
||||
* sizeof cpu_id_t is now different.
|
||||
* * 0.2.1 (2012-05-26): Support for Ivy Bridge, and detecting the presence of
|
||||
* the RdRand instruction.
|
||||
* * 0.2.2 (2015-11-04): Support for newer processors up to Haswell and Vishera.
|
||||
* Fix clock detection in cpu_clock_by_ic() for Bulldozer.
|
||||
* More entries supported in cpu_msrinfo().
|
||||
* *BSD and Solaris support (unofficial).
|
||||
* * 0.3.0 (2016-07-09): Support for Skylake; MSR ops in FreeBSD; INFO_VOLTAGE
|
||||
* for AMD CPUs. Level 4 cache support for Crystalwell
|
||||
* (a backwards-incompatible change since the sizeof
|
||||
* cpu_raw_data_t is now different).
|
||||
* * 0.4.0 (2016-09-30): Better detection of AMD clock multiplier with msrinfo.
|
||||
* Support for Intel SGX detection
|
||||
* (a backwards-incompatible change since the sizeof
|
||||
* cpu_raw_data_t and cpu_id_t is now different).
|
||||
*/
|
||||
|
||||
/** @mainpage A simple libcpuid introduction
|
||||
*
|
||||
* LibCPUID provides CPU identification and access to the CPUID and RDTSC
|
||||
* instructions on the x86.
|
||||
* <p>
|
||||
* To execute CPUID, use \ref cpu_exec_cpuid <br>
|
||||
* To execute RDTSC, use \ref cpu_rdtsc <br>
|
||||
* To fetch the CPUID info needed for CPU identification, use
|
||||
* \ref cpuid_get_raw_data <br>
|
||||
* To make sense of that data (decode, extract features), use \ref cpu_identify <br>
|
||||
* To detect the CPU speed, use either \ref cpu_clock, \ref cpu_clock_by_os,
|
||||
* \ref cpu_tsc_mark + \ref cpu_tsc_unmark + \ref cpu_clock_by_mark,
|
||||
* \ref cpu_clock_measure or \ref cpu_clock_by_ic.
|
||||
* Read carefully for pros/cons of each method. <br>
|
||||
*
|
||||
* To read MSRs, use \ref cpu_msr_driver_open to get a handle, and then
|
||||
* \ref cpu_rdmsr for querying abilities. Some MSR decoding is available on recent
|
||||
* CPUs, and can be queried through \ref cpu_msrinfo; the various types of queries
|
||||
* are described in \ref cpu_msrinfo_request_t.
|
||||
* </p>
|
||||
*/
|
||||
|
||||
/** @defgroup libcpuid LibCPUID
|
||||
* @brief LibCPUID provides CPU identification
|
||||
@{ */
|
||||
|
||||
/* Include some integer type specifications: */
|
||||
#include "libcpuid_types.h"
|
||||
|
||||
/* Some limits and other constants */
|
||||
#include "libcpuid_constants.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/**
|
||||
* @brief CPU vendor, as guessed from the Vendor String.
|
||||
*/
|
||||
typedef enum {
|
||||
VENDOR_INTEL = 0, /*!< Intel CPU */
|
||||
VENDOR_AMD, /*!< AMD CPU */
|
||||
VENDOR_CYRIX, /*!< Cyrix CPU */
|
||||
VENDOR_NEXGEN, /*!< NexGen CPU */
|
||||
VENDOR_TRANSMETA, /*!< Transmeta CPU */
|
||||
VENDOR_UMC, /*!< x86 CPU by UMC */
|
||||
VENDOR_CENTAUR, /*!< x86 CPU by IDT */
|
||||
VENDOR_RISE, /*!< x86 CPU by Rise Technology */
|
||||
VENDOR_SIS, /*!< x86 CPU by SiS */
|
||||
VENDOR_NSC, /*!< x86 CPU by National Semiconductor */
|
||||
|
||||
NUM_CPU_VENDORS, /*!< Valid CPU vendor ids: 0..NUM_CPU_VENDORS - 1 */
|
||||
VENDOR_UNKNOWN = -1,
|
||||
} cpu_vendor_t;
|
||||
#define NUM_CPU_VENDORS NUM_CPU_VENDORS
|
||||
|
||||
/**
|
||||
* @brief Contains just the raw CPUID data.
|
||||
*
|
||||
* This contains only the most basic CPU data, required to do identification
|
||||
* and feature recognition. Every processor should be identifiable using this
|
||||
* data only.
|
||||
*/
|
||||
struct cpu_raw_data_t {
|
||||
/** contains results of CPUID for eax = 0, 1, ...*/
|
||||
uint32_t basic_cpuid[MAX_CPUID_LEVEL][4];
|
||||
|
||||
/** contains results of CPUID for eax = 0x80000000, 0x80000001, ...*/
|
||||
uint32_t ext_cpuid[MAX_EXT_CPUID_LEVEL][4];
|
||||
|
||||
/** when the CPU is intel and it supports deterministic cache
|
||||
information: this contains the results of CPUID for eax = 4
|
||||
and ecx = 0, 1, ... */
|
||||
uint32_t intel_fn4[MAX_INTELFN4_LEVEL][4];
|
||||
|
||||
/** when the CPU is intel and it supports leaf 0Bh (Extended Topology
|
||||
enumeration leaf), this stores the result of CPUID with
|
||||
eax = 11 and ecx = 0, 1, 2... */
|
||||
uint32_t intel_fn11[MAX_INTELFN11_LEVEL][4];
|
||||
|
||||
/** when the CPU is intel and supports leaf 12h (SGX enumeration leaf),
|
||||
* this stores the result of CPUID with eax = 0x12 and
|
||||
* ecx = 0, 1, 2... */
|
||||
uint32_t intel_fn12h[MAX_INTELFN12H_LEVEL][4];
|
||||
|
||||
/** when the CPU is intel and supports leaf 14h (Intel Processor Trace
|
||||
* capabilities leaf).
|
||||
* this stores the result of CPUID with eax = 0x12 and
|
||||
* ecx = 0, 1, 2... */
|
||||
uint32_t intel_fn14h[MAX_INTELFN14H_LEVEL][4];
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief This contains information about SGX features of the processor
|
||||
* Example usage:
|
||||
* @code
|
||||
* ...
|
||||
* struct cpu_raw_data_t raw;
|
||||
* struct cpu_id_t id;
|
||||
*
|
||||
* if (cpuid_get_raw_data(&raw) == 0 && cpu_identify(&raw, &id) == 0 && id.sgx.present) {
|
||||
* printf("SGX is present.\n");
|
||||
* printf("SGX1 instructions: %s.\n", id.sgx.flags[INTEL_SGX1] ? "present" : "absent");
|
||||
* printf("SGX2 instructions: %s.\n", id.sgx.flags[INTEL_SGX2] ? "present" : "absent");
|
||||
* printf("Max 32-bit enclave size: 2^%d bytes.\n", id.sgx.max_enclave_32bit);
|
||||
* printf("Max 64-bit enclave size: 2^%d bytes.\n", id.sgx.max_enclave_64bit);
|
||||
* for (int i = 0; i < id.sgx.num_epc_sections; i++) {
|
||||
* struct cpu_epc_t epc = cpuid_get_epc(i, NULL);
|
||||
* printf("EPC section #%d: address = %x, size = %d bytes.\n", epc.address, epc.size);
|
||||
* }
|
||||
* } else {
|
||||
* printf("SGX is not present.\n");
|
||||
* }
|
||||
* @endcode
|
||||
*/
|
||||
struct cpu_sgx_t {
|
||||
/** Whether SGX is present (boolean) */
|
||||
uint32_t present;
|
||||
|
||||
/** Max enclave size in 32-bit mode. This is a power-of-two value:
|
||||
* if it is "31", then the max enclave size is 2^31 bytes (2 GiB).
|
||||
*/
|
||||
uint8_t max_enclave_32bit;
|
||||
|
||||
/** Max enclave size in 64-bit mode. This is a power-of-two value:
|
||||
* if it is "36", then the max enclave size is 2^36 bytes (64 GiB).
|
||||
*/
|
||||
uint8_t max_enclave_64bit;
|
||||
|
||||
/**
|
||||
* contains SGX feature flags. See the \ref cpu_sgx_feature_t
|
||||
* "INTEL_SGX*" macros below.
|
||||
*/
|
||||
uint8_t flags[SGX_FLAGS_MAX];
|
||||
|
||||
/** number of Enclave Page Cache (EPC) sections. Info for each
|
||||
* section is available through the \ref cpuid_get_epc() function
|
||||
*/
|
||||
int num_epc_sections;
|
||||
|
||||
/** bit vector of the supported extended features that can be written
|
||||
* to the MISC region of the SSA (Save State Area)
|
||||
*/
|
||||
uint32_t misc_select;
|
||||
|
||||
/** a bit vector of the attributes that can be set to SECS.ATTRIBUTES
|
||||
* via ECREATE. Corresponds to bits 0-63 (incl.) of SECS.ATTRIBUTES.
|
||||
*/
|
||||
uint64_t secs_attributes;
|
||||
|
||||
/** a bit vector of the bits that can be set in the XSAVE feature
|
||||
* request mask; Corresponds to bits 64-127 of SECS.ATTRIBUTES.
|
||||
*/
|
||||
uint64_t secs_xfrm;
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief This contains the recognized CPU features/info
|
||||
*/
|
||||
struct cpu_id_t {
|
||||
/** contains the CPU vendor string, e.g. "GenuineIntel" */
|
||||
char vendor_str[VENDOR_STR_MAX];
|
||||
|
||||
/** contains the brand string, e.g. "Intel(R) Xeon(TM) CPU 2.40GHz" */
|
||||
char brand_str[BRAND_STR_MAX];
|
||||
|
||||
/** contains the recognized CPU vendor */
|
||||
cpu_vendor_t vendor;
|
||||
|
||||
/**
|
||||
* contain CPU flags. Used to test for features. See
|
||||
* the \ref cpu_feature_t "CPU_FEATURE_*" macros below.
|
||||
* @see Features
|
||||
*/
|
||||
uint8_t flags[CPU_FLAGS_MAX];
|
||||
|
||||
/** CPU family */
|
||||
int32_t family;
|
||||
|
||||
/** CPU model */
|
||||
int32_t model;
|
||||
|
||||
/** CPU stepping */
|
||||
int32_t stepping;
|
||||
|
||||
/** CPU extended family */
|
||||
int32_t ext_family;
|
||||
|
||||
/** CPU extended model */
|
||||
int32_t ext_model;
|
||||
|
||||
/** Number of CPU cores on the current processor */
|
||||
int32_t num_cores;
|
||||
|
||||
/**
|
||||
* Number of logical processors on the current processor.
|
||||
* Could be more than the number of physical cores,
|
||||
* e.g. when the processor has HyperThreading.
|
||||
*/
|
||||
int32_t num_logical_cpus;
|
||||
|
||||
/**
|
||||
* The total number of logical processors.
|
||||
* The same value is availabe through \ref cpuid_get_total_cpus.
|
||||
*
|
||||
* This is num_logical_cpus * {total physical processors in the system}
|
||||
* (but only on a real system, under a VM this number may be lower).
|
||||
*
|
||||
* If you're writing a multithreaded program and you want to run it on
|
||||
* all CPUs, this is the number of threads you need.
|
||||
*
|
||||
* @note in a VM, this will exactly match the number of CPUs set in
|
||||
* the VM's configuration.
|
||||
*
|
||||
*/
|
||||
int32_t total_logical_cpus;
|
||||
|
||||
/**
|
||||
* L1 data cache size in KB. Could be zero, if the CPU lacks cache.
|
||||
* If the size cannot be determined, it will be -1.
|
||||
*/
|
||||
int32_t l1_data_cache;
|
||||
|
||||
/**
|
||||
* L1 instruction cache size in KB. Could be zero, if the CPU lacks
|
||||
* cache. If the size cannot be determined, it will be -1.
|
||||
* @note On some Intel CPUs, whose instruction cache is in fact
|
||||
* a trace cache, the size will be expressed in K uOps.
|
||||
*/
|
||||
int32_t l1_instruction_cache;
|
||||
|
||||
/**
|
||||
* L2 cache size in KB. Could be zero, if the CPU lacks L2 cache.
|
||||
* If the size of the cache could not be determined, it will be -1
|
||||
*/
|
||||
int32_t l2_cache;
|
||||
|
||||
/** L3 cache size in KB. Zero on most systems */
|
||||
int32_t l3_cache;
|
||||
|
||||
/** L4 cache size in KB. Zero on most systems */
|
||||
int32_t l4_cache;
|
||||
|
||||
/** Cache associativity for the L1 data cache. -1 if undetermined */
|
||||
int32_t l1_assoc;
|
||||
|
||||
/** Cache associativity for the L2 cache. -1 if undetermined */
|
||||
int32_t l2_assoc;
|
||||
|
||||
/** Cache associativity for the L3 cache. -1 if undetermined */
|
||||
int32_t l3_assoc;
|
||||
|
||||
/** Cache associativity for the L4 cache. -1 if undetermined */
|
||||
int32_t l4_assoc;
|
||||
|
||||
/** Cache-line size for L1 data cache. -1 if undetermined */
|
||||
int32_t l1_cacheline;
|
||||
|
||||
/** Cache-line size for L2 cache. -1 if undetermined */
|
||||
int32_t l2_cacheline;
|
||||
|
||||
/** Cache-line size for L3 cache. -1 if undetermined */
|
||||
int32_t l3_cacheline;
|
||||
|
||||
/** Cache-line size for L4 cache. -1 if undetermined */
|
||||
int32_t l4_cacheline;
|
||||
|
||||
/**
|
||||
* The brief and human-friendly CPU codename, which was recognized.<br>
|
||||
* Examples:
|
||||
* @code
|
||||
* +--------+--------+-------+-------+-------+---------------------------------------+-----------------------+
|
||||
* | Vendor | Family | Model | Step. | Cache | Brand String | cpu_id_t.cpu_codename |
|
||||
* +--------+--------+-------+-------+-------+---------------------------------------+-----------------------+
|
||||
* | AMD | 6 | 8 | 0 | 256 | (not available - will be ignored) | "K6-2" |
|
||||
* | Intel | 15 | 2 | 5 | 512 | "Intel(R) Xeon(TM) CPU 2.40GHz" | "Xeon (Prestonia)" |
|
||||
* | Intel | 6 | 15 | 11 | 4096 | "Intel(R) Core(TM)2 Duo CPU E6550..." | "Conroe (Core 2 Duo)" |
|
||||
* | AMD | 15 | 35 | 2 | 1024 | "Dual Core AMD Opteron(tm) Proces..." | "Opteron (Dual Core)" |
|
||||
* +--------+--------+-------+-------+-------+---------------------------------------+-----------------------+
|
||||
* @endcode
|
||||
*/
|
||||
char cpu_codename[64];
|
||||
|
||||
/** SSE execution unit size (64 or 128; -1 if N/A) */
|
||||
int32_t sse_size;
|
||||
|
||||
/**
|
||||
* contain miscellaneous detection information. Used to test about specifics of
|
||||
* certain detected features. See \ref cpu_hint_t "CPU_HINT_*" macros below.
|
||||
* @see Hints
|
||||
*/
|
||||
uint8_t detection_hints[CPU_HINTS_MAX];
|
||||
|
||||
/** contains information about SGX features if the processor, if present */
|
||||
struct cpu_sgx_t sgx;
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief CPU feature identifiers
|
||||
*
|
||||
* Usage:
|
||||
* @code
|
||||
* ...
|
||||
* struct cpu_raw_data_t raw;
|
||||
* struct cpu_id_t id;
|
||||
* if (cpuid_get_raw_data(&raw) == 0 && cpu_identify(&raw, &id) == 0) {
|
||||
* if (id.flags[CPU_FEATURE_SSE2]) {
|
||||
* // The CPU has SSE2...
|
||||
* ...
|
||||
* } else {
|
||||
* // no SSE2
|
||||
* }
|
||||
* } else {
|
||||
* // processor cannot be determined.
|
||||
* }
|
||||
* @endcode
|
||||
*/
|
||||
typedef enum {
|
||||
CPU_FEATURE_FPU = 0, /*!< Floating point unit */
|
||||
CPU_FEATURE_VME, /*!< Virtual mode extension */
|
||||
CPU_FEATURE_DE, /*!< Debugging extension */
|
||||
CPU_FEATURE_PSE, /*!< Page size extension */
|
||||
CPU_FEATURE_TSC, /*!< Time-stamp counter */
|
||||
CPU_FEATURE_MSR, /*!< Model-specific regsisters, RDMSR/WRMSR supported */
|
||||
CPU_FEATURE_PAE, /*!< Physical address extension */
|
||||
CPU_FEATURE_MCE, /*!< Machine check exception */
|
||||
CPU_FEATURE_CX8, /*!< CMPXCHG8B instruction supported */
|
||||
CPU_FEATURE_APIC, /*!< APIC support */
|
||||
CPU_FEATURE_MTRR, /*!< Memory type range registers */
|
||||
CPU_FEATURE_SEP, /*!< SYSENTER / SYSEXIT instructions supported */
|
||||
CPU_FEATURE_PGE, /*!< Page global enable */
|
||||
CPU_FEATURE_MCA, /*!< Machine check architecture */
|
||||
CPU_FEATURE_CMOV, /*!< CMOVxx instructions supported */
|
||||
CPU_FEATURE_PAT, /*!< Page attribute table */
|
||||
CPU_FEATURE_PSE36, /*!< 36-bit page address extension */
|
||||
CPU_FEATURE_PN, /*!< Processor serial # implemented (Intel P3 only) */
|
||||
CPU_FEATURE_CLFLUSH, /*!< CLFLUSH instruction supported */
|
||||
CPU_FEATURE_DTS, /*!< Debug store supported */
|
||||
CPU_FEATURE_ACPI, /*!< ACPI support (power states) */
|
||||
CPU_FEATURE_MMX, /*!< MMX instruction set supported */
|
||||
CPU_FEATURE_FXSR, /*!< FXSAVE / FXRSTOR supported */
|
||||
CPU_FEATURE_SSE, /*!< Streaming-SIMD Extensions (SSE) supported */
|
||||
CPU_FEATURE_SSE2, /*!< SSE2 instructions supported */
|
||||
CPU_FEATURE_SS, /*!< Self-snoop */
|
||||
CPU_FEATURE_HT, /*!< Hyper-threading supported (but might be disabled) */
|
||||
CPU_FEATURE_TM, /*!< Thermal monitor */
|
||||
CPU_FEATURE_IA64, /*!< IA64 supported (Itanium only) */
|
||||
CPU_FEATURE_PBE, /*!< Pending-break enable */
|
||||
CPU_FEATURE_PNI, /*!< PNI (SSE3) instructions supported */
|
||||
CPU_FEATURE_PCLMUL, /*!< PCLMULQDQ instruction supported */
|
||||
CPU_FEATURE_DTS64, /*!< 64-bit Debug store supported */
|
||||
CPU_FEATURE_MONITOR, /*!< MONITOR / MWAIT supported */
|
||||
CPU_FEATURE_DS_CPL, /*!< CPL Qualified Debug Store */
|
||||
CPU_FEATURE_VMX, /*!< Virtualization technology supported */
|
||||
CPU_FEATURE_SMX, /*!< Safer mode exceptions */
|
||||
CPU_FEATURE_EST, /*!< Enhanced SpeedStep */
|
||||
CPU_FEATURE_TM2, /*!< Thermal monitor 2 */
|
||||
CPU_FEATURE_SSSE3, /*!< SSSE3 instructionss supported (this is different from SSE3!) */
|
||||
CPU_FEATURE_CID, /*!< Context ID supported */
|
||||
CPU_FEATURE_CX16, /*!< CMPXCHG16B instruction supported */
|
||||
CPU_FEATURE_XTPR, /*!< Send Task Priority Messages disable */
|
||||
CPU_FEATURE_PDCM, /*!< Performance capabilities MSR supported */
|
||||
CPU_FEATURE_DCA, /*!< Direct cache access supported */
|
||||
CPU_FEATURE_SSE4_1, /*!< SSE 4.1 instructions supported */
|
||||
CPU_FEATURE_SSE4_2, /*!< SSE 4.2 instructions supported */
|
||||
CPU_FEATURE_SYSCALL, /*!< SYSCALL / SYSRET instructions supported */
|
||||
CPU_FEATURE_XD, /*!< Execute disable bit supported */
|
||||
CPU_FEATURE_MOVBE, /*!< MOVBE instruction supported */
|
||||
CPU_FEATURE_POPCNT, /*!< POPCNT instruction supported */
|
||||
CPU_FEATURE_AES, /*!< AES* instructions supported */
|
||||
CPU_FEATURE_XSAVE, /*!< XSAVE/XRSTOR/etc instructions supported */
|
||||
CPU_FEATURE_OSXSAVE, /*!< non-privileged copy of OSXSAVE supported */
|
||||
CPU_FEATURE_AVX, /*!< Advanced vector extensions supported */
|
||||
CPU_FEATURE_MMXEXT, /*!< AMD MMX-extended instructions supported */
|
||||
CPU_FEATURE_3DNOW, /*!< AMD 3DNow! instructions supported */
|
||||
CPU_FEATURE_3DNOWEXT, /*!< AMD 3DNow! extended instructions supported */
|
||||
CPU_FEATURE_NX, /*!< No-execute bit supported */
|
||||
CPU_FEATURE_FXSR_OPT, /*!< FFXSR: FXSAVE and FXRSTOR optimizations */
|
||||
CPU_FEATURE_RDTSCP, /*!< RDTSCP instruction supported (AMD-only) */
|
||||
CPU_FEATURE_LM, /*!< Long mode (x86_64/EM64T) supported */
|
||||
CPU_FEATURE_LAHF_LM, /*!< LAHF/SAHF supported in 64-bit mode */
|
||||
CPU_FEATURE_CMP_LEGACY, /*!< core multi-processing legacy mode */
|
||||
CPU_FEATURE_SVM, /*!< AMD Secure virtual machine */
|
||||
CPU_FEATURE_ABM, /*!< LZCNT instruction support */
|
||||
CPU_FEATURE_MISALIGNSSE,/*!< Misaligned SSE supported */
|
||||
CPU_FEATURE_SSE4A, /*!< SSE 4a from AMD */
|
||||
CPU_FEATURE_3DNOWPREFETCH, /*!< PREFETCH/PREFETCHW support */
|
||||
CPU_FEATURE_OSVW, /*!< OS Visible Workaround (AMD) */
|
||||
CPU_FEATURE_IBS, /*!< Instruction-based sampling */
|
||||
CPU_FEATURE_SSE5, /*!< SSE 5 instructions supported (deprecated, will never be 1) */
|
||||
CPU_FEATURE_SKINIT, /*!< SKINIT / STGI supported */
|
||||
CPU_FEATURE_WDT, /*!< Watchdog timer support */
|
||||
CPU_FEATURE_TS, /*!< Temperature sensor */
|
||||
CPU_FEATURE_FID, /*!< Frequency ID control */
|
||||
CPU_FEATURE_VID, /*!< Voltage ID control */
|
||||
CPU_FEATURE_TTP, /*!< THERMTRIP */
|
||||
CPU_FEATURE_TM_AMD, /*!< AMD-specified hardware thermal control */
|
||||
CPU_FEATURE_STC, /*!< Software thermal control */
|
||||
CPU_FEATURE_100MHZSTEPS,/*!< 100 MHz multiplier control */
|
||||
CPU_FEATURE_HWPSTATE, /*!< Hardware P-state control */
|
||||
CPU_FEATURE_CONSTANT_TSC, /*!< TSC ticks at constant rate */
|
||||
CPU_FEATURE_XOP, /*!< The XOP instruction set (same as the old CPU_FEATURE_SSE5) */
|
||||
CPU_FEATURE_FMA3, /*!< The FMA3 instruction set */
|
||||
CPU_FEATURE_FMA4, /*!< The FMA4 instruction set */
|
||||
CPU_FEATURE_TBM, /*!< Trailing bit manipulation instruction support */
|
||||
CPU_FEATURE_F16C, /*!< 16-bit FP convert instruction support */
|
||||
CPU_FEATURE_RDRAND, /*!< RdRand instruction */
|
||||
CPU_FEATURE_X2APIC, /*!< x2APIC, APIC_BASE.EXTD, MSRs 0000_0800h...0000_0BFFh 64-bit ICR (+030h but not +031h), no DFR (+00Eh), SELF_IPI (+040h) also see standard level 0000_000Bh */
|
||||
CPU_FEATURE_CPB, /*!< Core performance boost */
|
||||
CPU_FEATURE_APERFMPERF, /*!< MPERF/APERF MSRs support */
|
||||
CPU_FEATURE_PFI, /*!< Processor Feedback Interface support */
|
||||
CPU_FEATURE_PA, /*!< Processor accumulator */
|
||||
CPU_FEATURE_AVX2, /*!< AVX2 instructions */
|
||||
CPU_FEATURE_BMI1, /*!< BMI1 instructions */
|
||||
CPU_FEATURE_BMI2, /*!< BMI2 instructions */
|
||||
CPU_FEATURE_HLE, /*!< Hardware Lock Elision prefixes */
|
||||
CPU_FEATURE_RTM, /*!< Restricted Transactional Memory instructions */
|
||||
CPU_FEATURE_AVX512F, /*!< AVX-512 Foundation */
|
||||
CPU_FEATURE_AVX512DQ, /*!< AVX-512 Double/Quad granular insns */
|
||||
CPU_FEATURE_AVX512PF, /*!< AVX-512 Prefetch */
|
||||
CPU_FEATURE_AVX512ER, /*!< AVX-512 Exponential/Reciprocal */
|
||||
CPU_FEATURE_AVX512CD, /*!< AVX-512 Conflict detection */
|
||||
CPU_FEATURE_SHA_NI, /*!< SHA-1/SHA-256 instructions */
|
||||
CPU_FEATURE_AVX512BW, /*!< AVX-512 Byte/Word granular insns */
|
||||
CPU_FEATURE_AVX512VL, /*!< AVX-512 128/256 vector length extensions */
|
||||
CPU_FEATURE_SGX, /*!< SGX extensions. Non-autoritative, check cpu_id_t::sgx::present to verify presence */
|
||||
CPU_FEATURE_RDSEED, /*!< RDSEED instruction */
|
||||
CPU_FEATURE_ADX, /*!< ADX extensions (arbitrary precision) */
|
||||
/* termination: */
|
||||
NUM_CPU_FEATURES,
|
||||
} cpu_feature_t;
|
||||
|
||||
/**
|
||||
* @brief CPU detection hints identifiers
|
||||
*
|
||||
* Usage: similar to the flags usage
|
||||
*/
|
||||
typedef enum {
|
||||
CPU_HINT_SSE_SIZE_AUTH = 0, /*!< SSE unit size is authoritative (not only a Family/Model guesswork, but based on an actual CPUID bit) */
|
||||
/* termination */
|
||||
NUM_CPU_HINTS,
|
||||
} cpu_hint_t;
|
||||
|
||||
/**
|
||||
* @brief SGX features flags
|
||||
* \see cpu_sgx_t
|
||||
*
|
||||
* Usage:
|
||||
* @code
|
||||
* ...
|
||||
* struct cpu_raw_data_t raw;
|
||||
* struct cpu_id_t id;
|
||||
* if (cpuid_get_raw_data(&raw) == 0 && cpu_identify(&raw, &id) == 0 && id.sgx.present) {
|
||||
* if (id.sgx.flags[INTEL_SGX1])
|
||||
* // The CPU has SGX1 instructions support...
|
||||
* ...
|
||||
* } else {
|
||||
* // no SGX
|
||||
* }
|
||||
* } else {
|
||||
* // processor cannot be determined.
|
||||
* }
|
||||
* @endcode
|
||||
*/
|
||||
|
||||
typedef enum {
|
||||
INTEL_SGX1, /*!< SGX1 instructions support */
|
||||
INTEL_SGX2, /*!< SGX2 instructions support */
|
||||
|
||||
/* termination: */
|
||||
NUM_SGX_FEATURES,
|
||||
} cpu_sgx_feature_t;
|
||||
|
||||
/**
|
||||
* @brief Describes common library error codes
|
||||
*/
|
||||
typedef enum {
|
||||
ERR_OK = 0, /*!< No error */
|
||||
ERR_NO_CPUID = -1, /*!< CPUID instruction is not supported */
|
||||
ERR_NO_RDTSC = -2, /*!< RDTSC instruction is not supported */
|
||||
ERR_NO_MEM = -3, /*!< Memory allocation failed */
|
||||
ERR_OPEN = -4, /*!< File open operation failed */
|
||||
ERR_BADFMT = -5, /*!< Bad file format */
|
||||
ERR_NOT_IMP = -6, /*!< Not implemented */
|
||||
ERR_CPU_UNKN = -7, /*!< Unsupported processor */
|
||||
ERR_NO_RDMSR = -8, /*!< RDMSR instruction is not supported */
|
||||
ERR_NO_DRIVER= -9, /*!< RDMSR driver error (generic) */
|
||||
ERR_NO_PERMS = -10, /*!< No permissions to install RDMSR driver */
|
||||
ERR_EXTRACT = -11, /*!< Cannot extract RDMSR driver (read only media?) */
|
||||
ERR_HANDLE = -12, /*!< Bad handle */
|
||||
ERR_INVMSR = -13, /*!< Invalid MSR */
|
||||
ERR_INVCNB = -14, /*!< Invalid core number */
|
||||
ERR_HANDLE_R = -15, /*!< Error on handle read */
|
||||
ERR_INVRANGE = -16, /*!< Invalid given range */
|
||||
} cpu_error_t;
|
||||
|
||||
/**
|
||||
* @brief Internal structure, used in cpu_tsc_mark, cpu_tsc_unmark and
|
||||
* cpu_clock_by_mark
|
||||
*/
|
||||
struct cpu_mark_t {
|
||||
uint64_t tsc; /*!< Time-stamp from RDTSC */
|
||||
uint64_t sys_clock; /*!< In microsecond resolution */
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Returns the total number of logical CPU threads (even if CPUID is not present).
|
||||
*
|
||||
* Under VM, this number (and total_logical_cpus, since they are fetched with the same code)
|
||||
* may be nonsensical, i.e. might not equal NumPhysicalCPUs*NumCoresPerCPU*HyperThreading.
|
||||
* This is because no matter how many logical threads the host machine has, you may limit them
|
||||
* in the VM to any number you like. **This** is the number returned by cpuid_get_total_cpus().
|
||||
*
|
||||
* @returns Number of logical CPU threads available. Equals the \ref cpu_id_t::total_logical_cpus.
|
||||
*/
|
||||
int cpuid_get_total_cpus(void);
|
||||
|
||||
/**
|
||||
* @brief Checks if the CPUID instruction is supported
|
||||
* @retval 1 if CPUID is present
|
||||
* @retval 0 the CPU doesn't have CPUID.
|
||||
*/
|
||||
int cpuid_present(void);
|
||||
|
||||
/**
|
||||
* @brief Executes the CPUID instruction
|
||||
* @param eax - the value of the EAX register when executing CPUID
|
||||
* @param regs - the results will be stored here. regs[0] = EAX, regs[1] = EBX, ...
|
||||
* @note CPUID will be executed with EAX set to the given value and EBX, ECX,
|
||||
* EDX set to zero.
|
||||
*/
|
||||
void cpu_exec_cpuid(uint32_t eax, uint32_t* regs);
|
||||
|
||||
/**
|
||||
* @brief Executes the CPUID instruction with the given input registers
|
||||
* @note This is just a bit more generic version of cpu_exec_cpuid - it allows
|
||||
* you to control all the registers.
|
||||
* @param regs - Input/output. Prior to executing CPUID, EAX, EBX, ECX and
|
||||
* EDX will be set to regs[0], regs[1], regs[2] and regs[3].
|
||||
* After CPUID, this array will contain the results.
|
||||
*/
|
||||
void cpu_exec_cpuid_ext(uint32_t* regs);
|
||||
|
||||
/**
|
||||
* @brief Obtains the raw CPUID data from the current CPU
|
||||
* @param data - a pointer to cpu_raw_data_t structure
|
||||
* @returns zero if successful, and some negative number on error.
|
||||
* The error message can be obtained by calling \ref cpuid_error.
|
||||
* @see cpu_error_t
|
||||
*/
|
||||
int cpuid_get_raw_data(struct cpu_raw_data_t* data);
|
||||
|
||||
/**
|
||||
* @brief Identifies the CPU
|
||||
* @param raw - Input - a pointer to the raw CPUID data, which is obtained
|
||||
* either by cpuid_get_raw_data or cpuid_deserialize_raw_data.
|
||||
* Can also be NULL, in which case the functions calls
|
||||
* cpuid_get_raw_data itself.
|
||||
* @param data - Output - the decoded CPU features/info is written here.
|
||||
* @note The function will not fail, even if some of the information
|
||||
* cannot be obtained. Even when the CPU is new and thus unknown to
|
||||
* libcpuid, some generic info, such as "AMD K9 family CPU" will be
|
||||
* written to data.cpu_codename, and most other things, such as the
|
||||
* CPU flags, cache sizes, etc. should be detected correctly anyway.
|
||||
* However, the function CAN fail, if the CPU is completely alien to
|
||||
* libcpuid.
|
||||
* @note While cpu_identify() and cpuid_get_raw_data() are fast for most
|
||||
* purposes, running them several thousand times per second can hamper
|
||||
* performance significantly. Specifically, avoid writing "cpu feature
|
||||
* checker" wrapping function, which calls cpu_identify and returns the
|
||||
* value of some flag, if that function is going to be called frequently.
|
||||
* @returns zero if successful, and some negative number on error.
|
||||
* The error message can be obtained by calling \ref cpuid_error.
|
||||
* @see cpu_error_t
|
||||
*/
|
||||
int cpu_identify(struct cpu_raw_data_t* raw, struct cpu_id_t* data);
|
||||
|
||||
/**
|
||||
* @brief The return value of cpuid_get_epc().
|
||||
* @details
|
||||
* Describes an EPC (Enclave Page Cache) layout (physical address and size).
|
||||
* A CPU may have one or more EPC areas, and information about each is
|
||||
* fetched via \ref cpuid_get_epc.
|
||||
*/
|
||||
struct cpu_epc_t {
|
||||
uint64_t start_addr;
|
||||
uint64_t length;
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Fetches information about an EPC (Enclave Page Cache) area.
|
||||
* @param index - zero-based index, valid range [0..cpu_id_t.egx.num_epc_sections)
|
||||
* @param raw - a pointer to fetched raw CPUID data. Needed only for testing,
|
||||
* you can safely pass NULL here (if you pass a real structure,
|
||||
* it will be used for fetching the leaf 12h data if index < 2;
|
||||
* otherwise the real CPUID instruction will be used).
|
||||
* @returns the requested data. If the CPU doesn't support SGX, or if
|
||||
* index >= cpu_id_t.egx.num_epc_sections, both fields of the returned
|
||||
* structure will be zeros.
|
||||
*/
|
||||
struct cpu_epc_t cpuid_get_epc(int index, const struct cpu_raw_data_t* raw);
|
||||
|
||||
/**
|
||||
* @brief Returns the libcpuid version
|
||||
*
|
||||
* @returns the string representation of the libcpuid version, like "0.1.1"
|
||||
*/
|
||||
const char* cpuid_lib_version(void);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
|
||||
/** @} */
|
||||
|
||||
#endif /* __LIBCPUID_H__ */
|
47
src/3rdparty/libcpuid/libcpuid_constants.h
vendored
47
src/3rdparty/libcpuid/libcpuid_constants.h
vendored
|
@ -1,47 +0,0 @@
|
|||
/*
|
||||
* Copyright 2008 Veselin Georgiev,
|
||||
* anrieffNOSPAM @ mgail_DOT.com (convert to gmail)
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
||||
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
/**
|
||||
* @File libcpuid_constants.h
|
||||
* @Author Veselin Georgiev
|
||||
* @Brief Some limits and constants for libcpuid
|
||||
*/
|
||||
|
||||
#ifndef __LIBCPUID_CONSTANTS_H__
|
||||
#define __LIBCPUID_CONSTANTS_H__
|
||||
|
||||
#define VENDOR_STR_MAX 16
|
||||
#define BRAND_STR_MAX 64
|
||||
#define CPU_FLAGS_MAX 128
|
||||
#define MAX_CPUID_LEVEL 32
|
||||
#define MAX_EXT_CPUID_LEVEL 32
|
||||
#define MAX_INTELFN4_LEVEL 8
|
||||
#define MAX_INTELFN11_LEVEL 4
|
||||
#define MAX_INTELFN12H_LEVEL 4
|
||||
#define MAX_INTELFN14H_LEVEL 4
|
||||
#define CPU_HINTS_MAX 16
|
||||
#define SGX_FLAGS_MAX 14
|
||||
|
||||
#endif /* __LIBCPUID_CONSTANTS_H__ */
|
107
src/3rdparty/libcpuid/libcpuid_internal.h
vendored
107
src/3rdparty/libcpuid/libcpuid_internal.h
vendored
|
@ -1,107 +0,0 @@
|
|||
/*
|
||||
* Copyright 2016 Veselin Georgiev,
|
||||
* anrieffNOSPAM @ mgail_DOT.com (convert to gmail)
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
||||
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
#ifndef __LIBCPUID_INTERNAL_H__
|
||||
#define __LIBCPUID_INTERNAL_H__
|
||||
/*
|
||||
* This file contains internal undocumented declarations and function prototypes
|
||||
* for the workings of the internal library infrastructure.
|
||||
*/
|
||||
|
||||
enum _common_codes_t {
|
||||
NA = 0,
|
||||
NC, /* No code */
|
||||
};
|
||||
|
||||
#define CODE(x) x
|
||||
#define CODE2(x, y) x = y
|
||||
enum _amd_code_t {
|
||||
#include "amd_code_t.h"
|
||||
};
|
||||
typedef enum _amd_code_t amd_code_t;
|
||||
|
||||
enum _intel_code_t {
|
||||
#include "intel_code_t.h"
|
||||
};
|
||||
typedef enum _intel_code_t intel_code_t;
|
||||
#undef CODE
|
||||
#undef CODE2
|
||||
|
||||
struct internal_id_info_t {
|
||||
union {
|
||||
amd_code_t amd;
|
||||
intel_code_t intel;
|
||||
} code;
|
||||
uint64_t bits;
|
||||
int score; // detection (matchtable) score
|
||||
};
|
||||
|
||||
#define LBIT(x) (((long long) 1) << x)
|
||||
|
||||
enum _common_bits_t {
|
||||
_M_ = LBIT( 0 ),
|
||||
MOBILE_ = LBIT( 1 ),
|
||||
_MP_ = LBIT( 2 ),
|
||||
};
|
||||
|
||||
// additional detection bits for Intel CPUs:
|
||||
enum _intel_bits_t {
|
||||
PENTIUM_ = LBIT( 10 ),
|
||||
CELERON_ = LBIT( 11 ),
|
||||
CORE_ = LBIT( 12 ),
|
||||
_I_ = LBIT( 13 ),
|
||||
_3 = LBIT( 14 ),
|
||||
_5 = LBIT( 15 ),
|
||||
_7 = LBIT( 16 ),
|
||||
_9 = LBIT( 17 ),
|
||||
XEON_ = LBIT( 18 ),
|
||||
ATOM_ = LBIT( 19 ),
|
||||
};
|
||||
typedef enum _intel_bits_t intel_bits_t;
|
||||
|
||||
enum _amd_bits_t {
|
||||
ATHLON_ = LBIT( 10 ),
|
||||
_XP_ = LBIT( 11 ),
|
||||
DURON_ = LBIT( 12 ),
|
||||
SEMPRON_ = LBIT( 13 ),
|
||||
OPTERON_ = LBIT( 14 ),
|
||||
TURION_ = LBIT( 15 ),
|
||||
_LV_ = LBIT( 16 ),
|
||||
_64_ = LBIT( 17 ),
|
||||
_X2 = LBIT( 18 ),
|
||||
_X3 = LBIT( 19 ),
|
||||
_X4 = LBIT( 20 ),
|
||||
_X6 = LBIT( 21 ),
|
||||
_FX = LBIT( 22 ),
|
||||
_APU_ = LBIT( 23 ),
|
||||
};
|
||||
typedef enum _amd_bits_t amd_bits_t;
|
||||
|
||||
|
||||
|
||||
int cpu_ident_internal(struct cpu_raw_data_t* raw, struct cpu_id_t* data,
|
||||
struct internal_id_info_t* internal);
|
||||
|
||||
#endif /* __LIBCPUID_INTERNAL_H__ */
|
63
src/3rdparty/libcpuid/libcpuid_types.h
vendored
63
src/3rdparty/libcpuid/libcpuid_types.h
vendored
|
@ -1,63 +0,0 @@
|
|||
/*
|
||||
* Copyright 2008 Veselin Georgiev,
|
||||
* anrieffNOSPAM @ mgail_DOT.com (convert to gmail)
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
||||
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
/**
|
||||
* @File libcpuid_types.h
|
||||
* @Author Veselin Georgiev
|
||||
* @Brief Type specifications for libcpuid.
|
||||
*/
|
||||
|
||||
#ifndef __LIBCPUID_TYPES_H__
|
||||
#define __LIBCPUID_TYPES_H__
|
||||
|
||||
#if !defined(_MSC_VER) || _MSC_VER >= 1600
|
||||
# include <stdint.h>
|
||||
#else
|
||||
/* we have to provide our own: */
|
||||
# if !defined(__int32_t_defined)
|
||||
typedef int int32_t;
|
||||
# endif
|
||||
|
||||
# if !defined(__uint32_t_defined)
|
||||
typedef unsigned uint32_t;
|
||||
# endif
|
||||
|
||||
typedef signed char int8_t;
|
||||
typedef unsigned char uint8_t;
|
||||
typedef signed short int16_t;
|
||||
typedef unsigned short uint16_t;
|
||||
#if (defined _MSC_VER) && (_MSC_VER <= 1300)
|
||||
/* MSVC 6.0: no long longs ... */
|
||||
typedef signed __int64 int64_t;
|
||||
typedef unsigned __int64 uint64_t;
|
||||
#else
|
||||
/* all other sane compilers: */
|
||||
typedef signed long long int64_t;
|
||||
typedef unsigned long long uint64_t;
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#endif /* __LIBCPUID_TYPES_H__ */
|
93
src/3rdparty/libcpuid/libcpuid_util.c
vendored
93
src/3rdparty/libcpuid/libcpuid_util.c
vendored
|
@ -1,93 +0,0 @@
|
|||
/*
|
||||
* Copyright 2008 Veselin Georgiev,
|
||||
* anrieffNOSPAM @ mgail_DOT.com (convert to gmail)
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
||||
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdarg.h>
|
||||
#include <string.h>
|
||||
#include <ctype.h>
|
||||
#include "libcpuid.h"
|
||||
#include "libcpuid_util.h"
|
||||
|
||||
void match_features(const struct feature_map_t* matchtable, int count, uint32_t reg, struct cpu_id_t* data)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < count; i++)
|
||||
if (reg & (1u << matchtable[i].bit))
|
||||
data->flags[matchtable[i].feature] = 1;
|
||||
}
|
||||
|
||||
static int xmatch_entry(char c, const char* p)
|
||||
{
|
||||
int i, j;
|
||||
if (c == 0) return -1;
|
||||
if (c == p[0]) return 1;
|
||||
if (p[0] == '.') return 1;
|
||||
if (p[0] == '#' && isdigit(c)) return 1;
|
||||
if (p[0] == '[') {
|
||||
j = 1;
|
||||
while (p[j] && p[j] != ']') j++;
|
||||
if (!p[j]) return -1;
|
||||
for (i = 1; i < j; i++)
|
||||
if (p[i] == c) return j + 1;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
int match_pattern(const char* s, const char* p)
|
||||
{
|
||||
int i, j, dj, k, n, m;
|
||||
n = (int) strlen(s);
|
||||
m = (int) strlen(p);
|
||||
for (i = 0; i < n; i++) {
|
||||
if (xmatch_entry(s[i], p) != -1) {
|
||||
j = 0;
|
||||
k = 0;
|
||||
while (j < m && ((dj = xmatch_entry(s[i + k], p + j)) != -1)) {
|
||||
k++;
|
||||
j += dj;
|
||||
}
|
||||
if (j == m) return i + 1;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct cpu_id_t* get_cached_cpuid(void)
|
||||
{
|
||||
static int initialized = 0;
|
||||
static struct cpu_id_t id;
|
||||
if (initialized) return &id;
|
||||
if (cpu_identify(NULL, &id))
|
||||
memset(&id, 0, sizeof(id));
|
||||
initialized = 1;
|
||||
return &id;
|
||||
}
|
||||
|
||||
int match_all(uint64_t bits, uint64_t mask)
|
||||
{
|
||||
return (bits & mask) == mask;
|
||||
}
|
78
src/3rdparty/libcpuid/libcpuid_util.h
vendored
78
src/3rdparty/libcpuid/libcpuid_util.h
vendored
|
@ -1,78 +0,0 @@
|
|||
/*
|
||||
* Copyright 2008 Veselin Georgiev,
|
||||
* anrieffNOSPAM @ mgail_DOT.com (convert to gmail)
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
||||
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
#ifndef __LIBCPUID_UTIL_H__
|
||||
#define __LIBCPUID_UTIL_H__
|
||||
|
||||
#define COUNT_OF(array) (sizeof(array) / sizeof(array[0]))
|
||||
|
||||
struct feature_map_t {
|
||||
unsigned bit;
|
||||
cpu_feature_t feature;
|
||||
};
|
||||
|
||||
void match_features(const struct feature_map_t* matchtable, int count,
|
||||
uint32_t reg, struct cpu_id_t* data);
|
||||
|
||||
struct match_entry_t {
|
||||
int family, model, stepping, ext_family, ext_model;
|
||||
int ncores, l2cache, l3cache, brand_code;
|
||||
uint64_t model_bits;
|
||||
int model_code;
|
||||
char name[32];
|
||||
};
|
||||
|
||||
// returns the match score:
|
||||
int match_cpu_codename(const struct match_entry_t* matchtable, int count,
|
||||
struct cpu_id_t* data, int brand_code, uint64_t bits,
|
||||
int model_code);
|
||||
/*
|
||||
* Seek for a pattern in `haystack'.
|
||||
* Pattern may be an fixed string, or contain the special metacharacters
|
||||
* '.' - match any single character
|
||||
* '#' - match any digit
|
||||
* '[<chars>] - match any of the given chars (regex-like ranges are not
|
||||
* supported)
|
||||
* Return val: 0 if the pattern is not found. Nonzero if it is found (actually,
|
||||
* x + 1 where x is the index where the match is found).
|
||||
*/
|
||||
int match_pattern(const char* haystack, const char* pattern);
|
||||
|
||||
/*
|
||||
* Gets an initialized cpu_id_t. It is cached, so that internal libcpuid
|
||||
* machinery doesn't need to issue cpu_identify more than once.
|
||||
*/
|
||||
struct cpu_id_t* get_cached_cpuid(void);
|
||||
|
||||
|
||||
/* returns true if all bits of mask are present in `bits'. */
|
||||
int match_all(uint64_t bits, uint64_t mask);
|
||||
|
||||
/*
|
||||
* Sets the current errno
|
||||
*/
|
||||
int set_error(cpu_error_t err);
|
||||
|
||||
#endif /* __LIBCPUID_UTIL_H__ */
|
359
src/3rdparty/libcpuid/masm-x64.asm
vendored
359
src/3rdparty/libcpuid/masm-x64.asm
vendored
|
@ -1,359 +0,0 @@
|
|||
|
||||
.code
|
||||
; procedure exec_cpuid
|
||||
; Signature: void exec_cpiud(uint32_t *regs)
|
||||
exec_cpuid Proc
|
||||
push rbx
|
||||
push rcx
|
||||
push rdx
|
||||
push rdi
|
||||
|
||||
mov rdi, rcx
|
||||
|
||||
mov eax, [rdi]
|
||||
mov ebx, [rdi+4]
|
||||
mov ecx, [rdi+8]
|
||||
mov edx, [rdi+12]
|
||||
|
||||
cpuid
|
||||
|
||||
mov [rdi], eax
|
||||
mov [rdi+4], ebx
|
||||
mov [rdi+8], ecx
|
||||
mov [rdi+12], edx
|
||||
pop rdi
|
||||
pop rdx
|
||||
pop rcx
|
||||
pop rbx
|
||||
ret
|
||||
exec_cpuid endp
|
||||
|
||||
; procedure cpu_rdtsc
|
||||
; Signature: void cpu_rdtsc(uint64_t *result)
|
||||
cpu_rdtsc Proc
|
||||
push rdx
|
||||
rdtsc
|
||||
mov [rcx], eax
|
||||
mov [rcx+4], edx
|
||||
pop rdx
|
||||
ret
|
||||
cpu_rdtsc endp
|
||||
|
||||
; procedure busy_sse_loop
|
||||
; Signature: void busy_sse_loop(int cycles)
|
||||
busy_sse_loop Proc
|
||||
; save xmm6 & xmm7 into the shadow area, as Visual C++ 2008
|
||||
; expects that we don't touch them:
|
||||
movups [rsp + 8], xmm6
|
||||
movups [rsp + 24], xmm7
|
||||
|
||||
xorps xmm0, xmm0
|
||||
xorps xmm1, xmm1
|
||||
xorps xmm2, xmm2
|
||||
xorps xmm3, xmm3
|
||||
xorps xmm4, xmm4
|
||||
xorps xmm5, xmm5
|
||||
xorps xmm6, xmm6
|
||||
xorps xmm7, xmm7
|
||||
; --
|
||||
align 16
|
||||
bsLoop:
|
||||
; 0:
|
||||
addps xmm0, xmm1
|
||||
addps xmm1, xmm2
|
||||
addps xmm2, xmm3
|
||||
addps xmm3, xmm4
|
||||
addps xmm4, xmm5
|
||||
addps xmm5, xmm6
|
||||
addps xmm6, xmm7
|
||||
addps xmm7, xmm0
|
||||
; 1:
|
||||
addps xmm0, xmm1
|
||||
addps xmm1, xmm2
|
||||
addps xmm2, xmm3
|
||||
addps xmm3, xmm4
|
||||
addps xmm4, xmm5
|
||||
addps xmm5, xmm6
|
||||
addps xmm6, xmm7
|
||||
addps xmm7, xmm0
|
||||
; 2:
|
||||
addps xmm0, xmm1
|
||||
addps xmm1, xmm2
|
||||
addps xmm2, xmm3
|
||||
addps xmm3, xmm4
|
||||
addps xmm4, xmm5
|
||||
addps xmm5, xmm6
|
||||
addps xmm6, xmm7
|
||||
addps xmm7, xmm0
|
||||
; 3:
|
||||
addps xmm0, xmm1
|
||||
addps xmm1, xmm2
|
||||
addps xmm2, xmm3
|
||||
addps xmm3, xmm4
|
||||
addps xmm4, xmm5
|
||||
addps xmm5, xmm6
|
||||
addps xmm6, xmm7
|
||||
addps xmm7, xmm0
|
||||
; 4:
|
||||
addps xmm0, xmm1
|
||||
addps xmm1, xmm2
|
||||
addps xmm2, xmm3
|
||||
addps xmm3, xmm4
|
||||
addps xmm4, xmm5
|
||||
addps xmm5, xmm6
|
||||
addps xmm6, xmm7
|
||||
addps xmm7, xmm0
|
||||
; 5:
|
||||
addps xmm0, xmm1
|
||||
addps xmm1, xmm2
|
||||
addps xmm2, xmm3
|
||||
addps xmm3, xmm4
|
||||
addps xmm4, xmm5
|
||||
addps xmm5, xmm6
|
||||
addps xmm6, xmm7
|
||||
addps xmm7, xmm0
|
||||
; 6:
|
||||
addps xmm0, xmm1
|
||||
addps xmm1, xmm2
|
||||
addps xmm2, xmm3
|
||||
addps xmm3, xmm4
|
||||
addps xmm4, xmm5
|
||||
addps xmm5, xmm6
|
||||
addps xmm6, xmm7
|
||||
addps xmm7, xmm0
|
||||
; 7:
|
||||
addps xmm0, xmm1
|
||||
addps xmm1, xmm2
|
||||
addps xmm2, xmm3
|
||||
addps xmm3, xmm4
|
||||
addps xmm4, xmm5
|
||||
addps xmm5, xmm6
|
||||
addps xmm6, xmm7
|
||||
addps xmm7, xmm0
|
||||
; 8:
|
||||
addps xmm0, xmm1
|
||||
addps xmm1, xmm2
|
||||
addps xmm2, xmm3
|
||||
addps xmm3, xmm4
|
||||
addps xmm4, xmm5
|
||||
addps xmm5, xmm6
|
||||
addps xmm6, xmm7
|
||||
addps xmm7, xmm0
|
||||
; 9:
|
||||
addps xmm0, xmm1
|
||||
addps xmm1, xmm2
|
||||
addps xmm2, xmm3
|
||||
addps xmm3, xmm4
|
||||
addps xmm4, xmm5
|
||||
addps xmm5, xmm6
|
||||
addps xmm6, xmm7
|
||||
addps xmm7, xmm0
|
||||
; 10:
|
||||
addps xmm0, xmm1
|
||||
addps xmm1, xmm2
|
||||
addps xmm2, xmm3
|
||||
addps xmm3, xmm4
|
||||
addps xmm4, xmm5
|
||||
addps xmm5, xmm6
|
||||
addps xmm6, xmm7
|
||||
addps xmm7, xmm0
|
||||
; 11:
|
||||
addps xmm0, xmm1
|
||||
addps xmm1, xmm2
|
||||
addps xmm2, xmm3
|
||||
addps xmm3, xmm4
|
||||
addps xmm4, xmm5
|
||||
addps xmm5, xmm6
|
||||
addps xmm6, xmm7
|
||||
addps xmm7, xmm0
|
||||
; 12:
|
||||
addps xmm0, xmm1
|
||||
addps xmm1, xmm2
|
||||
addps xmm2, xmm3
|
||||
addps xmm3, xmm4
|
||||
addps xmm4, xmm5
|
||||
addps xmm5, xmm6
|
||||
addps xmm6, xmm7
|
||||
addps xmm7, xmm0
|
||||
; 13:
|
||||
addps xmm0, xmm1
|
||||
addps xmm1, xmm2
|
||||
addps xmm2, xmm3
|
||||
addps xmm3, xmm4
|
||||
addps xmm4, xmm5
|
||||
addps xmm5, xmm6
|
||||
addps xmm6, xmm7
|
||||
addps xmm7, xmm0
|
||||
; 14:
|
||||
addps xmm0, xmm1
|
||||
addps xmm1, xmm2
|
||||
addps xmm2, xmm3
|
||||
addps xmm3, xmm4
|
||||
addps xmm4, xmm5
|
||||
addps xmm5, xmm6
|
||||
addps xmm6, xmm7
|
||||
addps xmm7, xmm0
|
||||
; 15:
|
||||
addps xmm0, xmm1
|
||||
addps xmm1, xmm2
|
||||
addps xmm2, xmm3
|
||||
addps xmm3, xmm4
|
||||
addps xmm4, xmm5
|
||||
addps xmm5, xmm6
|
||||
addps xmm6, xmm7
|
||||
addps xmm7, xmm0
|
||||
; 16:
|
||||
addps xmm0, xmm1
|
||||
addps xmm1, xmm2
|
||||
addps xmm2, xmm3
|
||||
addps xmm3, xmm4
|
||||
addps xmm4, xmm5
|
||||
addps xmm5, xmm6
|
||||
addps xmm6, xmm7
|
||||
addps xmm7, xmm0
|
||||
; 17:
|
||||
addps xmm0, xmm1
|
||||
addps xmm1, xmm2
|
||||
addps xmm2, xmm3
|
||||
addps xmm3, xmm4
|
||||
addps xmm4, xmm5
|
||||
addps xmm5, xmm6
|
||||
addps xmm6, xmm7
|
||||
addps xmm7, xmm0
|
||||
; 18:
|
||||
addps xmm0, xmm1
|
||||
addps xmm1, xmm2
|
||||
addps xmm2, xmm3
|
||||
addps xmm3, xmm4
|
||||
addps xmm4, xmm5
|
||||
addps xmm5, xmm6
|
||||
addps xmm6, xmm7
|
||||
addps xmm7, xmm0
|
||||
; 19:
|
||||
addps xmm0, xmm1
|
||||
addps xmm1, xmm2
|
||||
addps xmm2, xmm3
|
||||
addps xmm3, xmm4
|
||||
addps xmm4, xmm5
|
||||
addps xmm5, xmm6
|
||||
addps xmm6, xmm7
|
||||
addps xmm7, xmm0
|
||||
; 20:
|
||||
addps xmm0, xmm1
|
||||
addps xmm1, xmm2
|
||||
addps xmm2, xmm3
|
||||
addps xmm3, xmm4
|
||||
addps xmm4, xmm5
|
||||
addps xmm5, xmm6
|
||||
addps xmm6, xmm7
|
||||
addps xmm7, xmm0
|
||||
; 21:
|
||||
addps xmm0, xmm1
|
||||
addps xmm1, xmm2
|
||||
addps xmm2, xmm3
|
||||
addps xmm3, xmm4
|
||||
addps xmm4, xmm5
|
||||
addps xmm5, xmm6
|
||||
addps xmm6, xmm7
|
||||
addps xmm7, xmm0
|
||||
; 22:
|
||||
addps xmm0, xmm1
|
||||
addps xmm1, xmm2
|
||||
addps xmm2, xmm3
|
||||
addps xmm3, xmm4
|
||||
addps xmm4, xmm5
|
||||
addps xmm5, xmm6
|
||||
addps xmm6, xmm7
|
||||
addps xmm7, xmm0
|
||||
; 23:
|
||||
addps xmm0, xmm1
|
||||
addps xmm1, xmm2
|
||||
addps xmm2, xmm3
|
||||
addps xmm3, xmm4
|
||||
addps xmm4, xmm5
|
||||
addps xmm5, xmm6
|
||||
addps xmm6, xmm7
|
||||
addps xmm7, xmm0
|
||||
; 24:
|
||||
addps xmm0, xmm1
|
||||
addps xmm1, xmm2
|
||||
addps xmm2, xmm3
|
||||
addps xmm3, xmm4
|
||||
addps xmm4, xmm5
|
||||
addps xmm5, xmm6
|
||||
addps xmm6, xmm7
|
||||
addps xmm7, xmm0
|
||||
; 25:
|
||||
addps xmm0, xmm1
|
||||
addps xmm1, xmm2
|
||||
addps xmm2, xmm3
|
||||
addps xmm3, xmm4
|
||||
addps xmm4, xmm5
|
||||
addps xmm5, xmm6
|
||||
addps xmm6, xmm7
|
||||
addps xmm7, xmm0
|
||||
; 26:
|
||||
addps xmm0, xmm1
|
||||
addps xmm1, xmm2
|
||||
addps xmm2, xmm3
|
||||
addps xmm3, xmm4
|
||||
addps xmm4, xmm5
|
||||
addps xmm5, xmm6
|
||||
addps xmm6, xmm7
|
||||
addps xmm7, xmm0
|
||||
; 27:
|
||||
addps xmm0, xmm1
|
||||
addps xmm1, xmm2
|
||||
addps xmm2, xmm3
|
||||
addps xmm3, xmm4
|
||||
addps xmm4, xmm5
|
||||
addps xmm5, xmm6
|
||||
addps xmm6, xmm7
|
||||
addps xmm7, xmm0
|
||||
; 28:
|
||||
addps xmm0, xmm1
|
||||
addps xmm1, xmm2
|
||||
addps xmm2, xmm3
|
||||
addps xmm3, xmm4
|
||||
addps xmm4, xmm5
|
||||
addps xmm5, xmm6
|
||||
addps xmm6, xmm7
|
||||
addps xmm7, xmm0
|
||||
; 29:
|
||||
addps xmm0, xmm1
|
||||
addps xmm1, xmm2
|
||||
addps xmm2, xmm3
|
||||
addps xmm3, xmm4
|
||||
addps xmm4, xmm5
|
||||
addps xmm5, xmm6
|
||||
addps xmm6, xmm7
|
||||
addps xmm7, xmm0
|
||||
; 30:
|
||||
addps xmm0, xmm1
|
||||
addps xmm1, xmm2
|
||||
addps xmm2, xmm3
|
||||
addps xmm3, xmm4
|
||||
addps xmm4, xmm5
|
||||
addps xmm5, xmm6
|
||||
addps xmm6, xmm7
|
||||
addps xmm7, xmm0
|
||||
; 31:
|
||||
addps xmm0, xmm1
|
||||
addps xmm1, xmm2
|
||||
addps xmm2, xmm3
|
||||
addps xmm3, xmm4
|
||||
addps xmm4, xmm5
|
||||
addps xmm5, xmm6
|
||||
addps xmm6, xmm7
|
||||
addps xmm7, xmm0
|
||||
; ----------------------
|
||||
dec ecx
|
||||
jnz bsLoop
|
||||
|
||||
; restore xmm6 & xmm7:
|
||||
movups xmm6, [rsp + 8]
|
||||
movups xmm7, [rsp + 24]
|
||||
ret
|
||||
busy_sse_loop endp
|
||||
|
||||
END
|
173
src/3rdparty/libcpuid/recog_amd.c
vendored
173
src/3rdparty/libcpuid/recog_amd.c
vendored
|
@ -1,173 +0,0 @@
|
|||
/*
|
||||
* Copyright 2008 Veselin Georgiev,
|
||||
* anrieffNOSPAM @ mgail_DOT.com (convert to gmail)
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
||||
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <ctype.h>
|
||||
#include "libcpuid.h"
|
||||
#include "libcpuid_util.h"
|
||||
#include "libcpuid_internal.h"
|
||||
#include "recog_amd.h"
|
||||
|
||||
const struct amd_code_str { amd_code_t code; char *str; } amd_code_str[] = {
|
||||
#define CODE(x) { x, #x }
|
||||
#define CODE2(x, y) CODE(x)
|
||||
#include "amd_code_t.h"
|
||||
#undef CODE
|
||||
};
|
||||
|
||||
struct amd_code_and_bits_t {
|
||||
int code;
|
||||
uint64_t bits;
|
||||
};
|
||||
|
||||
enum _amd_model_codes_t {
|
||||
// Only for Ryzen CPUs:
|
||||
_1400,
|
||||
_1500,
|
||||
_1600,
|
||||
_1900,
|
||||
_2400,
|
||||
_2500,
|
||||
_2700,
|
||||
};
|
||||
|
||||
static void load_amd_features(struct cpu_raw_data_t* raw, struct cpu_id_t* data)
|
||||
{
|
||||
const struct feature_map_t matchtable_edx81[] = {
|
||||
{ 20, CPU_FEATURE_NX },
|
||||
{ 22, CPU_FEATURE_MMXEXT },
|
||||
{ 25, CPU_FEATURE_FXSR_OPT },
|
||||
{ 30, CPU_FEATURE_3DNOWEXT },
|
||||
{ 31, CPU_FEATURE_3DNOW },
|
||||
};
|
||||
const struct feature_map_t matchtable_ecx81[] = {
|
||||
{ 1, CPU_FEATURE_CMP_LEGACY },
|
||||
{ 2, CPU_FEATURE_SVM },
|
||||
{ 5, CPU_FEATURE_ABM },
|
||||
{ 6, CPU_FEATURE_SSE4A },
|
||||
{ 7, CPU_FEATURE_MISALIGNSSE },
|
||||
{ 8, CPU_FEATURE_3DNOWPREFETCH },
|
||||
{ 9, CPU_FEATURE_OSVW },
|
||||
{ 10, CPU_FEATURE_IBS },
|
||||
{ 11, CPU_FEATURE_XOP },
|
||||
{ 12, CPU_FEATURE_SKINIT },
|
||||
{ 13, CPU_FEATURE_WDT },
|
||||
{ 16, CPU_FEATURE_FMA4 },
|
||||
{ 21, CPU_FEATURE_TBM },
|
||||
};
|
||||
const struct feature_map_t matchtable_edx87[] = {
|
||||
{ 0, CPU_FEATURE_TS },
|
||||
{ 1, CPU_FEATURE_FID },
|
||||
{ 2, CPU_FEATURE_VID },
|
||||
{ 3, CPU_FEATURE_TTP },
|
||||
{ 4, CPU_FEATURE_TM_AMD },
|
||||
{ 5, CPU_FEATURE_STC },
|
||||
{ 6, CPU_FEATURE_100MHZSTEPS },
|
||||
{ 7, CPU_FEATURE_HWPSTATE },
|
||||
/* id 8 is handled in common */
|
||||
{ 9, CPU_FEATURE_CPB },
|
||||
{ 10, CPU_FEATURE_APERFMPERF },
|
||||
{ 11, CPU_FEATURE_PFI },
|
||||
{ 12, CPU_FEATURE_PA },
|
||||
};
|
||||
if (raw->ext_cpuid[0][0] >= 0x80000001) {
|
||||
match_features(matchtable_edx81, COUNT_OF(matchtable_edx81), raw->ext_cpuid[1][3], data);
|
||||
match_features(matchtable_ecx81, COUNT_OF(matchtable_ecx81), raw->ext_cpuid[1][2], data);
|
||||
}
|
||||
if (raw->ext_cpuid[0][0] >= 0x80000007)
|
||||
match_features(matchtable_edx87, COUNT_OF(matchtable_edx87), raw->ext_cpuid[7][3], data);
|
||||
if (raw->ext_cpuid[0][0] >= 0x8000001a) {
|
||||
/* We have the extended info about SSE unit size */
|
||||
data->detection_hints[CPU_HINT_SSE_SIZE_AUTH] = 1;
|
||||
data->sse_size = (raw->ext_cpuid[0x1a][0] & 1) ? 128 : 64;
|
||||
}
|
||||
}
|
||||
|
||||
static void decode_amd_cache_info(struct cpu_raw_data_t* raw, struct cpu_id_t* data)
|
||||
{
|
||||
int l3_result;
|
||||
const int assoc_table[16] = {
|
||||
0, 1, 2, 0, 4, 0, 8, 0, 16, 0, 32, 48, 64, 96, 128, 255
|
||||
};
|
||||
unsigned n = raw->ext_cpuid[0][0];
|
||||
|
||||
if (n >= 0x80000005) {
|
||||
data->l1_data_cache = (raw->ext_cpuid[5][2] >> 24) & 0xff;
|
||||
data->l1_assoc = (raw->ext_cpuid[5][2] >> 16) & 0xff;
|
||||
data->l1_cacheline = (raw->ext_cpuid[5][2]) & 0xff;
|
||||
data->l1_instruction_cache = (raw->ext_cpuid[5][3] >> 24) & 0xff;
|
||||
}
|
||||
if (n >= 0x80000006) {
|
||||
data->l2_cache = (raw->ext_cpuid[6][2] >> 16) & 0xffff;
|
||||
data->l2_assoc = assoc_table[(raw->ext_cpuid[6][2] >> 12) & 0xf];
|
||||
data->l2_cacheline = (raw->ext_cpuid[6][2]) & 0xff;
|
||||
|
||||
l3_result = (raw->ext_cpuid[6][3] >> 18);
|
||||
if (l3_result > 0) {
|
||||
l3_result = 512 * l3_result; /* AMD spec says it's a range,
|
||||
but we take the lower bound */
|
||||
data->l3_cache = l3_result;
|
||||
data->l3_assoc = assoc_table[(raw->ext_cpuid[6][3] >> 12) & 0xf];
|
||||
data->l3_cacheline = (raw->ext_cpuid[6][3]) & 0xff;
|
||||
} else {
|
||||
data->l3_cache = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void decode_amd_number_of_cores(struct cpu_raw_data_t* raw, struct cpu_id_t* data)
|
||||
{
|
||||
int logical_cpus = -1, num_cores = -1;
|
||||
|
||||
if (raw->basic_cpuid[0][0] >= 1) {
|
||||
logical_cpus = (raw->basic_cpuid[1][1] >> 16) & 0xff;
|
||||
if (raw->ext_cpuid[0][0] >= 8) {
|
||||
num_cores = 1 + (raw->ext_cpuid[8][2] & 0xff);
|
||||
}
|
||||
}
|
||||
if (data->flags[CPU_FEATURE_HT]) {
|
||||
if (num_cores > 1) {
|
||||
if (data->ext_family >= 23)
|
||||
num_cores /= 2; // e.g., Ryzen 7 reports 16 "real" cores, but they are really just 8.
|
||||
data->num_cores = num_cores;
|
||||
data->num_logical_cpus = logical_cpus;
|
||||
} else {
|
||||
data->num_cores = 1;
|
||||
data->num_logical_cpus = (logical_cpus >= 2 ? logical_cpus : 2);
|
||||
}
|
||||
} else {
|
||||
data->num_cores = data->num_logical_cpus = 1;
|
||||
}
|
||||
}
|
||||
|
||||
int cpuid_identify_amd(struct cpu_raw_data_t* raw, struct cpu_id_t* data, struct internal_id_info_t* internal)
|
||||
{
|
||||
load_amd_features(raw, data);
|
||||
decode_amd_cache_info(raw, data);
|
||||
decode_amd_number_of_cores(raw, data);
|
||||
return 0;
|
||||
}
|
31
src/3rdparty/libcpuid/recog_amd.h
vendored
31
src/3rdparty/libcpuid/recog_amd.h
vendored
|
@ -1,31 +0,0 @@
|
|||
/*
|
||||
* Copyright 2008 Veselin Georgiev,
|
||||
* anrieffNOSPAM @ mgail_DOT.com (convert to gmail)
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
||||
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
#ifndef __RECOG_AMD_H__
|
||||
#define __RECOG_AMD_H__
|
||||
|
||||
int cpuid_identify_amd(struct cpu_raw_data_t* raw, struct cpu_id_t* data, struct internal_id_info_t* internal);
|
||||
|
||||
#endif /* __RECOG_AMD_H__ */
|
543
src/3rdparty/libcpuid/recog_intel.c
vendored
543
src/3rdparty/libcpuid/recog_intel.c
vendored
|
@ -1,543 +0,0 @@
|
|||
/*
|
||||
* Copyright 2008 Veselin Georgiev,
|
||||
* anrieffNOSPAM @ mgail_DOT.com (convert to gmail)
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
||||
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
#include <string.h>
|
||||
#include <ctype.h>
|
||||
#include "libcpuid.h"
|
||||
#include "libcpuid_util.h"
|
||||
#include "libcpuid_internal.h"
|
||||
#include "recog_intel.h"
|
||||
|
||||
const struct intel_bcode_str { intel_code_t code; char *str; } intel_bcode_str[] = {
|
||||
#define CODE(x) { x, #x }
|
||||
#define CODE2(x, y) CODE(x)
|
||||
#include "intel_code_t.h"
|
||||
#undef CODE
|
||||
};
|
||||
|
||||
typedef struct {
|
||||
int code;
|
||||
uint64_t bits;
|
||||
} intel_code_and_bits_t;
|
||||
|
||||
enum _intel_model_t {
|
||||
UNKNOWN = -1,
|
||||
_3000 = 100,
|
||||
_3100,
|
||||
_3200,
|
||||
X3200,
|
||||
_3300,
|
||||
X3300,
|
||||
_5100,
|
||||
_5200,
|
||||
_5300,
|
||||
_5400,
|
||||
_2xxx, /* Core i[357] 2xxx */
|
||||
_3xxx, /* Core i[357] 3xxx */
|
||||
};
|
||||
typedef enum _intel_model_t intel_model_t;
|
||||
|
||||
static void load_intel_features(struct cpu_raw_data_t* raw, struct cpu_id_t* data)
|
||||
{
|
||||
const struct feature_map_t matchtable_edx1[] = {
|
||||
{ 18, CPU_FEATURE_PN },
|
||||
{ 21, CPU_FEATURE_DTS },
|
||||
{ 22, CPU_FEATURE_ACPI },
|
||||
{ 27, CPU_FEATURE_SS },
|
||||
{ 29, CPU_FEATURE_TM },
|
||||
{ 30, CPU_FEATURE_IA64 },
|
||||
{ 31, CPU_FEATURE_PBE },
|
||||
};
|
||||
const struct feature_map_t matchtable_ecx1[] = {
|
||||
{ 2, CPU_FEATURE_DTS64 },
|
||||
{ 4, CPU_FEATURE_DS_CPL },
|
||||
{ 5, CPU_FEATURE_VMX },
|
||||
{ 6, CPU_FEATURE_SMX },
|
||||
{ 7, CPU_FEATURE_EST },
|
||||
{ 8, CPU_FEATURE_TM2 },
|
||||
{ 10, CPU_FEATURE_CID },
|
||||
{ 14, CPU_FEATURE_XTPR },
|
||||
{ 15, CPU_FEATURE_PDCM },
|
||||
{ 18, CPU_FEATURE_DCA },
|
||||
{ 21, CPU_FEATURE_X2APIC },
|
||||
};
|
||||
const struct feature_map_t matchtable_edx81[] = {
|
||||
{ 20, CPU_FEATURE_XD },
|
||||
};
|
||||
const struct feature_map_t matchtable_ebx7[] = {
|
||||
{ 2, CPU_FEATURE_SGX },
|
||||
{ 4, CPU_FEATURE_HLE },
|
||||
{ 11, CPU_FEATURE_RTM },
|
||||
{ 16, CPU_FEATURE_AVX512F },
|
||||
{ 17, CPU_FEATURE_AVX512DQ },
|
||||
{ 18, CPU_FEATURE_RDSEED },
|
||||
{ 19, CPU_FEATURE_ADX },
|
||||
{ 26, CPU_FEATURE_AVX512PF },
|
||||
{ 27, CPU_FEATURE_AVX512ER },
|
||||
{ 28, CPU_FEATURE_AVX512CD },
|
||||
{ 29, CPU_FEATURE_SHA_NI },
|
||||
{ 30, CPU_FEATURE_AVX512BW },
|
||||
{ 31, CPU_FEATURE_AVX512VL },
|
||||
};
|
||||
if (raw->basic_cpuid[0][0] >= 1) {
|
||||
match_features(matchtable_edx1, COUNT_OF(matchtable_edx1), raw->basic_cpuid[1][3], data);
|
||||
match_features(matchtable_ecx1, COUNT_OF(matchtable_ecx1), raw->basic_cpuid[1][2], data);
|
||||
}
|
||||
if (raw->ext_cpuid[0][0] >= 1) {
|
||||
match_features(matchtable_edx81, COUNT_OF(matchtable_edx81), raw->ext_cpuid[1][3], data);
|
||||
}
|
||||
// detect TSX/AVX512:
|
||||
if (raw->basic_cpuid[0][0] >= 7) {
|
||||
match_features(matchtable_ebx7, COUNT_OF(matchtable_ebx7), raw->basic_cpuid[7][1], data);
|
||||
}
|
||||
}
|
||||
|
||||
enum _cache_type_t {
|
||||
L1I,
|
||||
L1D,
|
||||
L2,
|
||||
L3,
|
||||
L4
|
||||
};
|
||||
typedef enum _cache_type_t cache_type_t;
|
||||
|
||||
static void check_case(uint8_t on, cache_type_t cache, int size, int assoc, int linesize, struct cpu_id_t* data)
|
||||
{
|
||||
if (!on) return;
|
||||
switch (cache) {
|
||||
case L1I:
|
||||
data->l1_instruction_cache = size;
|
||||
break;
|
||||
case L1D:
|
||||
data->l1_data_cache = size;
|
||||
data->l1_assoc = assoc;
|
||||
data->l1_cacheline = linesize;
|
||||
break;
|
||||
case L2:
|
||||
data->l2_cache = size;
|
||||
data->l2_assoc = assoc;
|
||||
data->l2_cacheline = linesize;
|
||||
break;
|
||||
case L3:
|
||||
data->l3_cache = size;
|
||||
data->l3_assoc = assoc;
|
||||
data->l3_cacheline = linesize;
|
||||
break;
|
||||
case L4:
|
||||
data->l4_cache = size;
|
||||
data->l4_assoc = assoc;
|
||||
data->l4_cacheline = linesize;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static void decode_intel_oldstyle_cache_info(struct cpu_raw_data_t* raw, struct cpu_id_t* data)
|
||||
{
|
||||
uint8_t f[256] = {0};
|
||||
int reg, off;
|
||||
uint32_t x;
|
||||
for (reg = 0; reg < 4; reg++) {
|
||||
x = raw->basic_cpuid[2][reg];
|
||||
if (x & 0x80000000) continue;
|
||||
for (off = 0; off < 4; off++) {
|
||||
f[x & 0xff] = 1;
|
||||
x >>= 8;
|
||||
}
|
||||
}
|
||||
|
||||
check_case(f[0x06], L1I, 8, 4, 32, data);
|
||||
check_case(f[0x08], L1I, 16, 4, 32, data);
|
||||
check_case(f[0x0A], L1D, 8, 2, 32, data);
|
||||
check_case(f[0x0C], L1D, 16, 4, 32, data);
|
||||
check_case(f[0x22], L3, 512, 4, 64, data);
|
||||
check_case(f[0x23], L3, 1024, 8, 64, data);
|
||||
check_case(f[0x25], L3, 2048, 8, 64, data);
|
||||
check_case(f[0x29], L3, 4096, 8, 64, data);
|
||||
check_case(f[0x2C], L1D, 32, 8, 64, data);
|
||||
check_case(f[0x30], L1I, 32, 8, 64, data);
|
||||
check_case(f[0x39], L2, 128, 4, 64, data);
|
||||
check_case(f[0x3A], L2, 192, 6, 64, data);
|
||||
check_case(f[0x3B], L2, 128, 2, 64, data);
|
||||
check_case(f[0x3C], L2, 256, 4, 64, data);
|
||||
check_case(f[0x3D], L2, 384, 6, 64, data);
|
||||
check_case(f[0x3E], L2, 512, 4, 64, data);
|
||||
check_case(f[0x41], L2, 128, 4, 32, data);
|
||||
check_case(f[0x42], L2, 256, 4, 32, data);
|
||||
check_case(f[0x43], L2, 512, 4, 32, data);
|
||||
check_case(f[0x44], L2, 1024, 4, 32, data);
|
||||
check_case(f[0x45], L2, 2048, 4, 32, data);
|
||||
check_case(f[0x46], L3, 4096, 4, 64, data);
|
||||
check_case(f[0x47], L3, 8192, 8, 64, data);
|
||||
check_case(f[0x4A], L3, 6144, 12, 64, data);
|
||||
check_case(f[0x4B], L3, 8192, 16, 64, data);
|
||||
check_case(f[0x4C], L3, 12288, 12, 64, data);
|
||||
check_case(f[0x4D], L3, 16384, 16, 64, data);
|
||||
check_case(f[0x4E], L2, 6144, 24, 64, data);
|
||||
check_case(f[0x60], L1D, 16, 8, 64, data);
|
||||
check_case(f[0x66], L1D, 8, 4, 64, data);
|
||||
check_case(f[0x67], L1D, 16, 4, 64, data);
|
||||
check_case(f[0x68], L1D, 32, 4, 64, data);
|
||||
/* The following four entries are trace cache. Intel does not
|
||||
* specify a cache-line size, so we use -1 instead
|
||||
*/
|
||||
check_case(f[0x70], L1I, 12, 8, -1, data);
|
||||
check_case(f[0x71], L1I, 16, 8, -1, data);
|
||||
check_case(f[0x72], L1I, 32, 8, -1, data);
|
||||
check_case(f[0x73], L1I, 64, 8, -1, data);
|
||||
|
||||
check_case(f[0x78], L2, 1024, 4, 64, data);
|
||||
check_case(f[0x79], L2, 128, 8, 64, data);
|
||||
check_case(f[0x7A], L2, 256, 8, 64, data);
|
||||
check_case(f[0x7B], L2, 512, 8, 64, data);
|
||||
check_case(f[0x7C], L2, 1024, 8, 64, data);
|
||||
check_case(f[0x7D], L2, 2048, 8, 64, data);
|
||||
check_case(f[0x7F], L2, 512, 2, 64, data);
|
||||
check_case(f[0x82], L2, 256, 8, 32, data);
|
||||
check_case(f[0x83], L2, 512, 8, 32, data);
|
||||
check_case(f[0x84], L2, 1024, 8, 32, data);
|
||||
check_case(f[0x85], L2, 2048, 8, 32, data);
|
||||
check_case(f[0x86], L2, 512, 4, 64, data);
|
||||
check_case(f[0x87], L2, 1024, 8, 64, data);
|
||||
|
||||
if (f[0x49]) {
|
||||
/* This flag is overloaded with two meanings. On Xeon MP
|
||||
* (family 0xf, model 0x6) this means L3 cache. On all other
|
||||
* CPUs (notably Conroe et al), this is L2 cache. In both cases
|
||||
* it means 4MB, 16-way associative, 64-byte line size.
|
||||
*/
|
||||
if (data->family == 0xf && data->model == 0x6) {
|
||||
data->l3_cache = 4096;
|
||||
data->l3_assoc = 16;
|
||||
data->l3_cacheline = 64;
|
||||
} else {
|
||||
data->l2_cache = 4096;
|
||||
data->l2_assoc = 16;
|
||||
data->l2_cacheline = 64;
|
||||
}
|
||||
}
|
||||
if (f[0x40]) {
|
||||
/* Again, a special flag. It means:
|
||||
* 1) If no L2 is specified, then CPU is w/o L2 (0 KB)
|
||||
* 2) If L2 is specified by other flags, then, CPU is w/o L3.
|
||||
*/
|
||||
if (data->l2_cache == -1) {
|
||||
data->l2_cache = 0;
|
||||
} else {
|
||||
data->l3_cache = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void decode_intel_deterministic_cache_info(struct cpu_raw_data_t* raw,
|
||||
struct cpu_id_t* data)
|
||||
{
|
||||
int ecx;
|
||||
int ways, partitions, linesize, sets, size, level, typenumber;
|
||||
cache_type_t type;
|
||||
for (ecx = 0; ecx < MAX_INTELFN4_LEVEL; ecx++) {
|
||||
typenumber = raw->intel_fn4[ecx][0] & 0x1f;
|
||||
if (typenumber == 0) break;
|
||||
level = (raw->intel_fn4[ecx][0] >> 5) & 0x7;
|
||||
if (level == 1 && typenumber == 1)
|
||||
type = L1D;
|
||||
else if (level == 1 && typenumber == 2)
|
||||
type = L1I;
|
||||
else if (level == 2 && typenumber == 3)
|
||||
type = L2;
|
||||
else if (level == 3 && typenumber == 3)
|
||||
type = L3;
|
||||
else if (level == 4 && typenumber == 3)
|
||||
type = L4;
|
||||
else {
|
||||
continue;
|
||||
}
|
||||
ways = ((raw->intel_fn4[ecx][1] >> 22) & 0x3ff) + 1;
|
||||
partitions = ((raw->intel_fn4[ecx][1] >> 12) & 0x3ff) + 1;
|
||||
linesize = (raw->intel_fn4[ecx][1] & 0xfff) + 1;
|
||||
sets = raw->intel_fn4[ecx][2] + 1;
|
||||
size = ways * partitions * linesize * sets / 1024;
|
||||
check_case(1, type, size, ways, linesize, data);
|
||||
}
|
||||
}
|
||||
|
||||
static int decode_intel_extended_topology(struct cpu_raw_data_t* raw,
|
||||
struct cpu_id_t* data)
|
||||
{
|
||||
int i, level_type, num_smt = -1, num_core = -1;
|
||||
for (i = 0; i < MAX_INTELFN11_LEVEL; i++) {
|
||||
level_type = (raw->intel_fn11[i][2] & 0xff00) >> 8;
|
||||
switch (level_type) {
|
||||
case 0x01:
|
||||
num_smt = raw->intel_fn11[i][1] & 0xffff;
|
||||
break;
|
||||
case 0x02:
|
||||
num_core = raw->intel_fn11[i][1] & 0xffff;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (num_smt == -1 || num_core == -1) return 0;
|
||||
data->num_logical_cpus = num_core;
|
||||
data->num_cores = num_core / num_smt;
|
||||
// make sure num_cores is at least 1. In VMs, the CPUID instruction
|
||||
// is rigged and may give nonsensical results, but we should at least
|
||||
// avoid outputs like data->num_cores == 0.
|
||||
if (data->num_cores <= 0) data->num_cores = 1;
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void decode_intel_number_of_cores(struct cpu_raw_data_t* raw,
|
||||
struct cpu_id_t* data)
|
||||
{
|
||||
int logical_cpus = -1, num_cores = -1;
|
||||
|
||||
if (raw->basic_cpuid[0][0] >= 11) {
|
||||
if (decode_intel_extended_topology(raw, data)) return;
|
||||
}
|
||||
|
||||
if (raw->basic_cpuid[0][0] >= 1) {
|
||||
logical_cpus = (raw->basic_cpuid[1][1] >> 16) & 0xff;
|
||||
if (raw->basic_cpuid[0][0] >= 4) {
|
||||
num_cores = 1 + ((raw->basic_cpuid[4][0] >> 26) & 0x3f);
|
||||
}
|
||||
}
|
||||
if (data->flags[CPU_FEATURE_HT]) {
|
||||
if (num_cores > 1) {
|
||||
data->num_cores = num_cores;
|
||||
data->num_logical_cpus = logical_cpus;
|
||||
} else {
|
||||
data->num_cores = 1;
|
||||
data->num_logical_cpus = (logical_cpus >= 1 ? logical_cpus : 1);
|
||||
if (data->num_logical_cpus == 1)
|
||||
data->flags[CPU_FEATURE_HT] = 0;
|
||||
}
|
||||
} else {
|
||||
data->num_cores = data->num_logical_cpus = 1;
|
||||
}
|
||||
}
|
||||
|
||||
static intel_code_and_bits_t get_brand_code_and_bits(struct cpu_id_t* data)
|
||||
{
|
||||
intel_code_t code = (intel_code_t) NC;
|
||||
intel_code_and_bits_t result;
|
||||
uint64_t bits = 0;
|
||||
int i = 0;
|
||||
const char* bs = data->brand_str;
|
||||
const char* s;
|
||||
const struct { intel_code_t c; const char *search; } matchtable[] = {
|
||||
{ PENTIUM_M, "Pentium(R) M" },
|
||||
{ CORE_SOLO, "Pentium(R) Dual CPU" },
|
||||
{ CORE_SOLO, "Pentium(R) Dual-Core" },
|
||||
{ PENTIUM_D, "Pentium(R) D" },
|
||||
{ CORE_SOLO, "Genuine Intel(R) CPU" },
|
||||
{ CORE_SOLO, "Intel(R) Core(TM)" },
|
||||
{ DIAMONDVILLE, "CPU [N ][23]## " },
|
||||
{ SILVERTHORNE, "CPU Z" },
|
||||
{ PINEVIEW, "CPU [ND][45]## " },
|
||||
{ CEDARVIEW, "CPU [ND]#### " },
|
||||
};
|
||||
|
||||
const struct { uint64_t bit; const char* search; } bit_matchtable[] = {
|
||||
{ XEON_, "Xeon" },
|
||||
{ _MP_, " MP" },
|
||||
{ ATOM_, "Atom(TM) CPU" },
|
||||
{ MOBILE_, "Mobile" },
|
||||
{ CELERON_, "Celeron" },
|
||||
{ PENTIUM_, "Pentium" },
|
||||
};
|
||||
|
||||
for (i = 0; i < COUNT_OF(bit_matchtable); i++) {
|
||||
if (match_pattern(bs, bit_matchtable[i].search))
|
||||
bits |= bit_matchtable[i].bit;
|
||||
}
|
||||
|
||||
if ((i = match_pattern(bs, "Core(TM) [im][3579]")) != 0) {
|
||||
bits |= CORE_;
|
||||
i--;
|
||||
switch (bs[i + 9]) {
|
||||
case 'i': bits |= _I_; break;
|
||||
case 'm': bits |= _M_; break;
|
||||
}
|
||||
switch (bs[i + 10]) {
|
||||
case '3': bits |= _3; break;
|
||||
case '5': bits |= _5; break;
|
||||
case '7': bits |= _7; break;
|
||||
case '9': bits |= _9; break;
|
||||
}
|
||||
}
|
||||
for (i = 0; i < COUNT_OF(matchtable); i++)
|
||||
if (match_pattern(bs, matchtable[i].search)) {
|
||||
code = matchtable[i].c;
|
||||
break;
|
||||
}
|
||||
if (bits & XEON_) {
|
||||
if (match_pattern(bs, "W35##") || match_pattern(bs, "[ELXW]75##"))
|
||||
bits |= _7;
|
||||
else if (match_pattern(bs, "[ELXW]55##"))
|
||||
code = GAINESTOWN;
|
||||
else if (match_pattern(bs, "[ELXW]56##"))
|
||||
code = WESTMERE;
|
||||
else if (data->l3_cache > 0 && data->family == 16)
|
||||
/* restrict by family, since later Xeons also have L3 ... */
|
||||
code = IRWIN;
|
||||
}
|
||||
if (match_all(bits, XEON_ + _MP_) && data->l3_cache > 0)
|
||||
code = POTOMAC;
|
||||
if (code == CORE_SOLO) {
|
||||
s = strstr(bs, "CPU");
|
||||
if (s) {
|
||||
s += 3;
|
||||
while (*s == ' ') s++;
|
||||
if (*s == 'T')
|
||||
bits |= MOBILE_;
|
||||
}
|
||||
}
|
||||
if (code == CORE_SOLO) {
|
||||
switch (data->num_cores) {
|
||||
case 1: break;
|
||||
case 2:
|
||||
{
|
||||
code = CORE_DUO;
|
||||
if (data->num_logical_cpus > 2)
|
||||
code = DUAL_CORE_HT;
|
||||
break;
|
||||
}
|
||||
case 4:
|
||||
{
|
||||
code = QUAD_CORE;
|
||||
if (data->num_logical_cpus > 4)
|
||||
code = QUAD_CORE_HT;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
code = MORE_THAN_QUADCORE; break;
|
||||
}
|
||||
}
|
||||
|
||||
if (code == CORE_DUO && (bits & MOBILE_) && data->model != 14) {
|
||||
if (data->ext_model < 23) {
|
||||
code = MEROM;
|
||||
} else {
|
||||
code = PENRYN;
|
||||
}
|
||||
}
|
||||
if (data->ext_model == 23 &&
|
||||
(code == CORE_DUO || code == PENTIUM_D || (bits & CELERON_))) {
|
||||
code = WOLFDALE;
|
||||
}
|
||||
|
||||
result.code = code;
|
||||
result.bits = bits;
|
||||
return result;
|
||||
}
|
||||
|
||||
static void decode_intel_sgx_features(const struct cpu_raw_data_t* raw, struct cpu_id_t* data)
|
||||
{
|
||||
struct cpu_epc_t epc;
|
||||
int i;
|
||||
|
||||
if (raw->basic_cpuid[0][0] < 0x12) return; // no 12h leaf
|
||||
if (raw->basic_cpuid[0x12][0] == 0) return; // no sub-leafs available, probably it's disabled by BIOS
|
||||
|
||||
// decode sub-leaf 0:
|
||||
if (raw->basic_cpuid[0x12][0] & 1) data->sgx.flags[INTEL_SGX1] = 1;
|
||||
if (raw->basic_cpuid[0x12][0] & 2) data->sgx.flags[INTEL_SGX2] = 1;
|
||||
if (data->sgx.flags[INTEL_SGX1] || data->sgx.flags[INTEL_SGX2])
|
||||
data->sgx.present = 1;
|
||||
data->sgx.misc_select = raw->basic_cpuid[0x12][1];
|
||||
data->sgx.max_enclave_32bit = (raw->basic_cpuid[0x12][3] ) & 0xff;
|
||||
data->sgx.max_enclave_64bit = (raw->basic_cpuid[0x12][3] >> 8) & 0xff;
|
||||
|
||||
// decode sub-leaf 1:
|
||||
data->sgx.secs_attributes = raw->intel_fn12h[1][0] | (((uint64_t) raw->intel_fn12h[1][1]) << 32);
|
||||
data->sgx.secs_xfrm = raw->intel_fn12h[1][2] | (((uint64_t) raw->intel_fn12h[1][3]) << 32);
|
||||
|
||||
// decode higher-order subleafs, whenever present:
|
||||
data->sgx.num_epc_sections = -1;
|
||||
for (i = 0; i < 1000000; i++) {
|
||||
epc = cpuid_get_epc(i, raw);
|
||||
if (epc.length == 0) {
|
||||
data->sgx.num_epc_sections = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (data->sgx.num_epc_sections == -1) {
|
||||
data->sgx.num_epc_sections = 1000000;
|
||||
}
|
||||
}
|
||||
|
||||
struct cpu_epc_t cpuid_get_epc(int index, const struct cpu_raw_data_t* raw)
|
||||
{
|
||||
uint32_t regs[4];
|
||||
struct cpu_epc_t retval = {0, 0};
|
||||
if (raw && index < MAX_INTELFN12H_LEVEL - 2) {
|
||||
// this was queried already, use the data:
|
||||
memcpy(regs, raw->intel_fn12h[2 + index], sizeof(regs));
|
||||
} else {
|
||||
// query this ourselves:
|
||||
regs[0] = 0x12;
|
||||
regs[2] = 2 + index;
|
||||
regs[1] = regs[3] = 0;
|
||||
cpu_exec_cpuid_ext(regs);
|
||||
}
|
||||
|
||||
// decode values:
|
||||
if ((regs[0] & 0xf) == 0x1) {
|
||||
retval.start_addr |= (regs[0] & 0xfffff000); // bits [12, 32) -> bits [12, 32)
|
||||
retval.start_addr |= ((uint64_t) (regs[1] & 0x000fffff)) << 32; // bits [0, 20) -> bits [32, 52)
|
||||
retval.length |= (regs[2] & 0xfffff000); // bits [12, 32) -> bits [12, 32)
|
||||
retval.length |= ((uint64_t) (regs[3] & 0x000fffff)) << 32; // bits [0, 20) -> bits [32, 52)
|
||||
}
|
||||
return retval;
|
||||
}
|
||||
|
||||
int cpuid_identify_intel(struct cpu_raw_data_t* raw, struct cpu_id_t* data, struct internal_id_info_t* internal)
|
||||
{
|
||||
intel_code_and_bits_t brand;
|
||||
|
||||
load_intel_features(raw, data);
|
||||
if (raw->basic_cpuid[0][0] >= 4) {
|
||||
/* Deterministic way is preferred, being more generic */
|
||||
decode_intel_deterministic_cache_info(raw, data);
|
||||
} else if (raw->basic_cpuid[0][0] >= 2) {
|
||||
decode_intel_oldstyle_cache_info(raw, data);
|
||||
}
|
||||
decode_intel_number_of_cores(raw, data);
|
||||
|
||||
brand = get_brand_code_and_bits(data);
|
||||
|
||||
internal->code.intel = brand.code;
|
||||
internal->bits = brand.bits;
|
||||
|
||||
if (data->flags[CPU_FEATURE_SGX]) {
|
||||
// if SGX is indicated by the CPU, verify its presence:
|
||||
decode_intel_sgx_features(raw, data);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
31
src/3rdparty/libcpuid/recog_intel.h
vendored
31
src/3rdparty/libcpuid/recog_intel.h
vendored
|
@ -1,31 +0,0 @@
|
|||
/*
|
||||
* Copyright 2008 Veselin Georgiev,
|
||||
* anrieffNOSPAM @ mgail_DOT.com (convert to gmail)
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
||||
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
#ifndef __RECOG_INTEL_H__
|
||||
#define __RECOG_INTEL_H__
|
||||
|
||||
int cpuid_identify_intel(struct cpu_raw_data_t* raw, struct cpu_id_t* data, struct internal_id_info_t* internal);
|
||||
|
||||
#endif /*__RECOG_INTEL_H__*/
|
2
src/3rdparty/libethash/CMakeLists.txt
vendored
2
src/3rdparty/libethash/CMakeLists.txt
vendored
|
@ -1,4 +1,4 @@
|
|||
cmake_minimum_required (VERSION 2.8)
|
||||
cmake_minimum_required (VERSION 2.8.12)
|
||||
project (ethash C)
|
||||
|
||||
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -Os")
|
||||
|
|
32
src/App.cpp
32
src/App.cpp
|
@ -43,17 +43,13 @@
|
|||
|
||||
xmrig::App::App(Process *process)
|
||||
{
|
||||
m_controller = new Controller(process);
|
||||
m_controller = std::make_shared<Controller>(process);
|
||||
}
|
||||
|
||||
|
||||
xmrig::App::~App()
|
||||
{
|
||||
Cpu::release();
|
||||
|
||||
delete m_signals;
|
||||
delete m_console;
|
||||
delete m_controller;
|
||||
}
|
||||
|
||||
|
||||
|
@ -65,7 +61,7 @@ int xmrig::App::exec()
|
|||
return 2;
|
||||
}
|
||||
|
||||
m_signals = new Signals(this);
|
||||
m_signals = std::make_shared<Signals>(this);
|
||||
|
||||
int rc = 0;
|
||||
if (background(rc)) {
|
||||
|
@ -78,10 +74,10 @@ int xmrig::App::exec()
|
|||
}
|
||||
|
||||
if (!m_controller->isBackground()) {
|
||||
m_console = new Console(this);
|
||||
m_console = std::make_shared<Console>(this);
|
||||
}
|
||||
|
||||
Summary::print(m_controller);
|
||||
Summary::print(m_controller.get());
|
||||
|
||||
if (m_controller->config()->isDryRun()) {
|
||||
LOG_NOTICE("%s " WHITE_BOLD("OK"), Tags::config());
|
||||
|
@ -115,32 +111,20 @@ void xmrig::App::onSignal(int signum)
|
|||
switch (signum)
|
||||
{
|
||||
case SIGHUP:
|
||||
LOG_WARN("%s " YELLOW("SIGHUP received, exiting"), Tags::signal());
|
||||
break;
|
||||
|
||||
case SIGTERM:
|
||||
LOG_WARN("%s " YELLOW("SIGTERM received, exiting"), Tags::signal());
|
||||
break;
|
||||
|
||||
case SIGINT:
|
||||
LOG_WARN("%s " YELLOW("SIGINT received, exiting"), Tags::signal());
|
||||
break;
|
||||
return close();
|
||||
|
||||
default:
|
||||
return;
|
||||
break;
|
||||
}
|
||||
|
||||
close();
|
||||
}
|
||||
|
||||
|
||||
void xmrig::App::close()
|
||||
{
|
||||
m_signals->stop();
|
||||
|
||||
if (m_console) {
|
||||
m_console->stop();
|
||||
}
|
||||
m_signals.reset();
|
||||
m_console.reset();
|
||||
|
||||
m_controller->stop();
|
||||
|
||||
|
|
|
@ -32,6 +32,9 @@
|
|||
#include "base/tools/Object.h"
|
||||
|
||||
|
||||
#include <memory>
|
||||
|
||||
|
||||
namespace xmrig {
|
||||
|
||||
|
||||
|
@ -60,9 +63,9 @@ private:
|
|||
bool background(int &rc);
|
||||
void close();
|
||||
|
||||
Console *m_console = nullptr;
|
||||
Controller *m_controller = nullptr;
|
||||
Signals *m_signals = nullptr;
|
||||
std::shared_ptr<Console> m_console;
|
||||
std::shared_ptr<Controller> m_controller;
|
||||
std::shared_ptr<Signals> m_signals;
|
||||
};
|
||||
|
||||
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
* Copyright 2018-2020 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright 2016-2020 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
|
@ -36,8 +36,6 @@
|
|||
|
||||
bool xmrig::App::background(int &rc)
|
||||
{
|
||||
signal(SIGPIPE, SIG_IGN);
|
||||
|
||||
if (!m_controller->isBackground()) {
|
||||
return false;
|
||||
}
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2017-2019 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright 2016-2019 XMRig <support@xmrig.com>
|
||||
* Copyright 2018-2020 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright 2016-2020 XMRig <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
|
@ -47,6 +47,13 @@
|
|||
namespace xmrig {
|
||||
|
||||
|
||||
#ifdef XMRIG_OS_WIN
|
||||
static constexpr const char *kHugepagesSupported = GREEN_BOLD("permission granted");
|
||||
#else
|
||||
static constexpr const char *kHugepagesSupported = GREEN_BOLD("supported");
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef XMRIG_FEATURE_ASM
|
||||
static const char *coloredAsmNames[] = {
|
||||
RED_BOLD("none"),
|
||||
|
@ -66,17 +73,13 @@ inline static const char *asmName(Assembly::Id assembly)
|
|||
|
||||
static void print_memory(Config *config)
|
||||
{
|
||||
# ifdef XMRIG_OS_WIN
|
||||
Log::print(GREEN_BOLD(" * ") WHITE_BOLD("%-13s") "%s",
|
||||
"HUGE PAGES", config->cpu().isHugePages() ? (VirtualMemory::isHugepagesAvailable() ? GREEN_BOLD("permission granted") : RED_BOLD("unavailable")) : RED_BOLD("disabled"));
|
||||
# else
|
||||
Log::print(GREEN_BOLD(" * ") WHITE_BOLD("%-13s") "%s", "HUGE PAGES", config->cpu().isHugePages() ? GREEN_BOLD("supported") : RED_BOLD("disabled"));
|
||||
# endif
|
||||
"HUGE PAGES", config->cpu().isHugePages() ? (VirtualMemory::isHugepagesAvailable() ? kHugepagesSupported : RED_BOLD("unavailable")) : RED_BOLD("disabled"));
|
||||
|
||||
# ifdef XMRIG_ALGO_RANDOMX
|
||||
# ifdef XMRIG_OS_LINUX
|
||||
Log::print(GREEN_BOLD(" * ") WHITE_BOLD("%-13s") "%s",
|
||||
"1GB PAGES", (VirtualMemory::isOneGbPagesAvailable() ? (config->rx().isOneGbPages() ? GREEN_BOLD("supported") : YELLOW_BOLD("disabled")) : YELLOW_BOLD("unavailable")));
|
||||
"1GB PAGES", (VirtualMemory::isOneGbPagesAvailable() ? (config->rx().isOneGbPages() ? kHugepagesSupported : YELLOW_BOLD("disabled")) : YELLOW_BOLD("unavailable")));
|
||||
# else
|
||||
Log::print(GREEN_BOLD(" * ") WHITE_BOLD("%-13s") "%s", "1GB PAGES", YELLOW_BOLD("unavailable"));
|
||||
# endif
|
||||
|
@ -88,33 +91,27 @@ static void print_cpu(Config *)
|
|||
{
|
||||
const auto info = Cpu::info();
|
||||
|
||||
Log::print(GREEN_BOLD(" * ") WHITE_BOLD("%-13s%s (%zu)") " %sx64 %sAES",
|
||||
Log::print(GREEN_BOLD(" * ") WHITE_BOLD("%-13s%s (%zu)") " %s %sAES%s",
|
||||
"CPU",
|
||||
info->brand(),
|
||||
info->packages(),
|
||||
info->isX64() ? GREEN_BOLD_S : RED_BOLD_S "-",
|
||||
info->hasAES() ? GREEN_BOLD_S : RED_BOLD_S "-"
|
||||
ICpuInfo::is64bit() ? GREEN_BOLD("64-bit") : RED_BOLD("32-bit"),
|
||||
info->hasAES() ? GREEN_BOLD_S : RED_BOLD_S "-",
|
||||
info->isVM() ? RED_BOLD_S " VM" : ""
|
||||
);
|
||||
# if defined(XMRIG_FEATURE_LIBCPUID) || defined (XMRIG_FEATURE_HWLOC)
|
||||
# if defined(XMRIG_FEATURE_HWLOC)
|
||||
Log::print(WHITE_BOLD(" %-13s") BLACK_BOLD("L2:") WHITE_BOLD("%.1f MB") BLACK_BOLD(" L3:") WHITE_BOLD("%.1f MB")
|
||||
CYAN_BOLD(" %zu") "C" BLACK_BOLD("/") CYAN_BOLD("%zu") "T"
|
||||
# ifdef XMRIG_FEATURE_HWLOC
|
||||
BLACK_BOLD(" NUMA:") CYAN_BOLD("%zu")
|
||||
# endif
|
||||
, "",
|
||||
BLACK_BOLD(" NUMA:") CYAN_BOLD("%zu"),
|
||||
"",
|
||||
info->L2() / 1048576.0,
|
||||
info->L3() / 1048576.0,
|
||||
info->cores(),
|
||||
info->threads()
|
||||
# ifdef XMRIG_FEATURE_HWLOC
|
||||
, info->nodes()
|
||||
# endif
|
||||
info->threads(),
|
||||
info->nodes()
|
||||
);
|
||||
# else
|
||||
Log::print(WHITE_BOLD(" %-13s") BLACK_BOLD("threads:") CYAN_BOLD("%zu"),
|
||||
"",
|
||||
info->threads()
|
||||
);
|
||||
Log::print(WHITE_BOLD(" %-13s") BLACK_BOLD("threads:") CYAN_BOLD("%zu"), "", info->threads());
|
||||
# endif
|
||||
}
|
||||
|
||||
|
@ -160,11 +157,11 @@ static void print_threads(Config *config)
|
|||
static void print_commands(Config *)
|
||||
{
|
||||
if (Log::isColors()) {
|
||||
Log::print(GREEN_BOLD(" * ") WHITE_BOLD("COMMANDS ") MAGENTA_BG(WHITE_BOLD_S "h") WHITE_BOLD("ashrate, ")
|
||||
MAGENTA_BG(WHITE_BOLD_S "p") WHITE_BOLD("ause, ")
|
||||
MAGENTA_BG(WHITE_BOLD_S "r") WHITE_BOLD("esume, ")
|
||||
Log::print(GREEN_BOLD(" * ") WHITE_BOLD("COMMANDS ") MAGENTA_BG_BOLD("h") WHITE_BOLD("ashrate, ")
|
||||
MAGENTA_BG_BOLD("p") WHITE_BOLD("ause, ")
|
||||
MAGENTA_BG_BOLD("r") WHITE_BOLD("esume, ")
|
||||
WHITE_BOLD("re") MAGENTA_BG(WHITE_BOLD_S "s") WHITE_BOLD("ults, ")
|
||||
MAGENTA_BG(WHITE_BOLD_S "c") WHITE_BOLD("onnection")
|
||||
MAGENTA_BG_BOLD("c") WHITE_BOLD("onnection")
|
||||
);
|
||||
}
|
||||
else {
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
include (src/backend/cpu/cpu.cmake)
|
||||
include (src/backend/opencl/opencl.cmake)
|
||||
include (src/backend/cuda/cuda.cmake)
|
||||
include (src/backend/common/common.cmake)
|
||||
include(src/backend/cpu/cpu.cmake)
|
||||
include(src/backend/opencl/opencl.cmake)
|
||||
include(src/backend/cuda/cuda.cmake)
|
||||
include(src/backend/common/common.cmake)
|
||||
|
||||
|
||||
set(HEADERS_BACKEND
|
||||
|
|
52
src/backend/common/GpuWorker.cpp
Normal file
52
src/backend/common/GpuWorker.cpp
Normal file
|
@ -0,0 +1,52 @@
|
|||
/* XMRig
|
||||
* Copyright (c) 2018-2020 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright (c) 2016-2020 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
|
||||
#include "backend/common/GpuWorker.h"
|
||||
#include "base/tools/Chrono.h"
|
||||
|
||||
|
||||
xmrig::GpuWorker::GpuWorker(size_t id, int64_t affinity, int priority, uint32_t deviceIndex) : Worker(id, affinity, priority),
|
||||
m_deviceIndex(deviceIndex)
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
void xmrig::GpuWorker::storeStats()
|
||||
{
|
||||
// Get index which is unused now
|
||||
const uint32_t index = m_index.load(std::memory_order_relaxed) ^ 1;
|
||||
|
||||
// Fill in the data for that index
|
||||
m_hashCount[index] = m_count;
|
||||
m_timestamp[index] = Chrono::steadyMSecs();
|
||||
|
||||
// Switch to that index
|
||||
// All data will be in memory by the time it completes thanks to std::memory_order_seq_cst
|
||||
m_index.fetch_xor(1, std::memory_order_seq_cst);
|
||||
}
|
||||
|
||||
|
||||
void xmrig::GpuWorker::hashrateData(uint64_t &hashCount, uint64_t &timeStamp, uint64_t &rawHashes) const
|
||||
{
|
||||
const uint32_t index = m_index.load(std::memory_order_relaxed);
|
||||
|
||||
rawHashes = m_hashrateData.interpolate(timeStamp);
|
||||
hashCount = m_hashCount[index];
|
||||
timeStamp = m_timestamp[index];
|
||||
}
|
58
src/backend/common/GpuWorker.h
Normal file
58
src/backend/common/GpuWorker.h
Normal file
|
@ -0,0 +1,58 @@
|
|||
/* XMRig
|
||||
* Copyright (c) 2018-2020 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright (c) 2016-2020 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef XMRIG_GPUWORKER_H
|
||||
#define XMRIG_GPUWORKER_H
|
||||
|
||||
|
||||
#include <atomic>
|
||||
|
||||
|
||||
#include "backend/common/HashrateInterpolator.h"
|
||||
#include "backend/common/Worker.h"
|
||||
|
||||
|
||||
namespace xmrig {
|
||||
|
||||
|
||||
class GpuWorker : public Worker
|
||||
{
|
||||
public:
|
||||
GpuWorker(size_t id, int64_t affinity, int priority, uint32_t m_deviceIndex);
|
||||
|
||||
protected:
|
||||
inline const VirtualMemory *memory() const override { return nullptr; }
|
||||
inline uint32_t deviceIndex() const { return m_deviceIndex; }
|
||||
|
||||
void hashrateData(uint64_t &hashCount, uint64_t &timeStamp, uint64_t &rawHashes) const override;
|
||||
|
||||
protected:
|
||||
void storeStats();
|
||||
|
||||
const uint32_t m_deviceIndex;
|
||||
HashrateInterpolator m_hashrateData;
|
||||
std::atomic<uint32_t> m_index = {};
|
||||
uint64_t m_hashCount[2] = {};
|
||||
uint64_t m_timestamp[2] = {};
|
||||
};
|
||||
|
||||
|
||||
} // namespace xmrig
|
||||
|
||||
|
||||
#endif /* XMRIG_GPUWORKER_H */
|
|
@ -1,12 +1,7 @@
|
|||
/* XMRig
|
||||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||
* Copyright 2018-2020 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright 2016-2020 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
* Copyright (c) 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||
* Copyright (c) 2018-2020 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright (c) 2016-2020 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
|
@ -24,7 +19,6 @@
|
|||
|
||||
|
||||
#include <cassert>
|
||||
#include <cmath>
|
||||
#include <memory.h>
|
||||
#include <cstdio>
|
||||
|
||||
|
@ -75,72 +69,6 @@ xmrig::Hashrate::~Hashrate()
|
|||
}
|
||||
|
||||
|
||||
double xmrig::Hashrate::calc(size_t ms) const
|
||||
{
|
||||
const double data = calc(0, ms);
|
||||
return std::isnormal(data) ? data : 0.0;
|
||||
}
|
||||
|
||||
|
||||
double xmrig::Hashrate::calc(size_t threadId, size_t ms) const
|
||||
{
|
||||
assert(threadId < m_threads);
|
||||
if (threadId >= m_threads) {
|
||||
return nan("");
|
||||
}
|
||||
|
||||
uint64_t earliestHashCount = 0;
|
||||
uint64_t earliestStamp = 0;
|
||||
bool haveFullSet = false;
|
||||
|
||||
const uint64_t timeStampLimit = xmrig::Chrono::steadyMSecs() - ms;
|
||||
uint64_t* timestamps = m_timestamps[threadId];
|
||||
uint64_t* counts = m_counts[threadId];
|
||||
|
||||
const size_t idx_start = (m_top[threadId] - 1) & kBucketMask;
|
||||
size_t idx = idx_start;
|
||||
|
||||
uint64_t lastestStamp = timestamps[idx];
|
||||
uint64_t lastestHashCnt = counts[idx];
|
||||
|
||||
do {
|
||||
if (timestamps[idx] < timeStampLimit) {
|
||||
haveFullSet = (timestamps[idx] != 0);
|
||||
if (idx != idx_start) {
|
||||
idx = (idx + 1) & kBucketMask;
|
||||
earliestStamp = timestamps[idx];
|
||||
earliestHashCount = counts[idx];
|
||||
}
|
||||
break;
|
||||
}
|
||||
idx = (idx - 1) & kBucketMask;
|
||||
} while (idx != idx_start);
|
||||
|
||||
if (!haveFullSet || earliestStamp == 0 || lastestStamp == 0) {
|
||||
return nan("");
|
||||
}
|
||||
|
||||
if (lastestStamp - earliestStamp == 0) {
|
||||
return nan("");
|
||||
}
|
||||
|
||||
const auto hashes = static_cast<double>(lastestHashCnt - earliestHashCount);
|
||||
const auto time = static_cast<double>(lastestStamp - earliestStamp) / 1000.0;
|
||||
|
||||
return hashes / time;
|
||||
}
|
||||
|
||||
|
||||
void xmrig::Hashrate::add(size_t threadId, uint64_t count, uint64_t timestamp)
|
||||
{
|
||||
const size_t top = m_top[threadId];
|
||||
m_counts[threadId][top] = count;
|
||||
m_timestamps[threadId][top] = timestamp;
|
||||
|
||||
m_top[threadId] = (top + 1) & kBucketMask;
|
||||
}
|
||||
|
||||
|
||||
const char *xmrig::Hashrate::format(double h, char *buf, size_t size)
|
||||
{
|
||||
return ::format(h, buf, size);
|
||||
|
@ -174,10 +102,69 @@ rapidjson::Value xmrig::Hashrate::toJSON(size_t threadId, rapidjson::Document &d
|
|||
auto &allocator = doc.GetAllocator();
|
||||
|
||||
Value out(kArrayType);
|
||||
out.PushBack(normalize(calc(threadId + 1, ShortInterval)), allocator);
|
||||
out.PushBack(normalize(calc(threadId + 1, MediumInterval)), allocator);
|
||||
out.PushBack(normalize(calc(threadId + 1, LargeInterval)), allocator);
|
||||
out.PushBack(normalize(calc(threadId, ShortInterval)), allocator);
|
||||
out.PushBack(normalize(calc(threadId, MediumInterval)), allocator);
|
||||
out.PushBack(normalize(calc(threadId, LargeInterval)), allocator);
|
||||
|
||||
return out;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
double xmrig::Hashrate::hashrate(size_t index, size_t ms) const
|
||||
{
|
||||
assert(index < m_threads);
|
||||
if (index >= m_threads) {
|
||||
return nan("");
|
||||
}
|
||||
|
||||
uint64_t earliestHashCount = 0;
|
||||
uint64_t earliestStamp = 0;
|
||||
bool haveFullSet = false;
|
||||
|
||||
const uint64_t timeStampLimit = xmrig::Chrono::steadyMSecs() - ms;
|
||||
uint64_t* timestamps = m_timestamps[index];
|
||||
uint64_t* counts = m_counts[index];
|
||||
|
||||
const size_t idx_start = (m_top[index] - 1) & kBucketMask;
|
||||
size_t idx = idx_start;
|
||||
|
||||
uint64_t lastestStamp = timestamps[idx];
|
||||
uint64_t lastestHashCnt = counts[idx];
|
||||
|
||||
do {
|
||||
if (timestamps[idx] < timeStampLimit) {
|
||||
haveFullSet = (timestamps[idx] != 0);
|
||||
if (idx != idx_start) {
|
||||
idx = (idx + 1) & kBucketMask;
|
||||
earliestStamp = timestamps[idx];
|
||||
earliestHashCount = counts[idx];
|
||||
}
|
||||
break;
|
||||
}
|
||||
idx = (idx - 1) & kBucketMask;
|
||||
} while (idx != idx_start);
|
||||
|
||||
if (!haveFullSet || earliestStamp == 0 || lastestStamp == 0) {
|
||||
return nan("");
|
||||
}
|
||||
|
||||
if (lastestStamp - earliestStamp == 0) {
|
||||
return nan("");
|
||||
}
|
||||
|
||||
const auto hashes = static_cast<double>(lastestHashCnt - earliestHashCount);
|
||||
const auto time = static_cast<double>(lastestStamp - earliestStamp) / 1000.0;
|
||||
|
||||
return hashes / time;
|
||||
}
|
||||
|
||||
|
||||
void xmrig::Hashrate::addData(size_t index, uint64_t count, uint64_t timestamp)
|
||||
{
|
||||
const size_t top = m_top[index];
|
||||
m_counts[index][top] = count;
|
||||
m_timestamps[index][top] = timestamp;
|
||||
|
||||
m_top[index] = (top + 1) & kBucketMask;
|
||||
}
|
||||
|
|
|
@ -1,12 +1,7 @@
|
|||
/* XMRig
|
||||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||
* Copyright 2018-2020 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright 2016-2020 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
* Copyright (c) 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||
* Copyright (c) 2018-2020 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright (c) 2016-2020 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
|
@ -26,6 +21,7 @@
|
|||
#define XMRIG_HASHRATE_H
|
||||
|
||||
|
||||
#include <cmath>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
|
||||
|
@ -42,7 +38,7 @@ class Hashrate
|
|||
public:
|
||||
XMRIG_DISABLE_COPY_MOVE_DEFAULT(Hashrate)
|
||||
|
||||
enum Intervals {
|
||||
enum Intervals : size_t {
|
||||
ShortInterval = 10000,
|
||||
MediumInterval = 60000,
|
||||
LargeInterval = 900000
|
||||
|
@ -50,11 +46,12 @@ public:
|
|||
|
||||
Hashrate(size_t threads);
|
||||
~Hashrate();
|
||||
double calc(size_t ms) const;
|
||||
double calc(size_t threadId, size_t ms) const;
|
||||
void add(size_t threadId, uint64_t count, uint64_t timestamp);
|
||||
|
||||
inline size_t threads() const { return m_threads; }
|
||||
inline double calc(size_t ms) const { const double data = hashrate(0U, ms); return std::isnormal(data) ? data : 0.0; }
|
||||
inline double calc(size_t threadId, size_t ms) const { return hashrate(threadId + 1, ms); }
|
||||
inline size_t threads() const { return m_threads > 0U ? m_threads - 1U : 0U; }
|
||||
inline void add(size_t threadId, uint64_t count, uint64_t timestamp) { addData(threadId + 1U, count, timestamp); }
|
||||
inline void add(uint64_t count, uint64_t timestamp) { addData(0U, count, timestamp); }
|
||||
|
||||
static const char *format(double h, char *buf, size_t size);
|
||||
static rapidjson::Value normalize(double d);
|
||||
|
@ -65,6 +62,9 @@ public:
|
|||
# endif
|
||||
|
||||
private:
|
||||
double hashrate(size_t index, size_t ms) const;
|
||||
void addData(size_t index, uint64_t count, uint64_t timestamp);
|
||||
|
||||
constexpr static size_t kBucketSize = 2 << 11;
|
||||
constexpr static size_t kBucketMask = kBucketSize - 1;
|
||||
|
||||
|
|
|
@ -1,10 +1,4 @@
|
|||
/* XMRig
|
||||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||
* Copyright 2018-2020 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright 2016-2020 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
|
|
|
@ -1,10 +1,4 @@
|
|||
/* XMRig
|
||||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||
* Copyright 2018-2020 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright 2016-2020 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
|
@ -47,7 +41,7 @@ public:
|
|||
private:
|
||||
// Buffer of hashrate counters, used for linear interpolation of past data
|
||||
mutable std::mutex m_lock;
|
||||
std::deque<std::pair<uint64_t, uint64_t>> m_data;
|
||||
std::deque<std::pair<uint64_t, uint64_t> > m_data;
|
||||
};
|
||||
|
||||
|
||||
|
|
|
@ -1,12 +1,6 @@
|
|||
/* XMRig
|
||||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
* Copyright (c) 2018-2020 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright (c) 2016-2020 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
|
@ -27,12 +21,17 @@
|
|||
|
||||
|
||||
#include "backend/common/interfaces/IWorker.h"
|
||||
#include "base/tools/Object.h"
|
||||
|
||||
|
||||
#include <thread>
|
||||
|
||||
|
||||
#ifdef XMRIG_OS_APPLE
|
||||
# include <pthread.h>
|
||||
# include <mach/thread_act.h>
|
||||
#endif
|
||||
|
||||
|
||||
namespace xmrig {
|
||||
|
||||
|
||||
|
@ -46,21 +45,48 @@ public:
|
|||
XMRIG_DISABLE_COPY_MOVE_DEFAULT(Thread)
|
||||
|
||||
inline Thread(IBackend *backend, size_t id, const T &config) : m_id(id), m_config(config), m_backend(backend) {}
|
||||
|
||||
# ifdef XMRIG_OS_APPLE
|
||||
inline ~Thread() { pthread_join(m_thread, nullptr); delete m_worker; }
|
||||
|
||||
inline void start(void *(*callback)(void *))
|
||||
{
|
||||
if (m_config.affinity >= 0) {
|
||||
pthread_create_suspended_np(&m_thread, nullptr, callback, this);
|
||||
|
||||
mach_port_t mach_thread = pthread_mach_thread_np(m_thread);
|
||||
thread_affinity_policy_data_t policy = { static_cast<integer_t>(m_config.affinity + 1) };
|
||||
|
||||
thread_policy_set(mach_thread, THREAD_AFFINITY_POLICY, reinterpret_cast<thread_policy_t>(&policy), THREAD_AFFINITY_POLICY_COUNT);
|
||||
thread_resume(mach_thread);
|
||||
}
|
||||
else {
|
||||
pthread_create(&m_thread, nullptr, callback, this);
|
||||
}
|
||||
}
|
||||
# else
|
||||
inline ~Thread() { m_thread.join(); delete m_worker; }
|
||||
|
||||
inline void start(void *(*callback)(void *)) { m_thread = std::thread(callback, this); }
|
||||
# endif
|
||||
|
||||
inline const T &config() const { return m_config; }
|
||||
inline IBackend *backend() const { return m_backend; }
|
||||
inline IWorker *worker() const { return m_worker; }
|
||||
inline size_t id() const { return m_id; }
|
||||
inline void setWorker(IWorker *worker) { m_worker = worker; }
|
||||
inline void start(void (*callback) (void *)) { m_thread = std::thread(callback, this); }
|
||||
|
||||
private:
|
||||
const size_t m_id = 0;
|
||||
const T m_config;
|
||||
IBackend *m_backend;
|
||||
IWorker *m_worker = nullptr;
|
||||
|
||||
#ifdef XMRIG_OS_APPLE
|
||||
pthread_t m_thread{};
|
||||
# else
|
||||
std::thread m_thread;
|
||||
# endif
|
||||
};
|
||||
|
||||
|
||||
|
|
|
@ -1,13 +1,6 @@
|
|||
/* XMRig
|
||||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
|
||||
* Copyright 2018-2020 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright 2016-2020 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
* Copyright (c) 2018-2020 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright (c) 2016-2020 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
|
@ -26,7 +19,6 @@
|
|||
|
||||
#include "backend/common/Worker.h"
|
||||
#include "base/kernel/Platform.h"
|
||||
#include "base/tools/Chrono.h"
|
||||
#include "crypto/common/VirtualMemory.h"
|
||||
|
||||
|
||||
|
@ -39,27 +31,3 @@ xmrig::Worker::Worker(size_t id, int64_t affinity, int priority) :
|
|||
Platform::trySetThreadAffinity(affinity);
|
||||
Platform::setThreadPriority(priority);
|
||||
}
|
||||
|
||||
|
||||
void xmrig::Worker::storeStats()
|
||||
{
|
||||
// Get index which is unused now
|
||||
const uint32_t index = m_index.load(std::memory_order_relaxed) ^ 1;
|
||||
|
||||
// Fill in the data for that index
|
||||
m_hashCount[index] = m_count;
|
||||
m_timestamp[index] = Chrono::steadyMSecs();
|
||||
|
||||
// Switch to that index
|
||||
// All data will be in memory by the time it completes thanks to std::memory_order_seq_cst
|
||||
m_index.fetch_xor(1, std::memory_order_seq_cst);
|
||||
}
|
||||
|
||||
|
||||
void xmrig::Worker::getHashrateData(uint64_t& hashCount, uint64_t& timeStamp) const
|
||||
{
|
||||
const uint32_t index = m_index.load(std::memory_order_relaxed);
|
||||
|
||||
hashCount = m_hashCount[index];
|
||||
timeStamp = m_timestamp[index];
|
||||
}
|
||||
|
|
|
@ -1,13 +1,6 @@
|
|||
/* XMRig
|
||||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
|
||||
* Copyright 2018-2020 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright 2016-2020 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
* Copyright (c) 2018-2020 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright (c) 2016-2020 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
|
@ -27,9 +20,6 @@
|
|||
#define XMRIG_WORKER_H
|
||||
|
||||
|
||||
#include <atomic>
|
||||
|
||||
|
||||
#include "backend/common/interfaces/IWorker.h"
|
||||
|
||||
|
||||
|
@ -41,23 +31,17 @@ class Worker : public IWorker
|
|||
public:
|
||||
Worker(size_t id, int64_t affinity, int priority);
|
||||
|
||||
inline const VirtualMemory *memory() const override { return nullptr; }
|
||||
inline size_t id() const override { return m_id; }
|
||||
inline uint64_t rawHashes() const override { return m_count; }
|
||||
inline void jobEarlyNotification(const Job&) override {}
|
||||
|
||||
void getHashrateData(uint64_t& hashCount, uint64_t& timeStamp) const override;
|
||||
|
||||
protected:
|
||||
void storeStats();
|
||||
inline int64_t affinity() const { return m_affinity; }
|
||||
inline size_t id() const override { return m_id; }
|
||||
inline uint32_t node() const { return m_node; }
|
||||
|
||||
uint64_t m_count = 0;
|
||||
|
||||
private:
|
||||
const int64_t m_affinity;
|
||||
const size_t m_id;
|
||||
std::atomic<uint32_t> m_index = {};
|
||||
uint32_t m_node = 0;
|
||||
uint64_t m_count = 0;
|
||||
uint64_t m_hashCount[2] = {};
|
||||
uint64_t m_timestamp[2] = {};
|
||||
};
|
||||
|
||||
|
||||
|
|
|
@ -110,7 +110,7 @@ private:
|
|||
alignas(16) uint8_t m_blobs[2][Job::kMaxBlobSize * N]{};
|
||||
Job m_jobs[2];
|
||||
uint32_t m_rounds[2] = { 0, 0 };
|
||||
uint64_t m_nonce_mask[2];
|
||||
uint64_t m_nonce_mask[2] = { 0, 0 };
|
||||
uint64_t m_sequence = 0;
|
||||
uint8_t m_index = 0;
|
||||
};
|
||||
|
|
|
@ -1,13 +1,6 @@
|
|||
/* XMRig
|
||||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
|
||||
* Copyright 2018-2020 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright 2016-2020 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
* Copyright (c) 2018-2020 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright (c) 2016-2020 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
|
@ -24,16 +17,13 @@
|
|||
*/
|
||||
|
||||
|
||||
#include "backend/common/Workers.h"
|
||||
#include "backend/common/Hashrate.h"
|
||||
#include "backend/common/interfaces/IBackend.h"
|
||||
#include "backend/common/Workers.h"
|
||||
#include "backend/cpu/CpuWorker.h"
|
||||
#include "base/io/log/Log.h"
|
||||
#include "base/io/log/Tags.h"
|
||||
#include "base/net/stratum/Pool.h"
|
||||
#include "base/tools/Chrono.h"
|
||||
#include "base/tools/Object.h"
|
||||
#include "core/Miner.h"
|
||||
|
||||
|
||||
#ifdef XMRIG_FEATURE_OPENCL
|
||||
|
@ -59,7 +49,6 @@ class WorkersPrivate
|
|||
public:
|
||||
XMRIG_DISABLE_COPY_MOVE(WorkersPrivate)
|
||||
|
||||
|
||||
WorkersPrivate() = default;
|
||||
~WorkersPrivate() = default;
|
||||
|
||||
|
@ -87,20 +76,6 @@ xmrig::Workers<T>::~Workers()
|
|||
}
|
||||
|
||||
|
||||
template<class T>
|
||||
static void getHashrateData(xmrig::IWorker* worker, uint64_t& hashCount, uint64_t& timeStamp)
|
||||
{
|
||||
worker->getHashrateData(hashCount, timeStamp);
|
||||
}
|
||||
|
||||
|
||||
template<>
|
||||
void getHashrateData<xmrig::CpuLaunchData>(xmrig::IWorker* worker, uint64_t& hashCount, uint64_t&)
|
||||
{
|
||||
hashCount = worker->rawHashes();
|
||||
}
|
||||
|
||||
|
||||
template<class T>
|
||||
bool xmrig::Workers<T>::tick(uint64_t)
|
||||
{
|
||||
|
@ -111,33 +86,32 @@ bool xmrig::Workers<T>::tick(uint64_t)
|
|||
uint64_t ts = Chrono::steadyMSecs();
|
||||
bool totalAvailable = true;
|
||||
uint64_t totalHashCount = 0;
|
||||
uint64_t hashCount = 0;
|
||||
uint64_t rawHashes = 0;
|
||||
|
||||
for (Thread<T> *handle : m_workers) {
|
||||
IWorker *worker = handle->worker();
|
||||
if (worker) {
|
||||
uint64_t hashCount;
|
||||
getHashrateData<T>(worker, hashCount, ts);
|
||||
d_ptr->hashrate->add(handle->id() + 1, hashCount, ts);
|
||||
worker->hashrateData(hashCount, ts, rawHashes);
|
||||
d_ptr->hashrate->add(handle->id(), hashCount, ts);
|
||||
|
||||
const uint64_t n = worker->rawHashes();
|
||||
if (n == 0) {
|
||||
if (rawHashes == 0) {
|
||||
totalAvailable = false;
|
||||
}
|
||||
totalHashCount += n;
|
||||
|
||||
totalHashCount += rawHashes;
|
||||
}
|
||||
}
|
||||
|
||||
if (totalAvailable) {
|
||||
d_ptr->hashrate->add(0, totalHashCount, Chrono::steadyMSecs());
|
||||
d_ptr->hashrate->add(totalHashCount, Chrono::steadyMSecs());
|
||||
}
|
||||
|
||||
# ifdef XMRIG_FEATURE_BENCHMARK
|
||||
if (d_ptr->benchmark && d_ptr->benchmark->finish(totalHashCount)) {
|
||||
return false;
|
||||
}
|
||||
# endif
|
||||
|
||||
return !d_ptr->benchmark || !d_ptr->benchmark->finish(totalHashCount);
|
||||
# else
|
||||
return true;
|
||||
# endif
|
||||
}
|
||||
|
||||
|
||||
|
@ -158,14 +132,19 @@ void xmrig::Workers<T>::setBackend(IBackend *backend)
|
|||
template<class T>
|
||||
void xmrig::Workers<T>::stop()
|
||||
{
|
||||
# ifdef XMRIG_MINER_PROJECT
|
||||
Nonce::stop(T::backend());
|
||||
# endif
|
||||
|
||||
for (Thread<T> *worker : m_workers) {
|
||||
delete worker;
|
||||
}
|
||||
|
||||
m_workers.clear();
|
||||
|
||||
# ifdef XMRIG_MINER_PROJECT
|
||||
Nonce::touch(T::backend());
|
||||
# endif
|
||||
|
||||
d_ptr->hashrate.reset();
|
||||
}
|
||||
|
@ -195,7 +174,7 @@ xmrig::IWorker *xmrig::Workers<T>::create(Thread<T> *)
|
|||
|
||||
|
||||
template<class T>
|
||||
void xmrig::Workers<T>::onReady(void *arg)
|
||||
void *xmrig::Workers<T>::onReady(void *arg)
|
||||
{
|
||||
auto handle = static_cast<Thread<T>* >(arg);
|
||||
|
||||
|
@ -208,13 +187,15 @@ void xmrig::Workers<T>::onReady(void *arg)
|
|||
handle->backend()->start(worker, false);
|
||||
delete worker;
|
||||
|
||||
return;
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
assert(handle->backend() != nullptr);
|
||||
|
||||
handle->setWorker(worker);
|
||||
handle->backend()->start(worker, true);
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
|
||||
|
@ -226,17 +207,13 @@ void xmrig::Workers<T>::start(const std::vector<T> &data, bool sleep)
|
|||
}
|
||||
|
||||
d_ptr->hashrate = std::make_shared<Hashrate>(m_workers.size());
|
||||
|
||||
# ifdef XMRIG_MINER_PROJECT
|
||||
Nonce::touch(T::backend());
|
||||
# endif
|
||||
|
||||
for (auto worker : m_workers) {
|
||||
worker->start(Workers<T>::onReady);
|
||||
|
||||
// This sleep is important for optimal caching!
|
||||
// Threads must allocate scratchpads in order so that adjacent cores will use adjacent scratchpads
|
||||
// Sub-optimal caching can result in up to 0.5% hashrate penalty
|
||||
if (sleep) {
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(20));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -247,6 +224,7 @@ namespace xmrig {
|
|||
template<>
|
||||
xmrig::IWorker *xmrig::Workers<CpuLaunchData>::create(Thread<CpuLaunchData> *handle)
|
||||
{
|
||||
# ifdef XMRIG_MINER_PROJECT
|
||||
switch (handle->config().intensity) {
|
||||
case 1:
|
||||
return new CpuWorker<1>(handle->id(), handle->config());
|
||||
|
@ -265,6 +243,11 @@ xmrig::IWorker *xmrig::Workers<CpuLaunchData>::create(Thread<CpuLaunchData> *han
|
|||
}
|
||||
|
||||
return nullptr;
|
||||
# else
|
||||
assert(handle->config().intensity == 1);
|
||||
|
||||
return new CpuWorker<1>(handle->id(), handle->config());
|
||||
# endif
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -1,13 +1,6 @@
|
|||
/* XMRig
|
||||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
|
||||
* Copyright 2018-2020 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright 2016-2020 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
* Copyright (c) 2018-2020 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright (c) 2016-2020 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
|
@ -44,10 +37,9 @@
|
|||
namespace xmrig {
|
||||
|
||||
|
||||
class Benchmark;
|
||||
class Hashrate;
|
||||
class WorkersPrivate;
|
||||
class Job;
|
||||
class Benchmark;
|
||||
|
||||
|
||||
template<class T>
|
||||
|
@ -63,7 +55,7 @@ public:
|
|||
|
||||
bool tick(uint64_t ticks);
|
||||
const Hashrate *hashrate() const;
|
||||
void jobEarlyNotification(const Job&);
|
||||
void jobEarlyNotification(const Job &job);
|
||||
void setBackend(IBackend *backend);
|
||||
void stop();
|
||||
|
||||
|
@ -73,7 +65,7 @@ public:
|
|||
|
||||
private:
|
||||
static IWorker *create(Thread<T> *handle);
|
||||
static void onReady(void *arg);
|
||||
static void *onReady(void *arg);
|
||||
|
||||
void start(const std::vector<T> &data, bool sleep);
|
||||
|
||||
|
@ -83,7 +75,7 @@ private:
|
|||
|
||||
|
||||
template<class T>
|
||||
void xmrig::Workers<T>::jobEarlyNotification(const Job& job)
|
||||
void xmrig::Workers<T>::jobEarlyNotification(const Job &job)
|
||||
{
|
||||
for (Thread<T>* t : m_workers) {
|
||||
if (t->worker()) {
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
set(HEADERS_BACKEND_COMMON
|
||||
src/backend/common/Hashrate.h
|
||||
src/backend/common/HashrateInterpolator.h
|
||||
src/backend/common/Tags.h
|
||||
src/backend/common/interfaces/IBackend.h
|
||||
src/backend/common/interfaces/IRxListener.h
|
||||
|
@ -16,7 +15,6 @@ set(HEADERS_BACKEND_COMMON
|
|||
|
||||
set(SOURCES_BACKEND_COMMON
|
||||
src/backend/common/Hashrate.cpp
|
||||
src/backend/common/HashrateInterpolator.cpp
|
||||
src/backend/common/Threads.cpp
|
||||
src/backend/common/Worker.cpp
|
||||
src/backend/common/Workers.cpp
|
||||
|
@ -35,3 +33,16 @@ if (WITH_RANDOMX AND WITH_BENCHMARK)
|
|||
src/backend/common/benchmark/BenchState.cpp
|
||||
)
|
||||
endif()
|
||||
|
||||
|
||||
if (WITH_OPENCL OR WITH_CUDA)
|
||||
list(APPEND HEADERS_BACKEND_COMMON
|
||||
src/backend/common/HashrateInterpolator.h
|
||||
src/backend/common/GpuWorker.h
|
||||
)
|
||||
|
||||
list(APPEND SOURCES_BACKEND_COMMON
|
||||
src/backend/common/HashrateInterpolator.cpp
|
||||
src/backend/common/GpuWorker.cpp
|
||||
)
|
||||
endif()
|
||||
|
|
|
@ -1,12 +1,6 @@
|
|||
/* XMRig
|
||||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||
* Copyright 2018-2020 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright 2016-2020 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
* Copyright (c) 2018-2020 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright (c) 2016-2020 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
|
@ -26,10 +20,11 @@
|
|||
#define XMRIG_IBACKEND_H
|
||||
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
|
||||
#include "3rdparty/rapidjson/fwd.h"
|
||||
#include "base/tools/Object.h"
|
||||
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
|
||||
namespace xmrig {
|
||||
|
@ -47,10 +42,14 @@ class String;
|
|||
class IBackend
|
||||
{
|
||||
public:
|
||||
XMRIG_DISABLE_COPY_MOVE(IBackend)
|
||||
|
||||
IBackend() = default;
|
||||
virtual ~IBackend() = default;
|
||||
|
||||
virtual bool isEnabled() const = 0;
|
||||
virtual bool isEnabled(const Algorithm &algorithm) const = 0;
|
||||
virtual bool tick(uint64_t ticks) = 0;
|
||||
virtual const Hashrate *hashrate() const = 0;
|
||||
virtual const String &profileName() const = 0;
|
||||
virtual const String &type() const = 0;
|
||||
|
@ -61,7 +60,6 @@ public:
|
|||
virtual void setJob(const Job &job) = 0;
|
||||
virtual void start(IWorker *worker, bool ready) = 0;
|
||||
virtual void stop() = 0;
|
||||
virtual bool tick(uint64_t ticks) = 0;
|
||||
|
||||
# ifdef XMRIG_FEATURE_API
|
||||
virtual rapidjson::Value toJSON(rapidjson::Document &doc) const = 0;
|
||||
|
|
|
@ -1,14 +1,6 @@
|
|||
/* XMRig
|
||||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
|
||||
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright 2018-2019 tevador <tevador@gmail.com>
|
||||
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
* Copyright (c) 2018-2020 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright (c) 2016-2020 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
|
@ -28,6 +20,9 @@
|
|||
#define XMRIG_IMEMORYPOOL_H
|
||||
|
||||
|
||||
#include "base/tools/Object.h"
|
||||
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
|
||||
|
@ -38,7 +33,10 @@ namespace xmrig {
|
|||
class IMemoryPool
|
||||
{
|
||||
public:
|
||||
virtual ~IMemoryPool() = default;
|
||||
XMRIG_DISABLE_COPY_MOVE(IMemoryPool)
|
||||
|
||||
IMemoryPool() = default;
|
||||
virtual ~IMemoryPool() = default;
|
||||
|
||||
virtual bool isHugePages(uint32_t node) const = 0;
|
||||
virtual uint8_t *get(size_t size, uint32_t node) = 0;
|
||||
|
|
|
@ -1,10 +1,7 @@
|
|||
/* XMRig
|
||||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2016-2018 XMRig <support@xmrig.com>
|
||||
* Copyright (c) 2018-2019 tevador <tevador@gmail.com>
|
||||
* Copyright (c) 2018-2020 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright (c) 2016-2020 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
|
@ -24,13 +21,19 @@
|
|||
#define XMRIG_IRXLISTENER_H
|
||||
|
||||
|
||||
#include "base/tools/Object.h"
|
||||
|
||||
|
||||
namespace xmrig {
|
||||
|
||||
|
||||
class IRxListener
|
||||
{
|
||||
public:
|
||||
virtual ~IRxListener() = default;
|
||||
XMRIG_DISABLE_COPY_MOVE(IRxListener)
|
||||
|
||||
IRxListener() = default;
|
||||
virtual ~IRxListener() = default;
|
||||
|
||||
# ifdef XMRIG_ALGO_RANDOMX
|
||||
virtual void onDatasetReady() = 0;
|
||||
|
|
|
@ -1,10 +1,7 @@
|
|||
/* XMRig
|
||||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2016-2018 XMRig <support@xmrig.com>
|
||||
* Copyright (c) 2018-2019 tevador <tevador@gmail.com>
|
||||
* Copyright (c) 2018-2020 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright (c) 2016-2020 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
|
@ -24,8 +21,9 @@
|
|||
#define XMRIG_IRXSTORAGE_H
|
||||
|
||||
|
||||
#include "crypto/rx/RxConfig.h"
|
||||
#include "base/tools/Object.h"
|
||||
#include "crypto/common/HugePagesInfo.h"
|
||||
#include "crypto/rx/RxConfig.h"
|
||||
|
||||
|
||||
#include <cstdint>
|
||||
|
@ -43,7 +41,10 @@ class RxSeed;
|
|||
class IRxStorage
|
||||
{
|
||||
public:
|
||||
virtual ~IRxStorage() = default;
|
||||
XMRIG_DISABLE_COPY_MOVE(IRxStorage)
|
||||
|
||||
IRxStorage() = default;
|
||||
virtual ~IRxStorage() = default;
|
||||
|
||||
virtual bool isAllocated() const = 0;
|
||||
virtual HugePagesInfo hugePages() const = 0;
|
||||
|
|
|
@ -1,12 +1,6 @@
|
|||
/* XMRig
|
||||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||
* Copyright 2018-2020 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright 2016-2020 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
* Copyright (c) 2018-2020 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright (c) 2016-2020 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
|
@ -36,8 +30,8 @@
|
|||
namespace xmrig {
|
||||
|
||||
|
||||
class VirtualMemory;
|
||||
class Job;
|
||||
class VirtualMemory;
|
||||
|
||||
|
||||
class IWorker
|
||||
|
@ -48,14 +42,13 @@ public:
|
|||
IWorker() = default;
|
||||
virtual ~IWorker() = default;
|
||||
|
||||
virtual bool selfTest() = 0;
|
||||
virtual const VirtualMemory *memory() const = 0;
|
||||
virtual size_t id() const = 0;
|
||||
virtual size_t intensity() const = 0;
|
||||
virtual uint64_t rawHashes() const = 0;
|
||||
virtual void getHashrateData(uint64_t&, uint64_t&) const = 0;
|
||||
virtual void start() = 0;
|
||||
virtual void jobEarlyNotification(const Job&) = 0;
|
||||
virtual bool selfTest() = 0;
|
||||
virtual const VirtualMemory *memory() const = 0;
|
||||
virtual size_t id() const = 0;
|
||||
virtual size_t intensity() const = 0;
|
||||
virtual void hashrateData(uint64_t &hashCount, uint64_t &timeStamp, uint64_t &rawHashes) const = 0;
|
||||
virtual void jobEarlyNotification(const Job &job) = 0;
|
||||
virtual void start() = 0;
|
||||
};
|
||||
|
||||
|
||||
|
|
|
@ -1,12 +1,6 @@
|
|||
/* XMRig
|
||||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||
* Copyright 2018-2020 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright 2016-2020 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
* Copyright (c) 2018-2020 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright (c) 2016-2020 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
|
@ -32,8 +26,6 @@
|
|||
|
||||
#if defined(XMRIG_FEATURE_HWLOC)
|
||||
# include "backend/cpu/platform/HwlocCpuInfo.h"
|
||||
#elif defined(XMRIG_FEATURE_LIBCPUID)
|
||||
# include "backend/cpu/platform/AdvancedCpuInfo.h"
|
||||
#else
|
||||
# include "backend/cpu/platform/BasicCpuInfo.h"
|
||||
#endif
|
||||
|
@ -47,8 +39,6 @@ xmrig::ICpuInfo *xmrig::Cpu::info()
|
|||
if (cpuInfo == nullptr) {
|
||||
# if defined(XMRIG_FEATURE_HWLOC)
|
||||
cpuInfo = new HwlocCpuInfo();
|
||||
# elif defined(XMRIG_FEATURE_LIBCPUID)
|
||||
cpuInfo = new AdvancedCpuInfo();
|
||||
# else
|
||||
cpuInfo = new BasicCpuInfo();
|
||||
# endif
|
||||
|
|
|
@ -1,12 +1,6 @@
|
|||
/* XMRig
|
||||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||
* Copyright 2018-2020 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright 2016-2020 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
* Copyright (c) 2018-2020 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright (c) 2016-2020 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
|
|
|
@ -266,6 +266,12 @@ bool xmrig::CpuBackend::isEnabled(const Algorithm &algorithm) const
|
|||
}
|
||||
|
||||
|
||||
bool xmrig::CpuBackend::tick(uint64_t ticks)
|
||||
{
|
||||
return d_ptr->workers.tick(ticks);
|
||||
}
|
||||
|
||||
|
||||
const xmrig::Hashrate *xmrig::CpuBackend::hashrate() const
|
||||
{
|
||||
return d_ptr->workers.hashrate();
|
||||
|
@ -316,9 +322,9 @@ void xmrig::CpuBackend::printHashrate(bool details)
|
|||
Log::print("| %8zu | %8" PRId64 " | %7s | %7s | %7s |",
|
||||
i,
|
||||
data.affinity,
|
||||
Hashrate::format(hashrate()->calc(i + 1, Hashrate::ShortInterval), num, sizeof num / 3),
|
||||
Hashrate::format(hashrate()->calc(i + 1, Hashrate::MediumInterval), num + 8, sizeof num / 3),
|
||||
Hashrate::format(hashrate()->calc(i + 1, Hashrate::LargeInterval), num + 8 * 2, sizeof num / 3)
|
||||
Hashrate::format(hashrate()->calc(i, Hashrate::ShortInterval), num, sizeof num / 3),
|
||||
Hashrate::format(hashrate()->calc(i, Hashrate::MediumInterval), num + 8, sizeof num / 3),
|
||||
Hashrate::format(hashrate()->calc(i, Hashrate::LargeInterval), num + 8 * 2, sizeof num / 3)
|
||||
);
|
||||
|
||||
i++;
|
||||
|
@ -405,12 +411,6 @@ void xmrig::CpuBackend::stop()
|
|||
}
|
||||
|
||||
|
||||
bool xmrig::CpuBackend::tick(uint64_t ticks)
|
||||
{
|
||||
return d_ptr->workers.tick(ticks);
|
||||
}
|
||||
|
||||
|
||||
#ifdef XMRIG_FEATURE_API
|
||||
rapidjson::Value xmrig::CpuBackend::toJSON(rapidjson::Document &doc) const
|
||||
{
|
||||
|
|
|
@ -54,6 +54,7 @@ protected:
|
|||
|
||||
bool isEnabled() const override;
|
||||
bool isEnabled(const Algorithm &algorithm) const override;
|
||||
bool tick(uint64_t ticks) override;
|
||||
const Hashrate *hashrate() const override;
|
||||
const String &profileName() const override;
|
||||
const String &type() const override;
|
||||
|
@ -63,7 +64,6 @@ protected:
|
|||
void setJob(const Job &job) override;
|
||||
void start(IWorker *worker, bool ready) override;
|
||||
void stop() override;
|
||||
bool tick(uint64_t ticks) override;
|
||||
|
||||
# ifdef XMRIG_FEATURE_API
|
||||
rapidjson::Value toJSON(rapidjson::Document &doc) const override;
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show more
Loading…
Reference in a new issue