From 8a4792f6388a61fa22cfb6468a261aef9663bd18 Mon Sep 17 00:00:00 2001 From: XMRig Date: Mon, 21 Oct 2024 08:31:52 +0700 Subject: [PATCH] Update hwloc for MSVC. --- src/3rdparty/hwloc/NEWS | 67 ++- src/3rdparty/hwloc/README | 10 +- src/3rdparty/hwloc/VERSION | 8 +- src/3rdparty/hwloc/include/hwloc.h | 481 ++++++++++++++---- .../hwloc/include/hwloc/autogen/config.h | 8 +- src/3rdparty/hwloc/include/hwloc/distances.h | 24 +- src/3rdparty/hwloc/include/hwloc/helper.h | 12 +- src/3rdparty/hwloc/include/hwloc/memattrs.h | 248 +++++---- src/3rdparty/hwloc/include/hwloc/opencl.h | 19 + src/3rdparty/hwloc/include/hwloc/plugins.h | 15 +- src/3rdparty/hwloc/include/hwloc/rename.h | 7 +- .../hwloc/include/private/autogen/config.h | 16 +- .../hwloc/include/private/cpuid-x86.h | 22 + src/3rdparty/hwloc/include/private/misc.h | 33 +- src/3rdparty/hwloc/src/bind.c | 3 +- src/3rdparty/hwloc/src/bitmap.c | 59 ++- src/3rdparty/hwloc/src/cpukinds.c | 3 +- src/3rdparty/hwloc/src/distances.c | 6 +- src/3rdparty/hwloc/src/memattrs.c | 50 +- src/3rdparty/hwloc/src/pci-common.c | 28 +- src/3rdparty/hwloc/src/topology-windows.c | 8 +- src/3rdparty/hwloc/src/topology-x86.c | 43 +- .../hwloc/src/topology-xml-nolibxml.c | 4 +- src/3rdparty/hwloc/src/topology-xml.c | 16 +- src/3rdparty/hwloc/src/topology.c | 14 + 25 files changed, 875 insertions(+), 329 deletions(-) diff --git a/src/3rdparty/hwloc/NEWS b/src/3rdparty/hwloc/NEWS index 62cc687e..bf5f1f6f 100644 --- a/src/3rdparty/hwloc/NEWS +++ b/src/3rdparty/hwloc/NEWS @@ -1,5 +1,5 @@ Copyright © 2009 CNRS -Copyright © 2009-2023 Inria. All rights reserved. +Copyright © 2009-2024 Inria. All rights reserved. Copyright © 2009-2013 Université Bordeaux Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. Copyright © 2020 Hewlett Packard Enterprise. All rights reserved. @@ -17,6 +17,71 @@ bug fixes (and other actions) for each version of hwloc since version 0.9. 
+Version 2.11.2 +-------------- +* Add missing CPU info attrs on aarch64 on Linux. +* Use ACPI CPPC on Linux to get better information about cpukinds, + at least on AMD CPUs. +* Fix crash when manipulating cpukinds after topology + duplication, thanks to Hadrien Grasland for the report. +* Fix missing input target checks in memattr functions, + thanks to Hadrien Grasland for the report. +* Fix a memory leak when ignoring NUMA distances on FreeBSD. +* Fix build failure on old Linux distributions without accessat(). +* Fix non-Windows importing of XML topologies and CPUID dumps exported + on Windows. +* hwloc-calc --cpuset-output-format systemd-dbus-api now allows + to generate AllowedCPUs information for systemd slices. + See the hwloc-calc manpage for examples. Thanks to Pierre Neyron. +* Some fixes in manpage EXAMPLES and split them into subsections. + + +Version 2.11.1 +-------------- +* Fix bash completions, thanks Tavis Rudd. + + +Version 2.11.0 +-------------- +* API + + Add HWLOC_MEMBIND_WEIGHTED_INTERLEAVE memory binding policy on + Linux 6.9+. Thanks to Honggyu Kim for the patch. + - weighted_interleave_membind is added to membind support bits. + - The "weighted" policy is added to the hwloc-bind tool. + + Add hwloc_obj_set_subtype(). Thanks to Hadrien Grasland for the report. +* GPU support + + Don't hide the GPU NUMA node on NVIDIA Grace Hopper. + + Get Intel GPU OpenCL device locality. + + Add bandwidths between subdevices in the LevelZero XeLinkBandwidth + matrix. + + Fix PCI Gen4+ link speed of NVIDIA GPU obtained from NVML, + thanks to Akram Sbaih for the report. +* Windows support + + Fix Windows support when UNICODE is enabled, several hwloc features + were missing, thanks to Martin for the report. + + Fix the enabling of CUDA in Windows CMake build, + Thanks to Moritz Kreutzer for the patch. + + Fix CUDA/OpenCL test source path in Windows CMake. +* Tools + + Option --best-memattr may now return multiple nodes. 
Additional + configuration flags may be given to tweak its behavior. + + hwloc-info has a new --get-attr option to get a single attribute. + + hwloc-info now supports "levels", "support" and "topology" + special keywords for backward compatibility for hwloc 3.0. + + The --taskset command-line option is superseded by the new + --cpuset-output-format which also allows to export as list. + + hwloc-calc may now import bitmasks described as a list of bits + with the new "--cpuset-input-format list". +* Misc + + The MemoryTiersNr info attribute in the root object now says how many + memory tiers were built. Thanks to Antoine Morvan for the report. + + Fix the management of infinite cpusets in the bitmap printf/sscanf + API as well as in command-line tools. + + Add section "Compiling software on top of hwloc's C API" in the + documentation with examples for GNU Make and CMake, + thanks to Florent Pruvost for the help. + + Version 2.10.0 -------------- * Heterogeneous Memory core improvements diff --git a/src/3rdparty/hwloc/README b/src/3rdparty/hwloc/README index f2971d07..91fe2066 100644 --- a/src/3rdparty/hwloc/README +++ b/src/3rdparty/hwloc/README @@ -418,14 +418,8 @@ return 0; } hwloc provides a pkg-config executable to obtain relevant compiler and linker -flags. For example, it can be used thusly to compile applications that utilize -the hwloc library (assuming GNU Make): - -CFLAGS += $(shell pkg-config --cflags hwloc) -LDLIBS += $(shell pkg-config --libs hwloc) - -hwloc-hello: hwloc-hello.c - $(CC) hwloc-hello.c $(CFLAGS) -o hwloc-hello $(LDLIBS) +flags. See Compiling software on top of hwloc's C API for details on building +program on top of hwloc's API using GNU Make or CMake. 
On a machine 2 processor packages -- each package of which has two processing cores -- the output from running hwloc-hello could be something like the diff --git a/src/3rdparty/hwloc/VERSION b/src/3rdparty/hwloc/VERSION index cd608187..25c0cc54 100644 --- a/src/3rdparty/hwloc/VERSION +++ b/src/3rdparty/hwloc/VERSION @@ -8,8 +8,8 @@ # Please update HWLOC_VERSION* in contrib/windows/hwloc_config.h too. major=2 -minor=10 -release=0 +minor=11 +release=2 # greek is used for alpha or beta release tags. If it is non-empty, # it will be appended to the version number. It does not have to be @@ -22,7 +22,7 @@ greek= # The date when this release was created -date="Dec 04, 2023" +date="Sep 26, 2024" # If snapshot=1, then use the value from snapshot_version as the # entire hwloc version (i.e., ignore major, minor, release, and @@ -41,6 +41,6 @@ snapshot_version=${major}.${minor}.${release}${greek}-git # 2. Version numbers are described in the Libtool current:revision:age # format. -libhwloc_so_version=22:0:7 +libhwloc_so_version=23:1:8 # Please also update the lines in contrib/windows/libhwloc.vcxproj diff --git a/src/3rdparty/hwloc/include/hwloc.h b/src/3rdparty/hwloc/include/hwloc.h index 18ea1dfa..d52e9900 100644 --- a/src/3rdparty/hwloc/include/hwloc.h +++ b/src/3rdparty/hwloc/include/hwloc.h @@ -1,6 +1,6 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2022 Inria. All rights reserved. + * Copyright © 2009-2024 Inria. All rights reserved. * Copyright © 2009-2012 Université Bordeaux * Copyright © 2009-2020 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. @@ -77,6 +77,25 @@ extern "C" { #endif +/** \defgroup hwlocality_api_error_reporting Error reporting in the API + * @{ + * Most functions in the hwloc API return an integer value. + * Unless documented differently, they return 0 on success + * and -1 on error. + * Functions that return a pointer type return \c NULL on error. 
+ * + * \p errno will be set to a meaningful value whenever possible. + * This includes the usual \c EINVAL when invalid function parameters are passed + * or \c ENOMEM when an internal allocation fails. + * Some specific \c errno value are also used, for instance for binding + * errors as documented in \ref hwlocality_cpubinding. + * + * Some modules describe return values of their functions + * in their introduction, for instance in \ref hwlocality_bitmap. + * @} + */ + + /** \defgroup hwlocality_api_version API version * @{ */ @@ -93,11 +112,13 @@ extern "C" { * Two stable releases of the same series usually have the same ::HWLOC_API_VERSION * even if their HWLOC_VERSION are different. */ -#define HWLOC_API_VERSION 0x00020800 +#define HWLOC_API_VERSION 0x00020b00 /** \brief Indicate at runtime which hwloc API version was used at build time. * * Should be ::HWLOC_API_VERSION if running on the same version. + * + * \return the build-time version number. */ HWLOC_DECLSPEC unsigned hwloc_get_api_version(void); @@ -242,6 +263,11 @@ typedef enum { * This is the smallest object representing Memory resources, * it cannot have any child except Misc objects. * However it may have Memory-side cache parents. + * + * NUMA nodes may correspond to different kinds of memory + * (DRAM, HBM, CXL-DRAM, etc.). When hwloc is able to guess + * that kind, it is specified in the subtype field of the object. + * See also \ref attributes_normal in the main documentation. * * There is always at least one such object in the topology * even if the machine is not NUMA. @@ -317,6 +343,12 @@ typedef enum { HWLOC_OBJ_DIE, /**< \brief Die within a physical package. * A subpart of the physical package, that contains multiple cores. + * + * Some operating systems (e.g. Linux) may expose a single die per package + * even if the hardware does not support dies at all. To avoid showing + * such non-existing dies, the corresponding hwloc backend may filter them out. 
+ * This is functionally equivalent to ::HWLOC_TYPE_FILTER_KEEP_STRUCTURE + * being enforced. */ HWLOC_OBJ_TYPE_MAX /**< \private Sentinel value */ @@ -358,14 +390,17 @@ typedef enum hwloc_obj_osdev_type_e { /** \brief Compare the depth of two object types * * Types shouldn't be compared as they are, since newer ones may be added in - * the future. This function returns less than, equal to, or greater than zero - * respectively if \p type1 objects usually include \p type2 objects, are the - * same as \p type2 objects, or are included in \p type2 objects. If the types - * can not be compared (because neither is usually contained in the other), - * ::HWLOC_TYPE_UNORDERED is returned. Object types containing CPUs can always - * be compared (usually, a system contains machines which contain nodes which - * contain packages which contain caches, which contain cores, which contain - * processors). + * the future. + * + * \return A negative integer if \p type1 objects usually include \p type2 objects. + * \return A positive integer if \p type1 objects are usually included in \p type2 objects. + * \return 0 if \p type1 and \p type2 objects are the same. + * \return ::HWLOC_TYPE_UNORDERED if objects cannot be compared + * (because neither is usually contained in the other). + * + * \note Object types containing CPUs can always be compared + * (usually, a machine contains packages, which contain caches, + * which contain cores, which contain PUs). * * \note ::HWLOC_OBJ_PU will always be the deepest, * while ::HWLOC_OBJ_MACHINE is always the highest. @@ -575,7 +610,7 @@ struct hwloc_obj { * \note Its value must not be changed, hwloc_bitmap_dup() must be used instead. */ - struct hwloc_info_s *infos; /**< \brief Array of stringified info type=name. */ + struct hwloc_info_s *infos; /**< \brief Array of info attributes (name and value strings). */ unsigned infos_count; /**< \brief Size of infos array. 
*/ /* misc */ @@ -632,33 +667,48 @@ union hwloc_obj_attr_u { /** \brief PCI Device specific Object Attributes */ struct hwloc_pcidev_attr_s { #ifndef HWLOC_HAVE_32BITS_PCI_DOMAIN - unsigned short domain; /* Only 16bits PCI domains are supported by default */ + unsigned short domain; /**< \brief Domain number (xxxx in the PCI BDF notation xxxx:yy:zz.t). + * Only 16bits PCI domains are supported by default. */ #else - unsigned int domain; /* 32bits PCI domain support break the library ABI, hence it's disabled by default */ + unsigned int domain; /**< \brief Domain number (xxxx in the PCI BDF notation xxxx:yy:zz.t). + * 32bits PCI domain support break the library ABI, hence it's disabled by default. */ #endif - unsigned char bus, dev, func; - unsigned short class_id; - unsigned short vendor_id, device_id, subvendor_id, subdevice_id; - unsigned char revision; - float linkspeed; /* in GB/s */ + unsigned char bus; /**< \brief Bus number (yy in the PCI BDF notation xxxx:yy:zz.t). */ + unsigned char dev; /**< \brief Device number (zz in the PCI BDF notation xxxx:yy:zz.t). */ + unsigned char func; /**< \brief Function number (t in the PCI BDF notation xxxx:yy:zz.t). */ + unsigned short class_id; /**< \brief The class number (first two bytes, without the prog_if). */ + unsigned short vendor_id; /**< \brief Vendor ID (xxxx in [xxxx:yyyy]). */ + unsigned short device_id; /**< \brief Device ID (yyyy in [xxxx:yyyy]). */ + unsigned short subvendor_id; /**< \brief Sub-Vendor ID. */ + unsigned short subdevice_id; /**< \brief Sub-Device ID. */ + unsigned char revision; /**< \brief Revision number. */ + float linkspeed; /**< \brief Link speed in GB/s. + * This datarate is the currently configured speed of the entire PCI link + * (sum of the bandwidth of all PCI lanes in that link). + * It may change during execution since some devices are able to + * slow their PCI links down when idle. 
+ */ } pcidev; /** \brief Bridge specific Object Attributes */ struct hwloc_bridge_attr_s { union { - struct hwloc_pcidev_attr_s pci; + struct hwloc_pcidev_attr_s pci; /**< \brief PCI attribute of the upstream part as a PCI device. */ } upstream; - hwloc_obj_bridge_type_t upstream_type; + hwloc_obj_bridge_type_t upstream_type; /**< \brief Upstream Bridge type. */ union { struct { #ifndef HWLOC_HAVE_32BITS_PCI_DOMAIN - unsigned short domain; /* Only 16bits PCI domains are supported by default */ + unsigned short domain; /**< \brief Domain number the downstream PCI buses. + * Only 16bits PCI domains are supported by default. */ #else - unsigned int domain; /* 32bits PCI domain support break the library ABI, hence it's disabled by default */ + unsigned int domain; /**< \brief Domain number the downstream PCI buses. + * 32bits PCI domain support break the library ABI, hence it's disabled by default */ #endif - unsigned char secondary_bus, subordinate_bus; + unsigned char secondary_bus; /**< \brief First PCI bus number below the bridge. */ + unsigned char subordinate_bus; /**< \brief Highest PCI bus number below the bridge. */ } pci; } downstream; - hwloc_obj_bridge_type_t downstream_type; + hwloc_obj_bridge_type_t downstream_type; /**< \brief Downstream Bridge type. */ unsigned depth; } bridge; /** \brief OS Device specific Object Attributes */ @@ -667,7 +717,7 @@ union hwloc_obj_attr_u { } osdev; }; -/** \brief Object info +/** \brief Object info attribute (name and value strings) * * \sa hwlocality_info_attr */ @@ -734,6 +784,8 @@ HWLOC_DECLSPEC void hwloc_topology_destroy (hwloc_topology_t topology); * * This is useful for keeping a backup while modifying a topology. * + * \return 0 on success, -1 on error. + * * \note Object userdata is not duplicated since hwloc does not know what it point to. * The objects of both old and new topologies will point to the same userdata. 
*/ @@ -788,6 +840,8 @@ HWLOC_DECLSPEC void hwloc_topology_check(hwloc_topology_t topology); * * This is the depth of ::HWLOC_OBJ_PU objects plus one. * + * \return the depth of the object tree. + * * \note NUMA nodes, I/O and Misc objects are ignored when computing * the depth of the tree (they are placed on special levels). */ @@ -795,23 +849,26 @@ HWLOC_DECLSPEC int hwloc_topology_get_depth(hwloc_topology_t __hwloc_restrict to /** \brief Returns the depth of objects of type \p type. * - * If no object of this type is present on the underlying architecture, or if - * the OS doesn't provide this kind of information, the function returns - * ::HWLOC_TYPE_DEPTH_UNKNOWN. + * \return The depth of objects of type \p type. * - * If type is absent but a similar type is acceptable, see also - * hwloc_get_type_or_below_depth() and hwloc_get_type_or_above_depth(). - * - * If ::HWLOC_OBJ_GROUP is given, the function may return ::HWLOC_TYPE_DEPTH_MULTIPLE - * if multiple levels of Groups exist. - * - * If a NUMA node, I/O or Misc object type is given, the function returns a virtual - * value because these objects are stored in special levels that are not CPU-related. + * \return A negative virtual depth if a NUMA node, I/O or Misc object type is given. + * These objects are stored in special levels that are not CPU-related. * This virtual depth may be passed to other hwloc functions such as * hwloc_get_obj_by_depth() but it should not be considered as an actual * depth by the application. In particular, it should not be compared with * any other object depth or with the entire topology depth. - * \sa hwloc_get_memory_parents_depth(). + * + * \return ::HWLOC_TYPE_DEPTH_UNKNOWN + * if no object of this type is present on the underlying architecture, + * or if the OS doesn't provide this kind of information. + * + * \return ::HWLOC_TYPE_DEPTH_MULTIPLE if type ::HWLOC_OBJ_GROUP is given + * and multiple levels of Groups exist. 
+ * + * \note If the type is absent but a similar type is acceptable, see also + * hwloc_get_type_or_below_depth() and hwloc_get_type_or_above_depth(). + * + * \sa hwloc_get_memory_parents_depth() for managing the depth of memory objects. * * \sa hwloc_type_sscanf_as_depth() for returning the depth of objects * whose type is given as a string. @@ -887,18 +944,23 @@ hwloc_get_type_or_above_depth (hwloc_topology_t topology, hwloc_obj_type_t type) * \p depth should between 0 and hwloc_topology_get_depth()-1, * or a virtual depth such as ::HWLOC_TYPE_DEPTH_NUMANODE. * + * \return The type of objects at depth \p depth. * \return (hwloc_obj_type_t)-1 if depth \p depth does not exist. */ HWLOC_DECLSPEC hwloc_obj_type_t hwloc_get_depth_type (hwloc_topology_t topology, int depth) __hwloc_attribute_pure; /** \brief Returns the width of level at depth \p depth. + * + * \return The number of objects at topology depth \p depth. + * \return 0 if there are no objects at depth \p depth. */ HWLOC_DECLSPEC unsigned hwloc_get_nbobjs_by_depth (hwloc_topology_t topology, int depth) __hwloc_attribute_pure; /** \brief Returns the width of level type \p type * - * If no object for that type exists, 0 is returned. - * If there are several levels with objects of that type, -1 is returned. + * \return The number of objects of type \p type. + * \return -1 if there are multiple levels with objects of that type, e.g. ::HWLOC_OBJ_GROUP. + * \return 0 if there are no objects of type \p type. */ static __hwloc_inline int hwloc_get_nbobjs_by_type (hwloc_topology_t topology, hwloc_obj_type_t type) __hwloc_attribute_pure; @@ -906,34 +968,45 @@ hwloc_get_nbobjs_by_type (hwloc_topology_t topology, hwloc_obj_type_t type) __hw /** \brief Returns the top-object of the topology-tree. * * Its type is ::HWLOC_OBJ_MACHINE. + * + * This function cannot return \c NULL. 
*/ static __hwloc_inline hwloc_obj_t hwloc_get_root_obj (hwloc_topology_t topology) __hwloc_attribute_pure; -/** \brief Returns the topology object at logical index \p idx from depth \p depth */ +/** \brief Returns the topology object at logical index \p idx from depth \p depth + * + * \return The object if it exists. + * \return \c NULL if there is no object with this index and depth. + */ HWLOC_DECLSPEC hwloc_obj_t hwloc_get_obj_by_depth (hwloc_topology_t topology, int depth, unsigned idx) __hwloc_attribute_pure; /** \brief Returns the topology object at logical index \p idx with type \p type * - * If no object for that type exists, \c NULL is returned. - * If there are several levels with objects of that type (::HWLOC_OBJ_GROUP), - * \c NULL is returned and the caller may fallback to hwloc_get_obj_by_depth(). + * \return The object if it exists. + * \return \c NULL if there is no object with this index and type. + * \return \c NULL if there are multiple levels with objects of that type (e.g. ::HWLOC_OBJ_GROUP), + * the caller may fallback to hwloc_get_obj_by_depth(). */ static __hwloc_inline hwloc_obj_t hwloc_get_obj_by_type (hwloc_topology_t topology, hwloc_obj_type_t type, unsigned idx) __hwloc_attribute_pure; /** \brief Returns the next object at depth \p depth. * - * If \p prev is \c NULL, return the first object at depth \p depth. + * \return The first object at depth \p depth if \p prev is \c NULL. + * \return The object after \p prev at depth \p depth if \p prev is not \c NULL. + * \return \c NULL if there is no such object. */ static __hwloc_inline hwloc_obj_t hwloc_get_next_obj_by_depth (hwloc_topology_t topology, int depth, hwloc_obj_t prev); /** \brief Returns the next object of type \p type. * - * If \p prev is \c NULL, return the first object at type \p type. If - * there are multiple or no depth for given type, return \c NULL and - * let the caller fallback to hwloc_get_next_obj_by_depth(). 
+ * \return The first object of type \p type if \p prev is \c NULL. + * \return The object after \p prev of type \p type if \p prev is not \c NULL. + * \return \c NULL if there is no such object. + * \return \c NULL if there are multiple levels with objects of that type (e.g. ::HWLOC_OBJ_GROUP), + * the caller may fallback to hwloc_get_next_obj_by_depth(). */ static __hwloc_inline hwloc_obj_t hwloc_get_next_obj_by_type (hwloc_topology_t topology, hwloc_obj_type_t type, @@ -954,6 +1027,8 @@ hwloc_get_next_obj_by_type (hwloc_topology_t topology, hwloc_obj_type_t type, * * hwloc_obj_type_snprintf() may return a more precise output for a specific * object, but it requires the caller to provide the output buffer. + * + * \return A constant string containing the object type name or \c "Unknown". */ HWLOC_DECLSPEC const char * hwloc_obj_type_string (hwloc_obj_type_t type) __hwloc_attribute_const; @@ -1049,23 +1124,26 @@ HWLOC_DECLSPEC int hwloc_type_sscanf_as_depth(const char *string, -/** \defgroup hwlocality_info_attr Consulting and Adding Key-Value Info Attributes +/** \defgroup hwlocality_info_attr Consulting and Adding Info Attributes * * @{ */ -/** \brief Search the given key name in object infos and return the corresponding value. +/** \brief Search the given name in object infos and return the corresponding value. * - * If multiple keys match the given name, only the first one is returned. + * If multiple info attributes match the given name, only the first one is returned. * - * \return \c NULL if no such key exists. + * \return A pointer to the value string if it exists. + * \return \c NULL if no such info attribute exists. + * + * \note The string should not be freed by the caller, it belongs to the hwloc library. */ static __hwloc_inline const char * hwloc_obj_get_info_by_name(hwloc_obj_t obj, const char *name) __hwloc_attribute_pure; -/** \brief Add the given info name and value pair to the given object. 
+/** \brief Add the given name and value pair to the given object info attributes. * - * The info is appended to the existing info array even if another key + * The info pair is appended to the existing info array even if another pair * with the same name already exists. * * The input strings are copied before being added in the object infos. @@ -1073,14 +1151,30 @@ hwloc_obj_get_info_by_name(hwloc_obj_t obj, const char *name) __hwloc_attribute_ * \return \c 0 on success, \c -1 on error. * * \note This function may be used to enforce object colors in the lstopo - * graphical output by using "lstopoStyle" as a name and "Background=#rrggbb" + * graphical output by adding "lstopoStyle" as a name and "Background=#rrggbb" * as a value. See CUSTOM COLORS in the lstopo(1) manpage for details. * - * \note If \p value contains some non-printable characters, they will + * \note If \p name or \p value contain some non-printable characters, they will * be dropped when exporting to XML, see hwloc_topology_export_xml() in hwloc/export.h. */ HWLOC_DECLSPEC int hwloc_obj_add_info(hwloc_obj_t obj, const char *name, const char *value); +/** \brief Set (or replace) the subtype of an object. + * + * The given \p subtype is copied internally, the caller is responsible + * for freeing the original \p subtype if needed. + * + * If another subtype already exists in \p obj, it is replaced. + * The given \p subtype may be \c NULL to remove the existing subtype. + * + * \note This function is mostly meant to initialize the subtype of user-added + * objects such as groups with hwloc_topology_alloc_group_object(). + * + * \return \c 0 on success. + * \return \c -1 with \p errno set to \c ENOMEM on failure to allocate memory. 
+ */ +HWLOC_DECLSPEC int hwloc_obj_set_subtype(hwloc_topology_t topology, hwloc_obj_t obj, const char *subtype); + /** @} */ @@ -1193,7 +1287,7 @@ typedef enum { * a problem for the application, but if it is, setting this flag * will make hwloc avoid using OS functions that would also bind * memory. This will however reduce the support of CPU bindings, - * i.e. potentially return -1 with errno set to ENOSYS in some + * i.e. potentially return -1 with errno set to \c ENOSYS in some * cases. * * This flag is only meaningful when used with functions that set @@ -1206,8 +1300,9 @@ typedef enum { /** \brief Bind current process or thread on CPUs given in physical bitmap \p set. * - * \return -1 with errno set to ENOSYS if the action is not supported - * \return -1 with errno set to EXDEV if the binding cannot be enforced + * \return 0 on success. + * \return -1 with errno set to \c ENOSYS if the action is not supported. + * \return -1 with errno set to \c EXDEV if the binding cannot be enforced. */ HWLOC_DECLSPEC int hwloc_set_cpubind(hwloc_topology_t topology, hwloc_const_cpuset_t set, int flags); @@ -1216,10 +1311,14 @@ HWLOC_DECLSPEC int hwloc_set_cpubind(hwloc_topology_t topology, hwloc_const_cpus * The CPU-set \p set (previously allocated by the caller) * is filled with the list of PUs which the process or * thread (according to \e flags) was last bound to. + * + * \return 0 on success, -1 on error. */ HWLOC_DECLSPEC int hwloc_get_cpubind(hwloc_topology_t topology, hwloc_cpuset_t set, int flags); /** \brief Bind a process \p pid on CPUs given in physical bitmap \p set. + * + * \return 0 on success, -1 on error. * * \note \p hwloc_pid_t is \p pid_t on Unix platforms, * and \p HANDLE on native Windows platforms. @@ -1238,6 +1337,8 @@ HWLOC_DECLSPEC int hwloc_set_proc_cpubind(hwloc_topology_t topology, hwloc_pid_t * is filled with the list of PUs which the process * was last bound to. * + * \return 0 on success, -1 on error. 
+ * * \note \p hwloc_pid_t is \p pid_t on Unix platforms, * and \p HANDLE on native Windows platforms. * @@ -1251,6 +1352,8 @@ HWLOC_DECLSPEC int hwloc_get_proc_cpubind(hwloc_topology_t topology, hwloc_pid_t #ifdef hwloc_thread_t /** \brief Bind a thread \p thread on CPUs given in physical bitmap \p set. + * + * \return 0 on success, -1 on error. * * \note \p hwloc_thread_t is \p pthread_t on Unix platforms, * and \p HANDLE on native Windows platforms. @@ -1267,6 +1370,8 @@ HWLOC_DECLSPEC int hwloc_set_thread_cpubind(hwloc_topology_t topology, hwloc_thr * is filled with the list of PUs which the thread * was last bound to. * + * \return 0 on success, -1 on error. + * * \note \p hwloc_thread_t is \p pthread_t on Unix platforms, * and \p HANDLE on native Windows platforms. * @@ -1291,6 +1396,8 @@ HWLOC_DECLSPEC int hwloc_get_thread_cpubind(hwloc_topology_t topology, hwloc_thr * on which all threads are running), or only the current thread. If the * process is single-threaded, flags can be set to zero to let hwloc use * whichever method is available on the underlying OS. + * + * \return 0 on success, -1 on error. */ HWLOC_DECLSPEC int hwloc_get_last_cpu_location(hwloc_topology_t topology, hwloc_cpuset_t set, int flags); @@ -1305,6 +1412,8 @@ HWLOC_DECLSPEC int hwloc_get_last_cpu_location(hwloc_topology_t topology, hwloc_ * so this function may return something that is already * outdated. * + * \return 0 on success, -1 on error. + * * \note \p hwloc_pid_t is \p pid_t on Unix platforms, * and \p HANDLE on native Windows platforms. * @@ -1343,7 +1452,7 @@ HWLOC_DECLSPEC int hwloc_get_proc_last_cpu_location(hwloc_topology_t topology, h * (e.g., some systems only allow binding memory on a per-thread * basis, whereas other systems only allow binding memory for all * threads in a process). 
- * \p errno will be set to EXDEV when the requested set can not be enforced + * \p errno will be set to \c EXDEV when the requested set can not be enforced * (e.g., some systems only allow binding memory to a single NUMA node). * * If ::HWLOC_MEMBIND_STRICT was not passed, the function may fail as well, @@ -1417,6 +1526,12 @@ typedef enum { HWLOC_MEMBIND_FIRSTTOUCH = 1, /** \brief Allocate memory on the specified nodes. + * + * The actual behavior may slightly vary between operating systems, + * especially when (some of) the requested nodes are full. + * On Linux, by default, the MPOL_PREFERRED_MANY (or MPOL_PREFERRED) policy + * is used. However, if the hwloc strict flag is also given, the Linux + * MPOL_BIND policy is rather used. * \hideinitializer */ HWLOC_MEMBIND_BIND = 2, @@ -1429,6 +1544,16 @@ typedef enum { * \hideinitializer */ HWLOC_MEMBIND_INTERLEAVE = 3, + /** \brief Allocate memory on the given nodes in an interleaved + * / weighted manner. The precise layout of the memory across + * multiple NUMA nodes is OS/system specific. Weighted interleaving + * can be useful when threads distributed across the specified NUMA + * nodes with different bandwidth capabilities will all be accessing + * the whole memory range concurrently, since the interleave will then + * balance the memory references. + * \hideinitializer */ + HWLOC_MEMBIND_WEIGHTED_INTERLEAVE = 5, + /** \brief For each page bound with this policy, by next time * it is touched (and next time only), it is moved from its current * location to the local NUMA node of the thread where the memory @@ -1492,7 +1617,7 @@ typedef enum { * could potentially affect CPU bindings. Note, however, that using * NOCPUBIND may reduce hwloc's overall memory binding * support. Specifically: some of hwloc's memory binding functions - * may fail with errno set to ENOSYS when used with NOCPUBIND. + * may fail with errno set to \c ENOSYS when used with NOCPUBIND. 
* \hideinitializer */ HWLOC_MEMBIND_NOCPUBIND = (1<<4), @@ -1521,8 +1646,9 @@ typedef enum { * If ::HWLOC_MEMBIND_BYNODESET is specified, set is considered a nodeset. * Otherwise it's a cpuset. * - * \return -1 with errno set to ENOSYS if the action is not supported - * \return -1 with errno set to EXDEV if the binding cannot be enforced + * \return 0 on success. + * \return -1 with errno set to \c ENOSYS if the action is not supported. + * \return -1 with errno set to \c EXDEV if the binding cannot be enforced. */ HWLOC_DECLSPEC int hwloc_set_membind(hwloc_topology_t topology, hwloc_const_bitmap_t set, hwloc_membind_policy_t policy, int flags); @@ -1551,7 +1677,7 @@ HWLOC_DECLSPEC int hwloc_set_membind(hwloc_topology_t topology, hwloc_const_bitm * ::HWLOC_MEMBIND_STRICT is only meaningful when ::HWLOC_MEMBIND_PROCESS * is also specified. In this case, hwloc will check the default * memory policies and nodesets for all threads in the process. If - * they are not identical, -1 is returned and errno is set to EXDEV. + * they are not identical, -1 is returned and errno is set to \c EXDEV. * If they are identical, the values are returned in \p set and \p * policy. * @@ -1571,7 +1697,9 @@ HWLOC_DECLSPEC int hwloc_set_membind(hwloc_topology_t topology, hwloc_const_bitm * Otherwise it's a cpuset. * * If any other flags are specified, -1 is returned and errno is set - * to EINVAL. + * to \c EINVAL. + * + * \return 0 on success, -1 on error. */ HWLOC_DECLSPEC int hwloc_get_membind(hwloc_topology_t topology, hwloc_bitmap_t set, hwloc_membind_policy_t * policy, int flags); @@ -1581,8 +1709,9 @@ HWLOC_DECLSPEC int hwloc_get_membind(hwloc_topology_t topology, hwloc_bitmap_t s * If ::HWLOC_MEMBIND_BYNODESET is specified, set is considered a nodeset. * Otherwise it's a cpuset. * - * \return -1 with errno set to ENOSYS if the action is not supported - * \return -1 with errno set to EXDEV if the binding cannot be enforced + * \return 0 on success. 
+ * \return -1 with errno set to \c ENOSYS if the action is not supported. + * \return -1 with errno set to \c EXDEV if the binding cannot be enforced. * * \note \p hwloc_pid_t is \p pid_t on Unix platforms, * and \p HANDLE on native Windows platforms. @@ -1614,7 +1743,7 @@ HWLOC_DECLSPEC int hwloc_set_proc_membind(hwloc_topology_t topology, hwloc_pid_t * If ::HWLOC_MEMBIND_STRICT is specified, hwloc will check the default * memory policies and nodesets for all threads in the specified * process. If they are not identical, -1 is returned and errno is - * set to EXDEV. If they are identical, the values are returned in \p + * set to \c EXDEV. If they are identical, the values are returned in \p * set and \p policy. * * Otherwise, \p set is set to the logical OR of all threads' @@ -1626,7 +1755,9 @@ HWLOC_DECLSPEC int hwloc_set_proc_membind(hwloc_topology_t topology, hwloc_pid_t * Otherwise it's a cpuset. * * If any other flags are specified, -1 is returned and errno is set - * to EINVAL. + * to \c EINVAL. + * + * \return 0 on success, -1 on error. * * \note \p hwloc_pid_t is \p pid_t on Unix platforms, * and \p HANDLE on native Windows platforms. @@ -1639,9 +1770,9 @@ HWLOC_DECLSPEC int hwloc_get_proc_membind(hwloc_topology_t topology, hwloc_pid_t * If ::HWLOC_MEMBIND_BYNODESET is specified, set is considered a nodeset. * Otherwise it's a cpuset. * - * \return 0 if \p len is 0. - * \return -1 with errno set to ENOSYS if the action is not supported - * \return -1 with errno set to EXDEV if the binding cannot be enforced + * \return 0 on success or if \p len is 0. + * \return -1 with errno set to \c ENOSYS if the action is not supported. + * \return -1 with errno set to \c EXDEV if the binding cannot be enforced. 
*/ HWLOC_DECLSPEC int hwloc_set_area_membind(hwloc_topology_t topology, const void *addr, size_t len, hwloc_const_bitmap_t set, hwloc_membind_policy_t policy, int flags); @@ -1658,7 +1789,7 @@ HWLOC_DECLSPEC int hwloc_set_area_membind(hwloc_topology_t topology, const void * * If ::HWLOC_MEMBIND_STRICT is specified, the target pages are first * checked to see if they all have the same memory binding policy and - * nodeset. If they do not, -1 is returned and errno is set to EXDEV. + * nodeset. If they do not, -1 is returned and errno is set to \c EXDEV. * If they are identical across all pages, the set and policy are * returned in \p set and \p policy, respectively. * @@ -1671,9 +1802,10 @@ HWLOC_DECLSPEC int hwloc_set_area_membind(hwloc_topology_t topology, const void * Otherwise it's a cpuset. * * If any other flags are specified, -1 is returned and errno is set - * to EINVAL. + * to \c EINVAL. * - * If \p len is 0, -1 is returned and errno is set to EINVAL. + * \return 0 on success. + * \return -1 with errno set to \c EINVAL if \p len is 0. */ HWLOC_DECLSPEC int hwloc_get_area_membind(hwloc_topology_t topology, const void *addr, size_t len, hwloc_bitmap_t set, hwloc_membind_policy_t * policy, int flags); @@ -1696,6 +1828,8 @@ HWLOC_DECLSPEC int hwloc_get_area_membind(hwloc_topology_t topology, const void * considered a nodeset. Otherwise it's a cpuset. * * If \p len is 0, \p set is emptied. + * + * \return 0 on success, -1 on error. */ HWLOC_DECLSPEC int hwloc_get_area_memlocation(hwloc_topology_t topology, const void *addr, size_t len, hwloc_bitmap_t set, int flags); @@ -1704,17 +1838,20 @@ HWLOC_DECLSPEC int hwloc_get_area_memlocation(hwloc_topology_t topology, const v * This is equivalent to malloc(), except that it tries to allocate * page-aligned memory from the OS. * + * \return a pointer to the allocated area, or \c NULL on error. + * * \note The allocated memory should be freed with hwloc_free(). 
*/ HWLOC_DECLSPEC void *hwloc_alloc(hwloc_topology_t topology, size_t len); /** \brief Allocate some memory on NUMA memory nodes specified by \p set * - * \return NULL with errno set to ENOSYS if the action is not supported - * and ::HWLOC_MEMBIND_STRICT is given - * \return NULL with errno set to EXDEV if the binding cannot be enforced - * and ::HWLOC_MEMBIND_STRICT is given - * \return NULL with errno set to ENOMEM if the memory allocation failed + * \return a pointer to the allocated area. + * \return NULL with errno set to \c ENOSYS if the action is not supported + * and ::HWLOC_MEMBIND_STRICT is given. + * \return NULL with errno set to \c EXDEV if the binding cannot be enforced + * and ::HWLOC_MEMBIND_STRICT is given. + * \return NULL with errno set to \c ENOMEM if the memory allocation failed * even before trying to bind. * * If ::HWLOC_MEMBIND_BYNODESET is specified, set is considered a nodeset. @@ -1735,12 +1872,16 @@ HWLOC_DECLSPEC void *hwloc_alloc_membind(hwloc_topology_t topology, size_t len, * * If ::HWLOC_MEMBIND_BYNODESET is specified, set is considered a nodeset. * Otherwise it's a cpuset. + * + * \return a pointer to the allocated area, or \c NULL on error. */ static __hwloc_inline void * hwloc_alloc_membind_policy(hwloc_topology_t topology, size_t len, hwloc_const_bitmap_t set, hwloc_membind_policy_t policy, int flags) __hwloc_attribute_malloc; /** \brief Free memory that was previously allocated by hwloc_alloc() * or hwloc_alloc_membind(). + * + * \return 0 on success, -1 on error. */ HWLOC_DECLSPEC int hwloc_free(hwloc_topology_t topology, void *addr, size_t len); @@ -1749,6 +1890,9 @@ HWLOC_DECLSPEC int hwloc_free(hwloc_topology_t topology, void *addr, size_t len) /** \defgroup hwlocality_setsource Changing the Source of Topology Discovery + * + * These functions must be called between hwloc_topology_init() and hwloc_topology_load(). + * Otherwise, they will return -1 with errno set to \c EBUSY. 
* * If none of the functions below is called, the default is to detect all the objects * of the machine that the caller is allowed to access. @@ -1777,8 +1921,14 @@ HWLOC_DECLSPEC int hwloc_free(hwloc_topology_t topology, void *addr, size_t len) * \note \p hwloc_pid_t is \p pid_t on Unix platforms, * and \p HANDLE on native Windows platforms. * - * \note -1 is returned and errno is set to ENOSYS on platforms that do not + * \note -1 is returned and errno is set to \c ENOSYS on platforms that do not * support this feature. + * + * \note The PID will not actually be used until hwloc_topology_load(). + * If the corresponding process exits in the meantime, hwloc will ignore the PID. + * If another process reuses the PID, the view of that process will be used. + * + * \return 0 on success, -1 on error. */ HWLOC_DECLSPEC int hwloc_topology_set_pid(hwloc_topology_t __hwloc_restrict topology, hwloc_pid_t pid); @@ -1796,13 +1946,16 @@ HWLOC_DECLSPEC int hwloc_topology_set_pid(hwloc_topology_t __hwloc_restrict topo * * If \p description was properly parsed and describes a valid topology * configuration, this function returns 0. - * Otherwise -1 is returned and errno is set to EINVAL. + * Otherwise -1 is returned and errno is set to \c EINVAL. * * Note that this function does not actually load topology * information; it just tells hwloc where to load it from. You'll * still need to invoke hwloc_topology_load() to actually load the * topology information. * + * \return 0 on success. + * \return -1 with errno set to \c EINVAL if the description was invalid. + * * \note For convenience, this backend provides empty binding hooks which just * return success. * @@ -1824,7 +1977,8 @@ HWLOC_DECLSPEC int hwloc_topology_set_synthetic(hwloc_topology_t __hwloc_restric * still need to invoke hwloc_topology_load() to actually load the * topology information. * - * \return -1 with errno set to EINVAL on failure to read the XML file. + * \return 0 on success. 
+ * \return -1 with errno set to \c EINVAL on failure to read the XML file. * * \note See also hwloc_topology_set_userdata_import_callback() * for importing application-specific object userdata. @@ -1837,22 +1991,28 @@ HWLOC_DECLSPEC int hwloc_topology_set_synthetic(hwloc_topology_t __hwloc_restric * \note On success, the XML component replaces the previously enabled * component (if any), but the topology is not actually modified until * hwloc_topology_load(). + * + * \note If an invalid XML input file is given, the error may be reported + * either here or later by hwloc_topology_load() depending on the XML library + * used by hwloc. */ HWLOC_DECLSPEC int hwloc_topology_set_xml(hwloc_topology_t __hwloc_restrict topology, const char * __hwloc_restrict xmlpath); /** \brief Enable XML based topology using a memory buffer (instead of * a file, as with hwloc_topology_set_xml()). * - * Gather topology information from the XML memory buffer given at \p - * buffer and of length \p size. This buffer may have been filled - * earlier with hwloc_topology_export_xmlbuffer() in hwloc/export.h. + * Gather topology information from the XML memory buffer given at + * \p buffer and of length \p size (including an ending \0). + * This buffer may have been filled earlier with + * hwloc_topology_export_xmlbuffer() in hwloc/export.h. * * Note that this function does not actually load topology * information; it just tells hwloc where to load it from. You'll * still need to invoke hwloc_topology_load() to actually load the * topology information. * - * \return -1 with errno set to EINVAL on failure to read the XML buffer. + * \return 0 on success. + * \return -1 with errno set to \c EINVAL on failure to read the XML buffer. * * \note See also hwloc_topology_set_userdata_import_callback() * for importing application-specific object userdata. 
@@ -1865,6 +2025,10 @@ HWLOC_DECLSPEC int hwloc_topology_set_xml(hwloc_topology_t __hwloc_restrict topo * \note On success, the XML component replaces the previously enabled * component (if any), but the topology is not actually modified until * hwloc_topology_load(). + * + * \note If an invalid XML input buffer is given, the error may be reported + * either here or later by hwloc_topology_load() depending on the XML library + * used by hwloc. */ HWLOC_DECLSPEC int hwloc_topology_set_xmlbuffer(hwloc_topology_t __hwloc_restrict topology, const char * __hwloc_restrict buffer, int size); @@ -1890,6 +2054,9 @@ enum hwloc_topology_components_flag_e { * This may be used to avoid expensive parts of the discovery process. * For instance, CUDA-specific discovery may be expensive and unneeded * while generic I/O discovery could still be useful. + * + * \return 0 on success. + * \return -1 on error, for instance if flags are invalid. */ HWLOC_DECLSPEC int hwloc_topology_set_components(hwloc_topology_t __hwloc_restrict topology, unsigned long flags, const char * __hwloc_restrict name); @@ -2069,9 +2236,10 @@ enum hwloc_topology_flags_e { */ HWLOC_TOPOLOGY_FLAG_NO_DISTANCES = (1UL<<7), - /** \brief Ignore memory attributes. + /** \brief Ignore memory attributes and tiers. * - * Ignore memory attribues from the operating systems (and from XML). + * Ignore memory attributes from the operating systems (and from XML). + * Hence also do not try to build memory tiers. */ HWLOC_TOPOLOGY_FLAG_NO_MEMATTRS = (1UL<<8), @@ -2092,6 +2260,9 @@ enum hwloc_topology_flags_e { * By default, no flags are set (\c 0). * * The flags set in a topology may be retrieved with hwloc_topology_get_flags(). + * + * \return 0 on success. + * \return -1 on error, for instance if flags are invalid.
*/ HWLOC_DECLSPEC int hwloc_topology_set_flags (hwloc_topology_t topology, unsigned long flags); @@ -2103,6 +2274,8 @@ HWLOC_DECLSPEC int hwloc_topology_set_flags (hwloc_topology_t topology, unsigned * no flags are set (\c 0 is returned). * * \return the flags previously set with hwloc_topology_set_flags(). + * + * \note This function may also be called after hwloc_topology_load(). */ HWLOC_DECLSPEC unsigned long hwloc_topology_get_flags (hwloc_topology_t topology); @@ -2112,6 +2285,8 @@ HWLOC_DECLSPEC unsigned long hwloc_topology_get_flags (hwloc_topology_t topology * running this program. * \return 0 instead (for instance if using another file-system root, * a XML topology file, or a synthetic topology). + * + * \note This function may also be called after hwloc_topology_load(). */ HWLOC_DECLSPEC int hwloc_topology_is_thissystem(hwloc_topology_t __hwloc_restrict topology) __hwloc_attribute_pure; @@ -2197,6 +2372,8 @@ struct hwloc_topology_membind_support { unsigned char migrate_membind; /** Getting the last NUMA nodes where a memory area was allocated is supported */ unsigned char get_area_memlocation; + /** Weighted interleave policy is supported. */ + unsigned char weighted_interleave_membind; }; /** \brief Flags describing miscellaneous features. @@ -2239,14 +2416,22 @@ struct hwloc_topology_support { * to report the supported features of the original remote machine * instead. If it was successfully imported, \p imported_support * will be set in the struct hwloc_topology_misc_support array. + * + * \return A pointer to a support structure. + * + * \note The function cannot return \c NULL. + * \note The returned pointer should not be freed, it belongs to the hwloc library. + * + * \note This function may be called before or after hwloc_topology_load() + * but the support structure only contains valid information after. 
*/ HWLOC_DECLSPEC const struct hwloc_topology_support *hwloc_topology_get_support(hwloc_topology_t __hwloc_restrict topology); /** \brief Type filtering flags. * * By default, most objects are kept (::HWLOC_TYPE_FILTER_KEEP_ALL). - * Instruction caches, I/O and Misc objects are ignored by default (::HWLOC_TYPE_FILTER_KEEP_NONE). - * Die and Group levels are ignored unless they bring structure (::HWLOC_TYPE_FILTER_KEEP_STRUCTURE). + * Instruction caches, memory-side caches, I/O and Misc objects are ignored by default (::HWLOC_TYPE_FILTER_KEEP_NONE). + * Group levels are ignored unless they bring structure (::HWLOC_TYPE_FILTER_KEEP_STRUCTURE). * * Note that group objects are also ignored individually (without the entire level) * when they do not bring structure. @@ -2298,32 +2483,44 @@ enum hwloc_type_filter_e { }; /** \brief Set the filtering for the given object type. + * + * \return 0 on success, -1 on error. */ HWLOC_DECLSPEC int hwloc_topology_set_type_filter(hwloc_topology_t topology, hwloc_obj_type_t type, enum hwloc_type_filter_e filter); /** \brief Get the current filtering for the given object type. + * + * \return 0 on success, -1 on error. */ HWLOC_DECLSPEC int hwloc_topology_get_type_filter(hwloc_topology_t topology, hwloc_obj_type_t type, enum hwloc_type_filter_e *filter); /** \brief Set the filtering for all object types. * * If some types do not support this filtering, they are silently ignored. + * + * \return 0 on success, -1 on error. */ HWLOC_DECLSPEC int hwloc_topology_set_all_types_filter(hwloc_topology_t topology, enum hwloc_type_filter_e filter); /** \brief Set the filtering for all CPU cache object types. * * Memory-side caches are not involved since they are not CPU caches. + * + * \return 0 on success, -1 on error. */ HWLOC_DECLSPEC int hwloc_topology_set_cache_types_filter(hwloc_topology_t topology, enum hwloc_type_filter_e filter); /** \brief Set the filtering for all CPU instruction cache object types. 
* * Memory-side caches are not involved since they are not CPU caches. + * + * \return 0 on success, -1 on error. */ HWLOC_DECLSPEC int hwloc_topology_set_icache_types_filter(hwloc_topology_t topology, enum hwloc_type_filter_e filter); /** \brief Set the filtering for all I/O object types. + * + * \return 0 on success, -1 on error. */ HWLOC_DECLSPEC int hwloc_topology_set_io_types_filter(hwloc_topology_t topology, enum hwloc_type_filter_e filter); @@ -2343,6 +2540,9 @@ HWLOC_DECLSPEC void hwloc_topology_set_userdata(hwloc_topology_t topology, const * * Retrieve the application-given private data pointer that was * previously set with hwloc_topology_set_userdata(). + * + * \return A pointer to the private-data if any. + * \return \c NULL if no private-data was previously set. */ HWLOC_DECLSPEC void * hwloc_topology_get_userdata(hwloc_topology_t topology); @@ -2395,21 +2595,32 @@ enum hwloc_restrict_flags_e { * are not included (or partially included) in the CPU set \p set. * All objects CPU and node sets are restricted accordingly. * + * By default, \p set is a CPU set. It means that the set of PUs in + * the topology is restricted. Once some PUs got removed, their parents + * may also get removed recursively if they became child-less. + * * If ::HWLOC_RESTRICT_FLAG_BYNODESET is passed in \p flags, * \p set is considered a nodeset instead of a CPU set. + * It means that the set of NUMA nodes in the topology is restricted + * (instead of PUs). Once some NUMA nodes got removed, their parents + * may also get removed recursively if they became child-less. * * \p flags is a OR'ed set of ::hwloc_restrict_flags_e. * + * \note Restricting the topology removes some locality information, + * hence the remaining objects may get reordered (including PUs and NUMA nodes), + * and their logical indexes may change. + * * \note This call may not be reverted by restricting back to a larger * set.
Once dropped during restriction, objects may not be brought * back, except by loading another topology with hwloc_topology_load(). * * \return 0 on success. * - * \return -1 with errno set to EINVAL if the input set is invalid. + * \return -1 with errno set to \c EINVAL if the input set is invalid. * The topology is not modified in this case. * - * \return -1 with errno set to ENOMEM on failure to allocate internal data. + * \return -1 with errno set to \c ENOMEM on failure to allocate internal data. * The topology is reinitialized in this case. It should be either * destroyed with hwloc_topology_destroy() or configured and loaded again. */ @@ -2449,6 +2660,8 @@ enum hwloc_allow_flags_e { * * \p flags must be set to one flag among ::hwloc_allow_flags_e. * + * \return 0 on success, -1 on error. + * * \note Removing objects from a topology should rather be performed with * hwloc_topology_restrict(). */ @@ -2466,6 +2679,9 @@ HWLOC_DECLSPEC int hwloc_topology_allow(hwloc_topology_t __hwloc_restrict topolo * * The new leaf object will not have any \p cpuset. * + * The \p subtype object attribute may be defined with hwloc_obj_set_subtype() + * after successful insertion. + * * \return the newly-created object * * \return \c NULL on error. @@ -2482,10 +2698,33 @@ HWLOC_DECLSPEC hwloc_obj_t hwloc_topology_insert_misc_object(hwloc_topology_t to * This function returns a new Group object. * * The caller should (at least) initialize its sets before inserting - * the object in the topology. See hwloc_topology_insert_group_object(). - */ + * the object in the topology, see hwloc_topology_insert_group_object(). + * Or it may decide not to insert and just free the group object + * by calling hwloc_topology_free_group_object(). + * + * \return The allocated object on success. + * \return \c NULL on error. + * + * \note If successfully inserted by hwloc_topology_insert_group_object(), + * the object will be freed when the entire topology is freed. + * If insertion failed (e.g. 
\c NULL or empty CPU and node-sets), + * it is freed before returning the error. + */ HWLOC_DECLSPEC hwloc_obj_t hwloc_topology_alloc_group_object(hwloc_topology_t topology); +/** \brief Free a group object allocated with hwloc_topology_alloc_group_object(). + * + * This function is only useful if the group object was not given + * to hwloc_topology_insert_group_object() as planned. + * + * \note \p topology must be the same as the one previously passed + * to hwloc_topology_alloc_group_object(). + * + * \return \c 0 on success. + * \return \c -1 on error, for instance if an invalid topology is given. + */ +HWLOC_DECLSPEC int hwloc_topology_free_group_object(hwloc_topology_t topology, hwloc_obj_t group); + /** \brief Add more structure to the topology by adding an intermediate Group * * The caller should first allocate a new Group object with hwloc_topology_alloc_group_object(). @@ -2493,20 +2732,38 @@ HWLOC_DECLSPEC hwloc_obj_t hwloc_topology_alloc_group_object(hwloc_topology_t to * the final location of the Group in the topology. * Then the object can be passed to this function for actual insertion in the topology. * - * Either the cpuset or nodeset field (or both, if compatible) must be set - * to a non-empty bitmap. The complete_cpuset or complete_nodeset may be set - * instead if inserting with respect to the complete topology + * The main use case for this function is to group a subset of + * siblings among the list of children below a single parent. + * For instance, if grouping 4 cores out of a 8-core socket, + * the logical list of cores will be reordered so that the 4 grouped + * ones are consecutive. + * Then, if needed, a new depth is added between the parent and those + * children, and the Group is inserted there. + * At the end, the 4 grouped cores are now children of the Group, + * which replaces them as a child of the original parent. 
+ * + * In practice, the grouped objects are specified through cpusets + * and/or nodesets, for instance using hwloc_obj_add_other_obj_sets() + * iteratively. + * Hence it is possible to group objects that are not children of the + * same parent, for instance some PUs below the 4 cores in example above. + * However this general case may fail if the expected Group conflicts + * with the existing hierarchy. + * For instance if each core has two PUs, it is not possible to insert + * a Group containing a single PU of each core. + * + * To specify the objects to group, either the cpuset or nodeset field + * (or both, if compatible) must be set to a non-empty bitmap. + * The complete_cpuset or complete_nodeset may be set instead if + * inserting with respect to the complete topology * (including disallowed, offline or unknown objects). - * If grouping several objects, hwloc_obj_add_other_obj_sets() is an easy way - * to build the Group sets iteratively. * These sets cannot be larger than the current topology, or they would get * restricted silently. * The core will setup the other sets after actual insertion. * - * The \p subtype object attribute may be defined (to a dynamically - * allocated string) to display something else than "Group" as the - * type name for this object in lstopo. - * Custom name/value info pairs may be added with hwloc_obj_add_info() after + * The \p subtype object attribute may be defined with hwloc_obj_set_subtype() + * to display something else than "Group" as the type name for this object in lstopo. + * Custom name-value info pairs may be added with hwloc_obj_add_info() after * insertion. * * The group \p dont_merge attribute may be set to \c 1 to prevent @@ -2519,6 +2776,18 @@ HWLOC_DECLSPEC hwloc_obj_t hwloc_topology_alloc_group_object(hwloc_topology_t to * as \c 0xffffffff to tell hwloc that this new Group should always * be discarded in favor of any existing Group with the same locality. 
* + * \note Inserting a group adds some locality information to the topology, + * hence the existing objects may get reordered (including PUs and NUMA nodes), + * and their logical indexes may change. + * + * \note If the insertion fails, the input group object is freed. + * + * \note If the group object should be discarded instead of inserted, + * it may be passed to hwloc_topology_free_group_object() instead. + * + * \note \p topology must be the same as the one previously passed + * to hwloc_topology_alloc_group_object(). + * * \return The inserted object if it was properly inserted. * * \return An existing object if the Group was merged or discarded @@ -2542,6 +2811,9 @@ HWLOC_DECLSPEC hwloc_obj_t hwloc_topology_insert_group_object(hwloc_topology_t t * This function is convenient between hwloc_topology_alloc_group_object() * and hwloc_topology_insert_group_object(). It builds the sets of the new Group * that will be inserted as a new intermediate parent of several objects. + * + * \return 0 on success. + * \return -1 with errno set to \c ENOMEM if some internal reallocation failed. */ HWLOC_DECLSPEC int hwloc_obj_add_other_obj_sets(hwloc_obj_t dst, hwloc_obj_t src); @@ -2558,6 +2830,9 @@ HWLOC_DECLSPEC int hwloc_obj_add_other_obj_sets(hwloc_obj_t dst, hwloc_obj_t src * attributes, etc. * * See also \ref threadsafety + * + * \return 0 on success. + * \return -1 on error, for instance if some internal reallocation failed. */ HWLOC_DECLSPEC int hwloc_topology_refresh(hwloc_topology_t topology); diff --git a/src/3rdparty/hwloc/include/hwloc/autogen/config.h b/src/3rdparty/hwloc/include/hwloc/autogen/config.h index 6f45f734..b9084182 100644 --- a/src/3rdparty/hwloc/include/hwloc/autogen/config.h +++ b/src/3rdparty/hwloc/include/hwloc/autogen/config.h @@ -1,6 +1,6 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2023 Inria. All rights reserved. + * Copyright © 2009-2024 Inria. All rights reserved. 
* Copyright © 2009-2012 Université Bordeaux * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. @@ -11,10 +11,10 @@ #ifndef HWLOC_CONFIG_H #define HWLOC_CONFIG_H -#define HWLOC_VERSION "2.10.0" +#define HWLOC_VERSION "2.11.2" #define HWLOC_VERSION_MAJOR 2 -#define HWLOC_VERSION_MINOR 10 -#define HWLOC_VERSION_RELEASE 0 +#define HWLOC_VERSION_MINOR 11 +#define HWLOC_VERSION_RELEASE 2 #define HWLOC_VERSION_GREEK "" #define __hwloc_restrict diff --git a/src/3rdparty/hwloc/include/hwloc/distances.h b/src/3rdparty/hwloc/include/hwloc/distances.h index 71cca4b5..5b0db873 100644 --- a/src/3rdparty/hwloc/include/hwloc/distances.h +++ b/src/3rdparty/hwloc/include/hwloc/distances.h @@ -1,5 +1,5 @@ /* - * Copyright © 2010-2023 Inria. All rights reserved. + * Copyright © 2010-2024 Inria. All rights reserved. * See COPYING in top-level directory. */ @@ -28,18 +28,18 @@ extern "C" { /** \brief Matrix of distances between a set of objects. * - * This matrix often contains latencies between NUMA nodes + * The most common matrix contains latencies between NUMA nodes * (as reported in the System Locality Distance Information Table (SLIT) * in the ACPI specification), which may or may not be physically accurate. * It corresponds to the latency for accessing the memory of one node * from a core in another node. - * The corresponding kind is ::HWLOC_DISTANCES_KIND_FROM_OS | ::HWLOC_DISTANCES_KIND_FROM_USER. + * The corresponding kind is ::HWLOC_DISTANCES_KIND_MEANS_LATENCY | ::HWLOC_DISTANCES_KIND_FROM_USER. * The name of this distances structure is "NUMALatency". - * Others distance structures include and "XGMIBandwidth", "XGMIHops", - * "XeLinkBandwidth" and "NVLinkBandwidth". * * The matrix may also contain bandwidths between random sets of objects, * possibly provided by the user, as specified in the \p kind attribute. 
+ * Other common distance structures include "XGMIBandwidth", "XGMIHops", + * "XeLinkBandwidth" and "NVLinkBandwidth". * * Pointers \p objs and \p values should not be replaced, reallocated, freed, etc. * However callers are allowed to modify \p kind as well as the contents @@ -70,11 +70,10 @@ struct hwloc_distances_s { * The \p kind attribute of struct hwloc_distances_s is a OR'ed set * of kinds. * - * A kind of format HWLOC_DISTANCES_KIND_FROM_* specifies where the - * distance information comes from, if known. - * - * A kind of format HWLOC_DISTANCES_KIND_MEANS_* specifies whether - * values are latencies or bandwidths, if applicable. + * Each distance matrix may have only one kind among HWLOC_DISTANCES_KIND_FROM_* + * specifying where distance information comes from, + * and one kind among HWLOC_DISTANCES_KIND_MEANS_* specifying + * whether values are latencies or bandwidths. */ enum hwloc_distances_kind_e { /** \brief These distances were obtained from the operating system or hardware. @@ -357,6 +356,8 @@ typedef void * hwloc_distances_add_handle_t; * Otherwise, it will be copied internally and may later be freed by the caller. * * \p kind specifies the kind of distance as a OR'ed set of ::hwloc_distances_kind_e. + * Only one kind of meaning and one kind of provenance may be given if appropriate + * (e.g. ::HWLOC_DISTANCES_KIND_MEANS_BANDWIDTH and ::HWLOC_DISTANCES_KIND_FROM_USER). * Kind ::HWLOC_DISTANCES_KIND_HETEROGENEOUS_TYPES will be automatically set * according to objects having different types in hwloc_distances_add_values(). * @@ -403,7 +404,8 @@ HWLOC_DECLSPEC int hwloc_distances_add_values(hwloc_topology_t topology, /** \brief Flags for adding a new distances to a topology. */ enum hwloc_distances_add_flag_e { /** \brief Try to group objects based on the newly provided distance information. - * This is ignored for distances between objects of different types.
+ * Grouping is only performed when the distances structure contains latencies, + * and when all objects are of the same type. * \hideinitializer */ HWLOC_DISTANCES_ADD_FLAG_GROUP = (1UL<<0), diff --git a/src/3rdparty/hwloc/include/hwloc/helper.h b/src/3rdparty/hwloc/include/hwloc/helper.h index 01619c5f..01640fb7 100644 --- a/src/3rdparty/hwloc/include/hwloc/helper.h +++ b/src/3rdparty/hwloc/include/hwloc/helper.h @@ -1,6 +1,6 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2023 Inria. All rights reserved. + * Copyright © 2009-2024 Inria. All rights reserved. * Copyright © 2009-2012 Université Bordeaux * Copyright © 2009-2010 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. @@ -946,6 +946,14 @@ enum hwloc_distrib_flags_e { * * \return 0 on success, -1 on error. * + * \note On hybrid CPUs (or asymmetric platforms), distribution may be suboptimal + * since the number of cores or PUs inside packages or below caches may vary + * (the top-down recursive partitioning ignores these numbers until reaching their levels). + * Hence it is recommended to distribute only inside a single homogeneous domain. + * For instance on a CPU with energy-efficient E-cores and high-performance P-cores, + * one should distribute separately N tasks on E-cores and M tasks on P-cores + * instead of trying to distribute directly M+N tasks on the entire CPUs. + * * \note This function requires the \p roots objects to have a CPU set. 
*/ static __hwloc_inline int @@ -960,7 +968,7 @@ hwloc_distrib(hwloc_topology_t topology, unsigned given, givenweight; hwloc_cpuset_t *cpusetp = set; - if (flags & ~HWLOC_DISTRIB_FLAG_REVERSE) { + if (!n || (flags & ~HWLOC_DISTRIB_FLAG_REVERSE)) { errno = EINVAL; return -1; } diff --git a/src/3rdparty/hwloc/include/hwloc/memattrs.h b/src/3rdparty/hwloc/include/hwloc/memattrs.h index 10332b8e..81b85d64 100644 --- a/src/3rdparty/hwloc/include/hwloc/memattrs.h +++ b/src/3rdparty/hwloc/include/hwloc/memattrs.h @@ -1,5 +1,5 @@ /* - * Copyright © 2019-2023 Inria. All rights reserved. + * Copyright © 2019-2024 Inria. All rights reserved. * See COPYING in top-level directory. */ @@ -69,7 +69,10 @@ extern "C" { * @{ */ -/** \brief Memory node attributes. */ +/** \brief Predefined memory attribute IDs. + * See ::hwloc_memattr_id_t for the generic definition of IDs + * for predefined or custom attributes. + */ enum hwloc_memattr_id_e { /** \brief * The \"Capacity\" is returned in bytes (local_memory attribute in objects). @@ -78,6 +81,8 @@ enum hwloc_memattr_id_e { * * No initiator is involved when looking at this attribute. * The corresponding attribute flags are ::HWLOC_MEMATTR_FLAG_HIGHER_FIRST. + * + * Capacity values may not be modified using hwloc_memattr_set_value(). * \hideinitializer */ HWLOC_MEMATTR_ID_CAPACITY = 0, @@ -93,6 +98,8 @@ enum hwloc_memattr_id_e { * * No initiator is involved when looking at this attribute. * The corresponding attribute flags are ::HWLOC_MEMATTR_FLAG_HIGHER_FIRST. + + * Locality values may not be modified using hwloc_memattr_set_value(). * \hideinitializer */ HWLOC_MEMATTR_ID_LOCALITY = 1, @@ -173,11 +180,19 @@ enum hwloc_memattr_id_e { /* TODO persistence? */ - HWLOC_MEMATTR_ID_MAX /**< \private Sentinel value */ + HWLOC_MEMATTR_ID_MAX /**< \private + * Sentinel value for predefined attributes. + * Dynamically registered custom attributes start here. + */ }; /** \brief A memory attribute identifier. 
- * May be either one of ::hwloc_memattr_id_e or a new id returned by hwloc_memattr_register(). + * + * hwloc predefines some commonly-used attributes in ::hwloc_memattr_id_e. + * One may then dynamically register custom ones with hwloc_memattr_register(), + * they will be assigned IDs immediately after the predefined ones. + * See \ref hwlocality_memattrs_manage for more information about + * existing attribute IDs. */ typedef unsigned hwloc_memattr_id_t; @@ -283,6 +298,10 @@ hwloc_get_local_numanode_objs(hwloc_topology_t topology, * (it does not have the flag ::HWLOC_MEMATTR_FLAG_NEED_INITIATOR), * location \p initiator is ignored and may be \c NULL. * + * \p target_node cannot be \c NULL. If \p attribute is ::HWLOC_MEMATTR_ID_CAPACITY, + * \p target_node must be a NUMA node. If it is ::HWLOC_MEMATTR_ID_LOCALITY, + * \p target_node must have a CPU set. + * * \p flags must be \c 0 for now. * * \return 0 on success. @@ -352,6 +371,8 @@ hwloc_memattr_get_best_target(hwloc_topology_t topology, * The returned initiator should not be modified or freed, * it belongs to the topology. * + * \p target_node cannot be \c NULL. + * * \p flags must be \c 0 for now. * * \return 0 on success. @@ -362,100 +383,10 @@ hwloc_memattr_get_best_target(hwloc_topology_t topology, HWLOC_DECLSPEC int hwloc_memattr_get_best_initiator(hwloc_topology_t topology, hwloc_memattr_id_t attribute, - hwloc_obj_t target, + hwloc_obj_t target_node, unsigned long flags, struct hwloc_location *best_initiator, hwloc_uint64_t *value); -/** @} */ - - -/** \defgroup hwlocality_memattrs_manage Managing memory attributes - * @{ - */ - -/** \brief Return the name of a memory attribute. - * - * \return 0 on success. - * \return -1 with errno set to \c EINVAL if the attribute does not exist. - */ -HWLOC_DECLSPEC int -hwloc_memattr_get_name(hwloc_topology_t topology, - hwloc_memattr_id_t attribute, - const char **name); - -/** \brief Return the flags of the given attribute. 
- * - * Flags are a OR'ed set of ::hwloc_memattr_flag_e. - * - * \return 0 on success. - * \return -1 with errno set to \c EINVAL if the attribute does not exist. - */ -HWLOC_DECLSPEC int -hwloc_memattr_get_flags(hwloc_topology_t topology, - hwloc_memattr_id_t attribute, - unsigned long *flags); - -/** \brief Memory attribute flags. - * Given to hwloc_memattr_register() and returned by hwloc_memattr_get_flags(). - */ -enum hwloc_memattr_flag_e { - /** \brief The best nodes for this memory attribute are those with the higher values. - * For instance Bandwidth. - */ - HWLOC_MEMATTR_FLAG_HIGHER_FIRST = (1UL<<0), - /** \brief The best nodes for this memory attribute are those with the lower values. - * For instance Latency. - */ - HWLOC_MEMATTR_FLAG_LOWER_FIRST = (1UL<<1), - /** \brief The value returned for this memory attribute depends on the given initiator. - * For instance Bandwidth and Latency, but not Capacity. - */ - HWLOC_MEMATTR_FLAG_NEED_INITIATOR = (1UL<<2) -}; - -/** \brief Register a new memory attribute. - * - * Add a specific memory attribute that is not defined in ::hwloc_memattr_id_e. - * Flags are a OR'ed set of ::hwloc_memattr_flag_e. It must contain at least - * one of ::HWLOC_MEMATTR_FLAG_HIGHER_FIRST or ::HWLOC_MEMATTR_FLAG_LOWER_FIRST. - * - * \return 0 on success. - * \return -1 with errno set to \c EBUSY if another attribute already uses this name. - */ -HWLOC_DECLSPEC int -hwloc_memattr_register(hwloc_topology_t topology, - const char *name, - unsigned long flags, - hwloc_memattr_id_t *id); - -/** \brief Set an attribute value for a specific target NUMA node. - * - * If the attribute does not relate to a specific initiator - * (it does not have the flag ::HWLOC_MEMATTR_FLAG_NEED_INITIATOR), - * location \p initiator is ignored and may be \c NULL. - * - * The initiator will be copied into the topology, - * the caller should free anything allocated to store the initiator, - * for instance the cpuset. - * - * \p flags must be \c 0 for now. 
- * - * \note The initiator \p initiator should be of type ::HWLOC_LOCATION_TYPE_CPUSET - * when referring to accesses performed by CPU cores. - * ::HWLOC_LOCATION_TYPE_OBJECT is currently unused internally by hwloc, - * but users may for instance use it to provide custom information about - * host memory accesses performed by GPUs. - * - * \return 0 on success or -1 on error. - */ -HWLOC_DECLSPEC int -hwloc_memattr_set_value(hwloc_topology_t topology, - hwloc_memattr_id_t attribute, - hwloc_obj_t target_node, - struct hwloc_location *initiator, - unsigned long flags, - hwloc_uint64_t value); - /** \brief Return the target NUMA nodes that have some values for a given attribute. * * Return targets for the given attribute in the \p targets array @@ -519,6 +450,8 @@ hwloc_memattr_get_targets(hwloc_topology_t topology, * The returned initiators should not be modified or freed, * they belong to the topology. * + * \p target_node cannot be \c NULL. + * * \p flags must be \c 0 for now. * * If the attribute does not relate to a specific initiator @@ -538,6 +471,131 @@ hwloc_memattr_get_initiators(hwloc_topology_t topology, hwloc_obj_t target_node, unsigned long flags, unsigned *nr, struct hwloc_location *initiators, hwloc_uint64_t *values); + +/** @} */ + + +/** \defgroup hwlocality_memattrs_manage Managing memory attributes + * + * Memory attributes are identified by an ID (::hwloc_memattr_id_t) + * and a name. hwloc_memattr_get_name() and hwloc_memattr_get_by_name() + * convert between them (or return an error if the attribute does not exist). + * + * The set of valid ::hwloc_memattr_id_t is a contiguous set starting at \c 0. + * It first contains predefined attributes, as listed + * in ::hwloc_memattr_id_e (from \c 0 to \c HWLOC_MEMATTR_ID_MAX-1). + * Then custom attributes may be dynamically registered with + * hwloc_memattr_register(). They will get the following IDs + * (\c HWLOC_MEMATTR_ID_MAX for the first one, etc.).
+ * + * To iterate over all valid attributes + * (either predefined or dynamically registered custom ones), + * one may iterate over IDs starting from \c 0 until hwloc_memattr_get_name() + * or hwloc_memattr_get_flags() returns an error. + * + * The values for an existing attribute or for custom dynamically registered ones + * may be set or modified with hwloc_memattr_set_value(). + * + * @{ + */ + +/** \brief Return the name of a memory attribute. + * + * The output pointer \p name cannot be \c NULL. + * + * \return 0 on success. + * \return -1 with errno set to \c EINVAL if the attribute does not exist. + */ +HWLOC_DECLSPEC int +hwloc_memattr_get_name(hwloc_topology_t topology, + hwloc_memattr_id_t attribute, + const char **name); + +/** \brief Return the flags of the given attribute. + * + * Flags are a OR'ed set of ::hwloc_memattr_flag_e. + * + * The output pointer \p flags cannot be \c NULL. + * + * \return 0 on success. + * \return -1 with errno set to \c EINVAL if the attribute does not exist. + */ +HWLOC_DECLSPEC int +hwloc_memattr_get_flags(hwloc_topology_t topology, + hwloc_memattr_id_t attribute, + unsigned long *flags); + +/** \brief Memory attribute flags. + * Given to hwloc_memattr_register() and returned by hwloc_memattr_get_flags(). + */ +enum hwloc_memattr_flag_e { + /** \brief The best nodes for this memory attribute are those with the higher values. + * For instance Bandwidth. + */ + HWLOC_MEMATTR_FLAG_HIGHER_FIRST = (1UL<<0), + /** \brief The best nodes for this memory attribute are those with the lower values. + * For instance Latency. + */ + HWLOC_MEMATTR_FLAG_LOWER_FIRST = (1UL<<1), + /** \brief The value returned for this memory attribute depends on the given initiator. + * For instance Bandwidth and Latency, but not Capacity. + */ + HWLOC_MEMATTR_FLAG_NEED_INITIATOR = (1UL<<2) +}; + +/** \brief Register a new memory attribute. + * + * Add a new custom memory attribute. + * Flags are a OR'ed set of ::hwloc_memattr_flag_e. 
It must contain one of + * ::HWLOC_MEMATTR_FLAG_HIGHER_FIRST or ::HWLOC_MEMATTR_FLAG_LOWER_FIRST but not both. + * + * The new attribute \p id is immediately after the last existing attribute ID + * (which is either the ID of the last registered attribute if any, + * or the ID of the last predefined attribute in ::hwloc_memattr_id_e). + * + * \return 0 on success. + * \return -1 with errno set to \c EINVAL if an invalid set of flags is given. + * \return -1 with errno set to \c EBUSY if another attribute already uses this name. + */ +HWLOC_DECLSPEC int +hwloc_memattr_register(hwloc_topology_t topology, + const char *name, + unsigned long flags, + hwloc_memattr_id_t *id); + +/** \brief Set an attribute value for a specific target NUMA node. + * + * If the attribute does not relate to a specific initiator + * (it does not have the flag ::HWLOC_MEMATTR_FLAG_NEED_INITIATOR), + * location \p initiator is ignored and may be \c NULL. + * + * The initiator will be copied into the topology, + * the caller should free anything allocated to store the initiator, + * for instance the cpuset. + * + * \p target_node cannot be \c NULL. + * + * \p attribute cannot be ::HWLOC_MEMATTR_ID_CAPACITY or + * ::HWLOC_MEMATTR_ID_LOCALITY. + * + * \p flags must be \c 0 for now. + * + * \note The initiator \p initiator should be of type ::HWLOC_LOCATION_TYPE_CPUSET + * when referring to accesses performed by CPU cores. + * ::HWLOC_LOCATION_TYPE_OBJECT is currently unused internally by hwloc, + * but users may for instance use it to provide custom information about + * host memory accesses performed by GPUs. + * + * \return 0 on success or -1 on error.
+ */ +HWLOC_DECLSPEC int +hwloc_memattr_set_value(hwloc_topology_t topology, + hwloc_memattr_id_t attribute, + hwloc_obj_t target_node, + struct hwloc_location *initiator, + unsigned long flags, + hwloc_uint64_t value); + /** @} */ #ifdef __cplusplus diff --git a/src/3rdparty/hwloc/include/hwloc/opencl.h b/src/3rdparty/hwloc/include/hwloc/opencl.h index 9810504e..5e53b2aa 100644 --- a/src/3rdparty/hwloc/include/hwloc/opencl.h +++ b/src/3rdparty/hwloc/include/hwloc/opencl.h @@ -41,6 +41,15 @@ extern "C" { */ /* Copyright (c) 2008-2018 The Khronos Group Inc. */ +/* needs "cl_khr_pci_bus_info" device extension, but not strictly required for clGetDeviceInfo() */ +typedef struct { + cl_uint pci_domain; + cl_uint pci_bus; + cl_uint pci_device; + cl_uint pci_function; +} hwloc_cl_device_pci_bus_info_khr; +#define HWLOC_CL_DEVICE_PCI_BUS_INFO_KHR 0x410F + /* needs "cl_amd_device_attribute_query" device extension, but not strictly required for clGetDeviceInfo() */ #define HWLOC_CL_DEVICE_TOPOLOGY_AMD 0x4037 typedef union { @@ -78,9 +87,19 @@ hwloc_opencl_get_device_pci_busid(cl_device_id device, unsigned *domain, unsigned *bus, unsigned *dev, unsigned *func) { hwloc_cl_device_topology_amd amdtopo; + hwloc_cl_device_pci_bus_info_khr khrbusinfo; cl_uint nvbus, nvslot, nvdomain; cl_int clret; + clret = clGetDeviceInfo(device, HWLOC_CL_DEVICE_PCI_BUS_INFO_KHR, sizeof(khrbusinfo), &khrbusinfo, NULL); + if (CL_SUCCESS == clret) { + *domain = (unsigned) khrbusinfo.pci_domain; + *bus = (unsigned) khrbusinfo.pci_bus; + *dev = (unsigned) khrbusinfo.pci_device; + *func = (unsigned) khrbusinfo.pci_function; + return 0; + } + clret = clGetDeviceInfo(device, HWLOC_CL_DEVICE_TOPOLOGY_AMD, sizeof(amdtopo), &amdtopo, NULL); if (CL_SUCCESS == clret && HWLOC_CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD == amdtopo.raw.type) { diff --git a/src/3rdparty/hwloc/include/hwloc/plugins.h b/src/3rdparty/hwloc/include/hwloc/plugins.h index f3db648c..95e68195 100644 --- a/src/3rdparty/hwloc/include/hwloc/plugins.h 
+++ b/src/3rdparty/hwloc/include/hwloc/plugins.h @@ -1,5 +1,5 @@ /* - * Copyright © 2013-2022 Inria. All rights reserved. + * Copyright © 2013-2024 Inria. All rights reserved. * Copyright © 2016 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. */ @@ -645,6 +645,19 @@ HWLOC_DECLSPEC struct hwloc_obj * hwloc_pci_find_parent_by_busid(struct hwloc_to */ HWLOC_DECLSPEC struct hwloc_obj * hwloc_pci_find_by_busid(struct hwloc_topology *topology, unsigned domain, unsigned bus, unsigned dev, unsigned func); + +/** @} */ + + + + +/** \defgroup hwlocality_components_distances Components and Plugins: distances + * + * \note These structures and functions may change when ::HWLOC_COMPONENT_ABI is modified. + * + * @{ + */ + /** \brief Handle to a new distances structure during its addition to the topology. */ typedef void * hwloc_backend_distances_add_handle_t; diff --git a/src/3rdparty/hwloc/include/hwloc/rename.h b/src/3rdparty/hwloc/include/hwloc/rename.h index d5687b69..fca397fc 100644 --- a/src/3rdparty/hwloc/include/hwloc/rename.h +++ b/src/3rdparty/hwloc/include/hwloc/rename.h @@ -1,6 +1,6 @@ /* * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. - * Copyright © 2010-2022 Inria. All rights reserved. + * Copyright © 2010-2024 Inria. All rights reserved. * See COPYING in top-level directory. 
*/ @@ -210,6 +210,7 @@ extern "C" { #define hwloc_obj_get_info_by_name HWLOC_NAME(obj_get_info_by_name) #define hwloc_obj_add_info HWLOC_NAME(obj_add_info) +#define hwloc_obj_set_subtype HWLOC_NAME(obj_set_subtype) #define HWLOC_CPUBIND_PROCESS HWLOC_NAME_CAPS(CPUBIND_PROCESS) #define HWLOC_CPUBIND_THREAD HWLOC_NAME_CAPS(CPUBIND_THREAD) @@ -232,6 +233,7 @@ extern "C" { #define HWLOC_MEMBIND_FIRSTTOUCH HWLOC_NAME_CAPS(MEMBIND_FIRSTTOUCH) #define HWLOC_MEMBIND_BIND HWLOC_NAME_CAPS(MEMBIND_BIND) #define HWLOC_MEMBIND_INTERLEAVE HWLOC_NAME_CAPS(MEMBIND_INTERLEAVE) +#define HWLOC_MEMBIND_WEIGHTED_INTERLEAVE HWLOC_NAME_CAPS(MEMBIND_WEIGHTED_INTERLEAVE) #define HWLOC_MEMBIND_NEXTTOUCH HWLOC_NAME_CAPS(MEMBIND_NEXTTOUCH) #define HWLOC_MEMBIND_MIXED HWLOC_NAME_CAPS(MEMBIND_MIXED) @@ -560,6 +562,7 @@ extern "C" { /* opencl.h */ +#define hwloc_cl_device_pci_bus_info_khr HWLOC_NAME(cl_device_pci_bus_info_khr) #define hwloc_cl_device_topology_amd HWLOC_NAME(cl_device_topology_amd) #define hwloc_opencl_get_device_pci_busid HWLOC_NAME(opencl_get_device_pci_ids) #define hwloc_opencl_get_device_cpuset HWLOC_NAME(opencl_get_device_cpuset) @@ -715,6 +718,8 @@ extern "C" { #define hwloc__obj_type_is_dcache HWLOC_NAME(_obj_type_is_dcache) #define hwloc__obj_type_is_icache HWLOC_NAME(_obj_type_is_icache) +#define hwloc__pci_link_speed HWLOC_NAME(_pci_link_speed) + /* private/cpuid-x86.h */ #define hwloc_have_x86_cpuid HWLOC_NAME(have_x86_cpuid) diff --git a/src/3rdparty/hwloc/include/private/autogen/config.h b/src/3rdparty/hwloc/include/private/autogen/config.h index 5bf22fac..3002ac5e 100644 --- a/src/3rdparty/hwloc/include/private/autogen/config.h +++ b/src/3rdparty/hwloc/include/private/autogen/config.h @@ -1,6 +1,6 @@ /* * Copyright © 2009, 2011, 2012 CNRS. All rights reserved. - * Copyright © 2009-2021 Inria. All rights reserved. + * Copyright © 2009-2020 Inria. All rights reserved. * Copyright © 2009, 2011, 2012, 2015 Université Bordeaux. All rights reserved. 
* Copyright © 2009-2020 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ @@ -17,6 +17,10 @@ #define HWLOC_HAVE_MSVC_CPUIDEX 1 +/* #undef HAVE_MKSTEMP */ + +#define HWLOC_HAVE_X86_CPUID 1 + /* Define to 1 if the system has the type `CACHE_DESCRIPTOR'. */ #define HAVE_CACHE_DESCRIPTOR 0 @@ -128,8 +132,7 @@ #define HAVE_DECL__SC_PAGE_SIZE 0 /* Define to 1 if you have the <dirent.h> header file. */ -/* #define HAVE_DIRENT_H 1 */ -#undef HAVE_DIRENT_H +/* #undef HAVE_DIRENT_H */ /* Define to 1 if you have the <dlfcn.h> header file. */ /* #undef HAVE_DLFCN_H */ @@ -282,7 +285,7 @@ #define HAVE_STRING_H 1 /* Define to 1 if you have the `strncasecmp' function. */ -#define HAVE_STRNCASECMP 1 +/* #undef HAVE_STRNCASECMP */ /* Define to '1' if sysctl is present and usable */ /* #undef HAVE_SYSCTL */ @@ -323,8 +326,7 @@ /* #undef HAVE_UNAME */ /* Define to 1 if you have the <unistd.h> header file. */ -/* #define HAVE_UNISTD_H 1 */ -#undef HAVE_UNISTD_H +/* #undef HAVE_UNISTD_H */ /* Define to 1 if you have the `uselocale' function. */ /* #undef HAVE_USELOCALE */ @@ -659,7 +661,7 @@ #define hwloc_pid_t HANDLE /* Define this to either strncasecmp or strncmp */ -#define hwloc_strncasecmp strncasecmp +/* #undef hwloc_strncasecmp */ /* Define this to the thread ID type */ #define hwloc_thread_t HANDLE diff --git a/src/3rdparty/hwloc/include/private/cpuid-x86.h b/src/3rdparty/hwloc/include/private/cpuid-x86.h index 2758afe0..1f87fdac 100644 --- a/src/3rdparty/hwloc/include/private/cpuid-x86.h +++ b/src/3rdparty/hwloc/include/private/cpuid-x86.h @@ -11,6 +11,22 @@ #ifndef HWLOC_PRIVATE_CPUID_X86_H #define HWLOC_PRIVATE_CPUID_X86_H +/* A macro for annotating memory as initialized when building with MSAN + * (and otherwise having no effect). See below for why this is used with + * our custom assembly.
+ */ +#ifdef __has_feature +#define HWLOC_HAS_FEATURE(name) __has_feature(name) +#else +#define HWLOC_HAS_FEATURE(name) 0 +#endif +#if HWLOC_HAS_FEATURE(memory_sanitizer) || defined(MEMORY_SANITIZER) +#include <sanitizer/msan_interface.h> +#define HWLOC_ANNOTATE_MEMORY_IS_INITIALIZED(ptr, len) __msan_unpoison(ptr, len) +#else +#define HWLOC_ANNOTATE_MEMORY_IS_INITIALIZED(ptr, len) +#endif + #if (defined HWLOC_X86_32_ARCH) && (!defined HWLOC_HAVE_MSVC_CPUIDEX) static __hwloc_inline int hwloc_have_x86_cpuid(void) { @@ -71,12 +87,18 @@ static __hwloc_inline void hwloc_x86_cpuid(unsigned *eax, unsigned *ebx, unsigne "movl %k2,%1\n\t" : "+a" (*eax), "=m" (*ebx), "=&r"(sav_rbx), "+c" (*ecx), "=&d" (*edx)); + /* MSAN does not recognize the effect of the above assembly on the memory operand + * (`"=m"(*ebx)`). This may get improved in MSAN at some point in the future, e.g. + * see https://github.com/llvm/llvm-project/pull/77393. */ + HWLOC_ANNOTATE_MEMORY_IS_INITIALIZED(ebx, sizeof *ebx); #elif defined(HWLOC_X86_32_ARCH) __asm__( "mov %%ebx,%1\n\t" "cpuid\n\t" "xchg %%ebx,%1\n\t" : "+a" (*eax), "=&SD" (*ebx), "+c" (*ecx), "=&d" (*edx)); + /* See above. */ + HWLOC_ANNOTATE_MEMORY_IS_INITIALIZED(ebx, sizeof *ebx); #else #error unknown architecture #endif diff --git a/src/3rdparty/hwloc/include/private/misc.h b/src/3rdparty/hwloc/include/private/misc.h index bc57e98e..b5ee196c 100644 --- a/src/3rdparty/hwloc/include/private/misc.h +++ b/src/3rdparty/hwloc/include/private/misc.h @@ -1,6 +1,6 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2019 Inria. All rights reserved. + * Copyright © 2009-2024 Inria. All rights reserved. * Copyright © 2009-2012 Université Bordeaux * Copyright © 2011 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. @@ -573,4 +573,35 @@ typedef SSIZE_T ssize_t; # endif #endif +static __inline float +hwloc__pci_link_speed(unsigned generation, unsigned lanes) +{ + float lanespeed; + /* + * These are single-direction bandwidths only.
+ * + * Gen1 used NRZ with 8/10 encoding. + * PCIe Gen1 = 2.5GT/s signal-rate per lane x 8/10 = 0.25GB/s data-rate per lane + * PCIe Gen2 = 5 GT/s signal-rate per lane x 8/10 = 0.5 GB/s data-rate per lane + * Gen3 switched to NRZ with 128/130 encoding. + * PCIe Gen3 = 8 GT/s signal-rate per lane x 128/130 = 1 GB/s data-rate per lane + * PCIe Gen4 = 16 GT/s signal-rate per lane x 128/130 = 2 GB/s data-rate per lane + * PCIe Gen5 = 32 GT/s signal-rate per lane x 128/130 = 4 GB/s data-rate per lane + * Gen6 switched to PAM with 242/256 FLIT (242B payload protected by 8B CRC + 6B FEC). + * PCIe Gen6 = 64 GT/s signal-rate per lane x 242/256 = 8 GB/s data-rate per lane + * PCIe Gen7 = 128GT/s signal-rate per lane x 242/256 = 16 GB/s data-rate per lane + */ + + /* lanespeed in Gbit/s */ + if (generation <= 2) + lanespeed = 2.5f * generation * 0.8f; + else if (generation <= 5) + lanespeed = 8.0f * (1<<(generation-3)) * 128/130; + else + lanespeed = 8.0f * (1<<(generation-3)) * 242/256; /* assume Gen8 will be 256 GT/s and so on */ + + /* linkspeed in GB/s */ + return lanespeed * lanes / 8; +} + #endif /* HWLOC_PRIVATE_MISC_H */ diff --git a/src/3rdparty/hwloc/src/bind.c b/src/3rdparty/hwloc/src/bind.c index 2b5d0994..580580e7 100644 --- a/src/3rdparty/hwloc/src/bind.c +++ b/src/3rdparty/hwloc/src/bind.c @@ -1,6 +1,6 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2020 Inria. All rights reserved. + * Copyright © 2009-2024 Inria. All rights reserved. * Copyright © 2009-2010, 2012 Université Bordeaux * Copyright © 2011-2015 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory.
@@ -287,6 +287,7 @@ static __hwloc_inline int hwloc__check_membind_policy(hwloc_membind_policy_t pol || policy == HWLOC_MEMBIND_FIRSTTOUCH || policy == HWLOC_MEMBIND_BIND || policy == HWLOC_MEMBIND_INTERLEAVE + || policy == HWLOC_MEMBIND_WEIGHTED_INTERLEAVE || policy == HWLOC_MEMBIND_NEXTTOUCH) return 0; return -1; diff --git a/src/3rdparty/hwloc/src/bitmap.c b/src/3rdparty/hwloc/src/bitmap.c index cf071edb..b7825b46 100644 --- a/src/3rdparty/hwloc/src/bitmap.c +++ b/src/3rdparty/hwloc/src/bitmap.c @@ -1,6 +1,6 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2020 Inria. All rights reserved. + * Copyright © 2009-2024 Inria. All rights reserved. * Copyright © 2009-2011 Université Bordeaux * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. @@ -245,6 +245,7 @@ int hwloc_bitmap_copy(struct hwloc_bitmap_s * dst, const struct hwloc_bitmap_s * /* Strings always use 32bit groups */ #define HWLOC_PRIxSUBBITMAP "%08lx" #define HWLOC_BITMAP_SUBSTRING_SIZE 32 +#define HWLOC_BITMAP_SUBSTRING_FULL_VALUE 0xFFFFFFFFUL #define HWLOC_BITMAP_SUBSTRING_LENGTH (HWLOC_BITMAP_SUBSTRING_SIZE/4) #define HWLOC_BITMAP_STRING_PER_LONG (HWLOC_BITS_PER_LONG/HWLOC_BITMAP_SUBSTRING_SIZE) @@ -261,6 +262,7 @@ int hwloc_bitmap_snprintf(char * __hwloc_restrict buf, size_t buflen, const stru const unsigned long accum_mask = ~0UL; #else /* HWLOC_BITS_PER_LONG != HWLOC_BITMAP_SUBSTRING_SIZE */ const unsigned long accum_mask = ((1UL << HWLOC_BITMAP_SUBSTRING_SIZE) - 1) << (HWLOC_BITS_PER_LONG - HWLOC_BITMAP_SUBSTRING_SIZE); + int merge_with_infinite_prefix = 0; #endif /* HWLOC_BITS_PER_LONG != HWLOC_BITMAP_SUBSTRING_SIZE */ HWLOC__BITMAP_CHECK(set); @@ -279,6 +281,9 @@ int hwloc_bitmap_snprintf(char * __hwloc_restrict buf, size_t buflen, const stru res = size>0 ? 
(int)size - 1 : 0; tmp += res; size -= res; +#if HWLOC_BITS_PER_LONG > HWLOC_BITMAP_SUBSTRING_SIZE + merge_with_infinite_prefix = 1; +#endif } i=(int) set->ulongs_count-1; @@ -294,16 +299,24 @@ int hwloc_bitmap_snprintf(char * __hwloc_restrict buf, size_t buflen, const stru } while (i>=0 || accumed) { + unsigned long value; + /* Refill accumulator */ if (!accumed) { accum = set->ulongs[i--]; accumed = HWLOC_BITS_PER_LONG; } + value = (accum & accum_mask) >> (HWLOC_BITS_PER_LONG - HWLOC_BITMAP_SUBSTRING_SIZE); - if (accum & accum_mask) { +#if HWLOC_BITS_PER_LONG > HWLOC_BITMAP_SUBSTRING_SIZE + if (merge_with_infinite_prefix && value == HWLOC_BITMAP_SUBSTRING_FULL_VALUE) { + /* first full subbitmap merged with infinite prefix */ + res = 0; + } else +#endif + if (value) { /* print the whole subset if not empty */ - res = hwloc_snprintf(tmp, size, needcomma ? ",0x" HWLOC_PRIxSUBBITMAP : "0x" HWLOC_PRIxSUBBITMAP, - (accum & accum_mask) >> (HWLOC_BITS_PER_LONG - HWLOC_BITMAP_SUBSTRING_SIZE)); + res = hwloc_snprintf(tmp, size, needcomma ? 
",0x" HWLOC_PRIxSUBBITMAP : "0x" HWLOC_PRIxSUBBITMAP, value); needcomma = 1; } else if (i == -1 && accumed == HWLOC_BITMAP_SUBSTRING_SIZE) { /* print a single 0 to mark the last subset */ @@ -323,6 +336,7 @@ int hwloc_bitmap_snprintf(char * __hwloc_restrict buf, size_t buflen, const stru #else accum <<= HWLOC_BITMAP_SUBSTRING_SIZE; accumed -= HWLOC_BITMAP_SUBSTRING_SIZE; + merge_with_infinite_prefix = 0; #endif if (res >= size) @@ -362,7 +376,8 @@ int hwloc_bitmap_sscanf(struct hwloc_bitmap_s *set, const char * __hwloc_restric { const char * current = string; unsigned long accum = 0; - int count=0; + int count = 0; + int ulongcount; int infinite = 0; /* count how many substrings there are */ @@ -383,9 +398,20 @@ int hwloc_bitmap_sscanf(struct hwloc_bitmap_s *set, const char * __hwloc_restric count--; } - if (hwloc_bitmap_reset_by_ulongs(set, (count + HWLOC_BITMAP_STRING_PER_LONG - 1) / HWLOC_BITMAP_STRING_PER_LONG) < 0) + ulongcount = (count + HWLOC_BITMAP_STRING_PER_LONG - 1) / HWLOC_BITMAP_STRING_PER_LONG; + if (hwloc_bitmap_reset_by_ulongs(set, ulongcount) < 0) return -1; - set->infinite = 0; + + set->infinite = 0; /* will be updated later */ + +#if HWLOC_BITS_PER_LONG != HWLOC_BITMAP_SUBSTRING_SIZE + if (infinite && (count % HWLOC_BITMAP_STRING_PER_LONG) != 0) { + /* accumulate substrings of the first ulong that are hidden in the infinite prefix */ + int i; + for(i = (count % HWLOC_BITMAP_STRING_PER_LONG); i < HWLOC_BITMAP_STRING_PER_LONG; i++) + accum |= (HWLOC_BITMAP_SUBSTRING_FULL_VALUE << (i*HWLOC_BITMAP_SUBSTRING_SIZE)); + } +#endif while (*current != '\0') { unsigned long val; @@ -544,6 +570,9 @@ int hwloc_bitmap_taskset_snprintf(char * __hwloc_restrict buf, size_t buflen, co ssize_t size = buflen; char *tmp = buf; int res, ret = 0; +#if HWLOC_BITS_PER_LONG == 64 + int merge_with_infinite_prefix = 0; +#endif int started = 0; int i; @@ -563,6 +592,9 @@ int hwloc_bitmap_taskset_snprintf(char * __hwloc_restrict buf, size_t buflen, co res = size>0 ? 
(int)size - 1 : 0; tmp += res; size -= res; +#if HWLOC_BITS_PER_LONG == 64 + merge_with_infinite_prefix = 1; +#endif } i=set->ulongs_count-1; @@ -582,7 +614,11 @@ int hwloc_bitmap_taskset_snprintf(char * __hwloc_restrict buf, size_t buflen, co if (started) { /* print the whole subset */ #if HWLOC_BITS_PER_LONG == 64 - res = hwloc_snprintf(tmp, size, "%016lx", val); + if (merge_with_infinite_prefix && (val & 0xffffffff00000000UL) == 0xffffffff00000000UL) { + res = hwloc_snprintf(tmp, size, "%08lx", val & 0xffffffffUL); + } else { + res = hwloc_snprintf(tmp, size, "%016lx", val); + } #else res = hwloc_snprintf(tmp, size, "%08lx", val); #endif @@ -599,6 +635,9 @@ int hwloc_bitmap_taskset_snprintf(char * __hwloc_restrict buf, size_t buflen, co res = size>0 ? (int)size - 1 : 0; tmp += res; size -= res; +#if HWLOC_BITS_PER_LONG == 64 + merge_with_infinite_prefix = 0; +#endif } /* if didn't display anything, display 0x0 */ @@ -679,6 +718,10 @@ int hwloc_bitmap_taskset_sscanf(struct hwloc_bitmap_s *set, const char * __hwloc goto failed; set->ulongs[count-1] = val; + if (infinite && tmpchars != HWLOC_BITS_PER_LONG/4) { + /* infinite prefix with partial substring, fill remaining bits */ + set->ulongs[count-1] |= (~0ULL)<<(4*tmpchars); + } current += tmpchars; chars -= tmpchars; diff --git a/src/3rdparty/hwloc/src/cpukinds.c b/src/3rdparty/hwloc/src/cpukinds.c index 6c7c087f..321b12d4 100644 --- a/src/3rdparty/hwloc/src/cpukinds.c +++ b/src/3rdparty/hwloc/src/cpukinds.c @@ -1,5 +1,5 @@ /* - * Copyright © 2020-2022 Inria. All rights reserved. + * Copyright © 2020-2024 Inria. All rights reserved. * See COPYING in top-level directory. 
*/ @@ -50,6 +50,7 @@ hwloc_internal_cpukinds_dup(hwloc_topology_t new, hwloc_topology_t old) return -1; new->cpukinds = kinds; new->nr_cpukinds = old->nr_cpukinds; + new->nr_cpukinds_allocated = old->nr_cpukinds; memcpy(kinds, old->cpukinds, old->nr_cpukinds * sizeof(*kinds)); for(i=0;i<old->nr_cpukinds; i++) { diff --git a/src/3rdparty/hwloc/src/distances.c b/src/3rdparty/hwloc/src/distances.c index bfc7d61d..6dab5113 100644 --- a/src/3rdparty/hwloc/src/distances.c +++ b/src/3rdparty/hwloc/src/distances.c @@ -1,5 +1,5 @@ /* - * Copyright © 2010-2022 Inria. All rights reserved. + * Copyright © 2010-2024 Inria. All rights reserved. * Copyright © 2011-2012 Université Bordeaux * Copyright © 2011 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. @@ -624,8 +624,8 @@ void * hwloc_distances_add_create(hwloc_topology_t topology, return NULL; } if ((kind & ~HWLOC_DISTANCES_KIND_ALL) - || hwloc_weight_long(kind & HWLOC_DISTANCES_KIND_FROM_ALL) != 1 - || hwloc_weight_long(kind & HWLOC_DISTANCES_KIND_MEANS_ALL) != 1) { + || hwloc_weight_long(kind & HWLOC_DISTANCES_KIND_FROM_ALL) > 1 + || hwloc_weight_long(kind & HWLOC_DISTANCES_KIND_MEANS_ALL) > 1) { errno = EINVAL; return NULL; } diff --git a/src/3rdparty/hwloc/src/memattrs.c b/src/3rdparty/hwloc/src/memattrs.c index ab945471..112cbcf9 100644 --- a/src/3rdparty/hwloc/src/memattrs.c +++ b/src/3rdparty/hwloc/src/memattrs.c @@ -1,5 +1,5 @@ /* - * Copyright © 2020-2023 Inria. All rights reserved. + * Copyright © 2020-2024 Inria. All rights reserved. * See COPYING in top-level directory.
*/ @@ -14,13 +14,26 @@ */ static __hwloc_inline -hwloc_uint64_t hwloc__memattr_get_convenience_value(hwloc_memattr_id_t id, - hwloc_obj_t node) +int hwloc__memattr_get_convenience_value(hwloc_memattr_id_t id, + hwloc_obj_t node, + hwloc_uint64_t *valuep) { - if (id == HWLOC_MEMATTR_ID_CAPACITY) - return node->attr->numanode.local_memory; - else if (id == HWLOC_MEMATTR_ID_LOCALITY) - return hwloc_bitmap_weight(node->cpuset); + if (id == HWLOC_MEMATTR_ID_CAPACITY) { + if (node->type != HWLOC_OBJ_NUMANODE) { + errno = EINVAL; + return -1; + } + *valuep = node->attr->numanode.local_memory; + return 0; + } + else if (id == HWLOC_MEMATTR_ID_LOCALITY) { + if (!node->cpuset) { + errno = EINVAL; + return -1; + } + *valuep = hwloc_bitmap_weight(node->cpuset); + return 0; + } else assert(0); return 0; /* shut up the compiler */ @@ -622,7 +635,7 @@ hwloc_memattr_get_targets(hwloc_topology_t topology, if (foundiflags & HWLOC_IMATTR_FLAG_CONVENIENCE) { /* convenience attributes */ - *valuep = hwloc__memattr_get_convenience_value(id, target_node); - return 0; + return hwloc__memattr_get_convenience_value(id, target_node, valuep); } /* normal attributes */ @@ -936,7 +948,7 @@ hwloc_memattr_set_value(hwloc_topology_t topology, { struct hwloc_internal_location_s iloc, *ilocp; - if (flags) { + if (flags || !target_node) { errno = EINVAL; return -1; } @@ -1007,10 +1019,10 @@ hwloc_memattr_get_best_target(hwloc_topology_t topology, /* convenience attributes */ for(j=0; ; j++) { hwloc_obj_t node = hwloc_get_obj_by_type(topology, HWLOC_OBJ_NUMANODE, j); - hwloc_uint64_t value; + hwloc_uint64_t value = 0; if (!node) break; - value = hwloc__memattr_get_convenience_value(id, node); + hwloc__memattr_get_convenience_value(id, node, &value); hwloc__update_best_target(&best, &best_value, &found, node, value, imattr->flags & HWLOC_MEMATTR_FLAG_HIGHER_FIRST); @@ -1093,7 +1105,7 @@ hwloc_memattr_get_best_initiator(hwloc_topology_t topology, int found; unsigned i; - if (flags) { + if (flags || 
!target_node) { errno = EINVAL; return -1; } @@ -1806,6 +1818,12 @@ hwloc__apply_memory_tiers_subtypes(hwloc_topology_t topology, } } } + if (nr_tiers > 1) { + hwloc_obj_t root = hwloc_get_root_obj(topology); + char tmp[20]; + snprintf(tmp, sizeof(tmp), "%u", nr_tiers); + hwloc__add_info_nodup(&root->infos, &root->infos_count, "MemoryTiersNr", tmp, 1); + } } int diff --git a/src/3rdparty/hwloc/src/pci-common.c b/src/3rdparty/hwloc/src/pci-common.c index b5a4b544..feb1834d 100644 --- a/src/3rdparty/hwloc/src/pci-common.c +++ b/src/3rdparty/hwloc/src/pci-common.c @@ -1,5 +1,5 @@ /* - * Copyright © 2009-2022 Inria. All rights reserved. + * Copyright © 2009-2024 Inria. All rights reserved. * See COPYING in top-level directory. */ @@ -886,36 +886,12 @@ hwloc_pcidisc_find_linkspeed(const unsigned char *config, unsigned offset, float *linkspeed) { unsigned linksta, speed, width; - float lanespeed; memcpy(&linksta, &config[offset + HWLOC_PCI_EXP_LNKSTA], 4); speed = linksta & HWLOC_PCI_EXP_LNKSTA_SPEED; /* PCIe generation */ width = (linksta & HWLOC_PCI_EXP_LNKSTA_WIDTH) >> 4; /* how many lanes */ - /* - * These are single-direction bandwidths only. - * - * Gen1 used NRZ with 8/10 encoding. - * PCIe Gen1 = 2.5GT/s signal-rate per lane x 8/10 = 0.25GB/s data-rate per lane - * PCIe Gen2 = 5 GT/s signal-rate per lane x 8/10 = 0.5 GB/s data-rate per lane - * Gen3 switched to NRZ with 128/130 encoding. - * PCIe Gen3 = 8 GT/s signal-rate per lane x 128/130 = 1 GB/s data-rate per lane - * PCIe Gen4 = 16 GT/s signal-rate per lane x 128/130 = 2 GB/s data-rate per lane - * PCIe Gen5 = 32 GT/s signal-rate per lane x 128/130 = 4 GB/s data-rate per lane - * Gen6 switched to PAM with with 242/256 FLIT (242B payload protected by 8B CRC + 6B FEC). 
- * PCIe Gen6 = 64 GT/s signal-rate per lane x 242/256 = 8 GB/s data-rate per lane - * PCIe Gen7 = 128GT/s signal-rate per lane x 242/256 = 16 GB/s data-rate per lane - */ - /* lanespeed in Gbit/s */ - if (speed <= 2) - lanespeed = 2.5f * speed * 0.8f; - else if (speed <= 5) - lanespeed = 8.0f * (1<<(speed-3)) * 128/130; - else - lanespeed = 8.0f * (1<<(speed-3)) * 242/256; /* assume Gen8 will be 256 GT/s and so on */ - - /* linkspeed in GB/s */ - *linkspeed = lanespeed * width / 8; + *linkspeed = hwloc__pci_link_speed(speed, width); return 0; } diff --git a/src/3rdparty/hwloc/src/topology-windows.c b/src/3rdparty/hwloc/src/topology-windows.c index e187bb12..a8d6e014 100644 --- a/src/3rdparty/hwloc/src/topology-windows.c +++ b/src/3rdparty/hwloc/src/topology-windows.c @@ -1,6 +1,6 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2023 Inria. All rights reserved. + * Copyright © 2009-2024 Inria. All rights reserved. * Copyright © 2009-2012, 2020 Université Bordeaux * Copyright © 2011 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. 
@@ -220,7 +220,7 @@ static void hwloc_win_get_function_ptrs(void) #pragma GCC diagnostic ignored "-Wcast-function-type" #endif - kernel32 = LoadLibrary("kernel32.dll"); + kernel32 = LoadLibrary(TEXT("kernel32.dll")); if (kernel32) { GetActiveProcessorGroupCountProc = (PFN_GETACTIVEPROCESSORGROUPCOUNT) GetProcAddress(kernel32, "GetActiveProcessorGroupCount"); @@ -249,12 +249,12 @@ static void hwloc_win_get_function_ptrs(void) } if (!QueryWorkingSetExProc) { - HMODULE psapi = LoadLibrary("psapi.dll"); + HMODULE psapi = LoadLibrary(TEXT("psapi.dll")); if (psapi) QueryWorkingSetExProc = (PFN_QUERYWORKINGSETEX) GetProcAddress(psapi, "QueryWorkingSetEx"); } - ntdll = GetModuleHandle("ntdll"); + ntdll = GetModuleHandle(TEXT("ntdll")); RtlGetVersionProc = (PFN_RTLGETVERSION) GetProcAddress(ntdll, "RtlGetVersion"); #if HWLOC_HAVE_GCC_W_CAST_FUNCTION_TYPE diff --git a/src/3rdparty/hwloc/src/topology-x86.c b/src/3rdparty/hwloc/src/topology-x86.c index 7aabd168..22f65843 100644 --- a/src/3rdparty/hwloc/src/topology-x86.c +++ b/src/3rdparty/hwloc/src/topology-x86.c @@ -1,11 +1,11 @@ /* - * Copyright © 2010-2023 Inria. All rights reserved. + * Copyright © 2010-2024 Inria. All rights reserved. * Copyright © 2010-2013 Université Bordeaux * Copyright © 2010-2011 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. * * - * This backend is only used when the operating system does not export + * This backend is mostly used when the operating system does not export * the necessary hardware topology information to user-space applications. * Currently, FreeBSD and NetBSD only add PUs and then fallback to this * backend for CPU/Cache discovery. @@ -15,6 +15,7 @@ * on various architectures, without having to use this x86-specific code. * But this backend is still used after them to annotate some objects with * additional details (CPU info in Package, Inclusiveness in Caches). + * It may also be enabled manually to work-around bugs in native OS discovery. 
*/ #include "private/autogen/config.h" @@ -487,7 +488,7 @@ static void read_amd_cores_legacy(struct procinfo *infos, struct cpuiddump *src_ } /* AMD unit/node from CPUID 0x8000001e leaf (topoext) */ -static void read_amd_cores_topoext(struct hwloc_x86_backend_data_s *data, struct procinfo *infos, unsigned long flags, struct cpuiddump *src_cpuiddump) +static void read_amd_cores_topoext(struct hwloc_x86_backend_data_s *data, struct procinfo *infos, unsigned long flags __hwloc_attribute_unused, struct cpuiddump *src_cpuiddump) { unsigned apic_id, nodes_per_proc = 0; unsigned eax, ebx, ecx, edx; @@ -496,7 +497,6 @@ static void read_amd_cores_topoext(struct hwloc_x86_backend_data_s *data, struct cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump); infos->apicid = apic_id = eax; - if (flags & HWLOC_X86_DISC_FLAG_TOPOEXT_NUMANODES) { if (infos->cpufamilynumber == 0x16) { /* ecx is reserved */ infos->ids[NODE] = 0; @@ -511,7 +511,6 @@ static void read_amd_cores_topoext(struct hwloc_x86_backend_data_s *data, struct || (infos->cpufamilynumber == 0x19 && nodes_per_proc > 1)) { hwloc_debug("warning: undefined nodes_per_proc value %u, assuming it means %u\n", nodes_per_proc, nodes_per_proc); } - } if (infos->cpufamilynumber <= 0x16) { /* topoext appeared in 0x15 and compute-units were only used in 0x15 and 0x16 */ unsigned cores_per_unit; @@ -533,9 +532,9 @@ static void read_amd_cores_topoext(struct hwloc_x86_backend_data_s *data, struct } /* Intel core/thread or even die/module/tile from CPUID 0x0b or 0x1f leaves (v1 and v2 extended topology enumeration) - * or AMD complex/ccd from CPUID 0x80000026 (extended CPU topology) + * or AMD core/thread or even complex/ccd from CPUID 0x0b or 0x80000026 (extended CPU topology) */ -static void read_extended_topo(struct hwloc_x86_backend_data_s *data, struct procinfo *infos, unsigned leaf, enum cpuid_type cpuid_type, struct cpuiddump *src_cpuiddump) +static void read_extended_topo(struct hwloc_x86_backend_data_s *data, struct 
procinfo *infos, unsigned leaf, enum cpuid_type cpuid_type __hwloc_attribute_unused, struct cpuiddump *src_cpuiddump) { unsigned level, apic_nextshift, apic_type, apic_id = 0, apic_shift = 0, id; unsigned threadid __hwloc_attribute_unused = 0; /* shut-up compiler */ @@ -547,20 +546,15 @@ static void read_extended_topo(struct hwloc_x86_backend_data_s *data, struct pro eax = leaf; cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump); /* Intel specifies that the 0x0b/0x1f loop should stop when we get "invalid domain" (0 in ecx[8:15]) - * (if so, we also get 0 in eax/ebx for invalid subleaves). + * (if so, we also get 0 in eax/ebx for invalid subleaves). Zhaoxin implements this too. * However AMD rather says that the 0x80000026/0x0b loop should stop when we get "no thread at this level" (0 in ebx[0:15]). - * Zhaoxin follows the Intel specs but also returns "no thread at this level" for the last *valid* level (at least on KH-4000). - * From the Linux kernel code, it's very likely that AMD also returns "invalid domain" - * (because detect_extended_topology() uses that for all x86 CPUs) - * but keep with the official doc until AMD can clarify that (see #593). + * + * Linux kernel <= 6.8 used "invalid domain" for both Intel and AMD (in detect_extended_topology()) + * but x86 discovery revamp in 6.9 now properly checks both Intel and AMD conditions (in topo_subleaf()). + * So let's assume we are allowed to break-out once one of the Intel+AMD conditions is met. 
*/ - if (cpuid_type == amd) { - if (!(ebx & 0xffff)) - break; - } else { - if (!(ecx & 0xff00)) - break; - } + if (!(ebx & 0xffff) || !(ecx & 0xff00)) + break; apic_packageshift = eax & 0x1f; } @@ -572,13 +566,8 @@ static void read_extended_topo(struct hwloc_x86_backend_data_s *data, struct pro ecx = level; eax = leaf; cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump); - if (cpuid_type == amd) { - if (!(ebx & 0xffff)) - break; - } else { - if (!(ecx & 0xff00)) - break; - } + if (!(ebx & 0xffff) || !(ecx & 0xff00)) + break; apic_nextshift = eax & 0x1f; apic_type = (ecx & 0xff00) >> 8; apic_id = edx; @@ -1825,7 +1814,7 @@ hwloc_x86_check_cpuiddump_input(const char *src_cpuiddump_path, hwloc_bitmap_t s goto out_with_path; } fclose(file); - if (strcmp(line, "Architecture: x86\n")) { + if (strncmp(line, "Architecture: x86", 17)) { fprintf(stderr, "hwloc/x86: Found non-x86 dumped cpuid summary in %s: %s\n", path, line); goto out_with_path; } diff --git a/src/3rdparty/hwloc/src/topology-xml-nolibxml.c b/src/3rdparty/hwloc/src/topology-xml-nolibxml.c index 8ea5e385..e59738a9 100644 --- a/src/3rdparty/hwloc/src/topology-xml-nolibxml.c +++ b/src/3rdparty/hwloc/src/topology-xml-nolibxml.c @@ -1,6 +1,6 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2020 Inria. All rights reserved. + * Copyright © 2009-2024 Inria. All rights reserved. * Copyright © 2009-2011 Université Bordeaux * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. 
@@ -41,7 +41,7 @@ typedef struct hwloc__nolibxml_import_state_data_s { static char * hwloc__nolibxml_import_ignore_spaces(char *buffer) { - return buffer + strspn(buffer, " \t\n"); + return buffer + strspn(buffer, " \t\n\r"); } static int diff --git a/src/3rdparty/hwloc/src/topology-xml.c b/src/3rdparty/hwloc/src/topology-xml.c index 70006f63..67c62349 100644 --- a/src/3rdparty/hwloc/src/topology-xml.c +++ b/src/3rdparty/hwloc/src/topology-xml.c @@ -1,6 +1,6 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2023 Inria. All rights reserved. + * Copyright © 2009-2024 Inria. All rights reserved. * Copyright © 2009-2011, 2020 Université Bordeaux * Copyright © 2009-2018 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. @@ -872,6 +872,10 @@ hwloc__xml_import_object(hwloc_topology_t topology, /* deal with possible future type */ obj->type = HWLOC_OBJ_GROUP; obj->attr->group.kind = HWLOC_GROUP_KIND_INTEL_MODULE; + } else if (!strcasecmp(attrvalue, "Cluster")) { + /* deal with possible future type */ + obj->type = HWLOC_OBJ_GROUP; + obj->attr->group.kind = HWLOC_GROUP_KIND_LINUX_CLUSTER; } else if (!strcasecmp(attrvalue, "MemCache")) { /* ignore possible future type */ obj->type = _HWLOC_OBJ_FUTURE; @@ -1344,7 +1348,7 @@ hwloc__xml_v2import_support(hwloc_topology_t topology, HWLOC_BUILD_ASSERT(sizeof(struct hwloc_topology_support) == 4*sizeof(void*)); HWLOC_BUILD_ASSERT(sizeof(struct hwloc_topology_discovery_support) == 6); HWLOC_BUILD_ASSERT(sizeof(struct hwloc_topology_cpubind_support) == 11); - HWLOC_BUILD_ASSERT(sizeof(struct hwloc_topology_membind_support) == 15); + HWLOC_BUILD_ASSERT(sizeof(struct hwloc_topology_membind_support) == 16); HWLOC_BUILD_ASSERT(sizeof(struct hwloc_topology_misc_support) == 1); #endif @@ -1378,6 +1382,7 @@ hwloc__xml_v2import_support(hwloc_topology_t topology, else DO(membind,firsttouch_membind); else DO(membind,bind_membind); else DO(membind,interleave_membind); + else DO(membind,weighted_interleave_membind); 
else DO(membind,nexttouch_membind); else DO(membind,migrate_membind); else DO(membind,get_area_memlocation); @@ -1436,6 +1441,10 @@ hwloc__xml_v2import_distances(hwloc_topology_t topology, } else if (!strcmp(attrname, "kind")) { kind = strtoul(attrvalue, NULL, 10); + /* forward compat with "HOPS" kind in v3 */ + if (kind & (1UL<<5)) + /* hops becomes latency */ + kind = (kind & ~(1UL<<5)) | HWLOC_DISTANCES_KIND_MEANS_LATENCY; } else if (!strcmp(attrname, "name")) { name = attrvalue; @@ -3087,7 +3096,7 @@ hwloc__xml_v2export_support(hwloc__xml_export_state_t parentstate, hwloc_topolog HWLOC_BUILD_ASSERT(sizeof(struct hwloc_topology_support) == 4*sizeof(void*)); HWLOC_BUILD_ASSERT(sizeof(struct hwloc_topology_discovery_support) == 6); HWLOC_BUILD_ASSERT(sizeof(struct hwloc_topology_cpubind_support) == 11); - HWLOC_BUILD_ASSERT(sizeof(struct hwloc_topology_membind_support) == 15); + HWLOC_BUILD_ASSERT(sizeof(struct hwloc_topology_membind_support) == 16); HWLOC_BUILD_ASSERT(sizeof(struct hwloc_topology_misc_support) == 1); #endif @@ -3132,6 +3141,7 @@ hwloc__xml_v2export_support(hwloc__xml_export_state_t parentstate, hwloc_topolog DO(membind,firsttouch_membind); DO(membind,bind_membind); DO(membind,interleave_membind); + DO(membind,weighted_interleave_membind); DO(membind,nexttouch_membind); DO(membind,migrate_membind); DO(membind,get_area_memlocation); diff --git a/src/3rdparty/hwloc/src/topology.c b/src/3rdparty/hwloc/src/topology.c index 9dc2b07c..305f807a 100644 --- a/src/3rdparty/hwloc/src/topology.c +++ b/src/3rdparty/hwloc/src/topology.c @@ -465,6 +465,20 @@ hwloc_debug_print_objects(int indent __hwloc_attribute_unused, hwloc_obj_t obj) #define hwloc_debug_print_objects(indent, obj) do { /* nothing */ } while (0) #endif /* !HWLOC_DEBUG */ +int hwloc_obj_set_subtype(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_obj_t obj, const char *subtype) +{ + char *new = NULL; + if (subtype) { + new = strdup(subtype); + if (!new) + return -1; + } + if 
(obj->subtype) + free(obj->subtype); + obj->subtype = new; + return 0; +} + void hwloc__free_infos(struct hwloc_info_s *infos, unsigned count) { unsigned i;