diff --git a/src/3rdparty/hwloc/NEWS b/src/3rdparty/hwloc/NEWS index 0ec17bb6..0bf74d44 100644 --- a/src/3rdparty/hwloc/NEWS +++ b/src/3rdparty/hwloc/NEWS @@ -1,5 +1,5 @@ Copyright © 2009 CNRS -Copyright © 2009-2020 Inria. All rights reserved. +Copyright © 2009-2021 Inria. All rights reserved. Copyright © 2009-2013 Université Bordeaux Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. Copyright © 2020 Hewlett Packard Enterprise. All rights reserved. @@ -17,6 +17,76 @@ bug fixes (and other actions) for each version of hwloc since version 0.9. +Version 2.5.0 +------------- +* API + + Add hwloc/windows.h to query Windows processor groups. + + Add hwloc_get_obj_with_same_locality() to convert between objects + with same locality, for instance NUMA nodes and Packages, + or OS devices within a PCI device. + + Add hwloc_distances_transform() to modify distances structures. + - hwloc-annotate and lstopo have new distances-transform options. + + hwloc_distances_add() is replaced with _add_create() followed by + _add_values() and _add_commit(). See hwloc/distances.h for details. + + Add topology flags to mitigate binding modifications during + hwloc discovery, especially on Windows: + - HWLOC_TOPOLOGY_FLAG_RESTRICT_TO_CPUBINDING and _MEMBINDING + restrict discovery to PUs and NUMA nodes inside the binding. + - HWLOC_TOPOLOGY_FLAG_DONT_CHANGE_BINDING prevents hwloc from ever + changing the binding during discovery. +* Backends + + Add a levelzero backend for oneAPI L0 devices, exposed as OS devices + of subtype "LevelZero" and name such as "ze0". + - Add hwloc/levelzero.h for converting + between L0 API devices and hwloc cpusets or OS devices. + + Expose NEC Vector Engine cards on Linux as OS devices of subtype + "VectorEngine" and name "ve0", etc. + Thanks to Anara Kozhokanova, Tim Cramer and Erich Focht for the help. 
+ + Add a NVLinkBandwidth distances structure between NVIDIA GPUs + (and POWER processor or NVSwitches) in the NVML backend, + and a XGMIBandwidth distances structure between AMD GPUs + in the RSMI backend. + - See "Topology Attributes: Distances, Memory Attributes and CPU Kinds" + in the documentation for details about these new distances. + + Add support for NUMA node 0 being offline in Linux, thanks to Jirka Hladky. +* Build + + Add --with-cuda-version=<version> or look at the CUDA_VERSION + environment variable to find the appropriate CUDA pkg-config files. + Thanks to Stephen Herbein for the suggestion. + - Also add --with-cuda=<dir> to specify the CUDA installation path + manually (and its NVML and OpenCL components). + Thanks to Andrea Bocci for the suggestion. + - See "How do I enable CUDA and select which CUDA version to use?" + in the FAQ for details. +* Tools + + lstopo now has a --windows-processor-groups option on Windows. + + hwloc-ps now has a --short-name option to avoid long/truncated + command path. + + hwloc-ps now has a --single-ancestor option to return a single + (possibly too large) object where a process is bound. + + hwloc-ps --pid-cmd may now query environment variables, + including MPI-specific variables to find out process ranks. + + +Version 2.4.1 +------------- +* Fix AMD OpenCL device locality when PCI bus or device number >= 128. + Thanks to Edgar Leon for reporting the issue. + + Applications using any of the following inline functions must + be recompiled to get the fix: hwloc_opencl_get_device_pci_busid(), + hwloc_opencl_get_device_cpuset(), hwloc_opencl_get_device_osdev(). +* Fix the ranking of cpukinds on non-Windows systems, + thanks to Ivan Kochin for the report. +* Fix the insertion of custom Groups after loading the topology, + thanks to Scott Hicks. +* Add support for CPU0 being offline in Linux, thanks to Garrett Clay. +* Fix missing x86 Package and Core objects on FreeBSD/NetBSD. 
+ Thanks to Thibault Payet and Yuri Victorovich for the report. +* Fix the import of very large distances with heterogeneous object types. +* Fix a memory leak in the Linux backend, + thanks to Perceval Anichini. + + Version 2.4.0 ------------- * API diff --git a/src/3rdparty/hwloc/VERSION b/src/3rdparty/hwloc/VERSION index 979c2cc8..a74f0a53 100644 --- a/src/3rdparty/hwloc/VERSION +++ b/src/3rdparty/hwloc/VERSION @@ -8,7 +8,7 @@ # Please update HWLOC_VERSION* in contrib/windows/hwloc_config.h too. major=2 -minor=4 +minor=5 release=0 # greek is used for alpha or beta release tags. If it is non-empty, @@ -22,7 +22,7 @@ greek= # The date when this release was created -date="Nov 26, 2020" +date="Jun 14, 2021" # If snapshot=1, then use the value from snapshot_version as the # entire hwloc version (i.e., ignore major, minor, release, and @@ -41,7 +41,7 @@ snapshot_version=${major}.${minor}.${release}${greek}-git # 2. Version numbers are described in the Libtool current:revision:age # format. -libhwloc_so_version=19:0:4 +libhwloc_so_version=20:0:5 libnetloc_so_version=0:0:0 # Please also update the lines in contrib/windows/libhwloc.vcxproj diff --git a/src/3rdparty/hwloc/include/hwloc.h b/src/3rdparty/hwloc/include/hwloc.h index 261626f4..88fac968 100644 --- a/src/3rdparty/hwloc/include/hwloc.h +++ b/src/3rdparty/hwloc/include/hwloc.h @@ -93,7 +93,7 @@ extern "C" { * Two stable releases of the same series usually have the same ::HWLOC_API_VERSION * even if their HWLOC_VERSION are different. */ -#define HWLOC_API_VERSION 0x00020400 +#define HWLOC_API_VERSION 0x00020500 /** \brief Indicate at runtime which hwloc API version was used at build time. * @@ -1966,7 +1966,69 @@ enum hwloc_topology_flags_e { * hwloc and machine support. * */ - HWLOC_TOPOLOGY_FLAG_IMPORT_SUPPORT = (1UL<<3) + HWLOC_TOPOLOGY_FLAG_IMPORT_SUPPORT = (1UL<<3), + + /** \brief Do not consider resources outside of the process CPU binding. 
+ * + * If the binding of the process is limited to a subset of cores, + * ignore the other cores during discovery. + * + * The resulting topology is identical to what a call to hwloc_topology_restrict() + * would generate, but this flag also prevents hwloc from ever touching other + * resources during the discovery. + * + * This flag especially tells the x86 backend to never temporarily + * rebind a thread on any excluded core. This is useful on Windows + * because such temporary rebinding can change the process binding. + * Another use-case is to avoid cores that would not be able to + * perform the hwloc discovery anytime soon because they are busy + * executing some high-priority real-time tasks. + * + * If process CPU binding is not supported, + * the thread CPU binding is considered instead if supported, + * or the flag is ignored. + * + * This flag requires ::HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM as well + * since binding support is required. + */ + HWLOC_TOPOLOGY_FLAG_RESTRICT_TO_CPUBINDING = (1UL<<4), + + /** \brief Do not consider resources outside of the process memory binding. + * + * If the binding of the process is limited to a subset of NUMA nodes, + * ignore the other NUMA nodes during discovery. + * + * The resulting topology is identical to what a call to hwloc_topology_restrict() + * would generate, but this flag also prevents hwloc from ever touching other + * resources during the discovery. + * + * This flag is meant to be used together with + * ::HWLOC_TOPOLOGY_FLAG_RESTRICT_TO_CPUBINDING when both cores + * and NUMA nodes should be ignored outside of the process binding. + * + * If process memory binding is not supported, + * the thread memory binding is considered instead if supported, + * or the flag is ignored. + * + * This flag requires ::HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM as well + * since binding support is required. 
+ */ + HWLOC_TOPOLOGY_FLAG_RESTRICT_TO_MEMBINDING = (1UL<<5), + + /** \brief Do not ever modify the process or thread binding during discovery. + * + * This flag disables all hwloc discovery steps that require a change of + * the process or thread binding. This currently only affects the x86 + * backend which gets entirely disabled. + * + * This is useful when hwloc_topology_load() is called while the + * application also creates additional threads or modifies the binding. + * + * This flag is also a strict way to make sure the process binding will + * not change due to thread binding changes on Windows + * (see ::HWLOC_TOPOLOGY_FLAG_RESTRICT_TO_CPUBINDING). + */ + HWLOC_TOPOLOGY_FLAG_DONT_CHANGE_BINDING = (1UL<<6) }; /** \brief Set OR'ed flags to non-yet-loaded topology. diff --git a/src/3rdparty/hwloc/include/hwloc/autogen/config.h b/src/3rdparty/hwloc/include/hwloc/autogen/config.h index e490466b..eb70ba49 100644 --- a/src/3rdparty/hwloc/include/hwloc/autogen/config.h +++ b/src/3rdparty/hwloc/include/hwloc/autogen/config.h @@ -11,10 +11,10 @@ #ifndef HWLOC_CONFIG_H #define HWLOC_CONFIG_H -#define HWLOC_VERSION "2.4.1" +#define HWLOC_VERSION "2.5.0" #define HWLOC_VERSION_MAJOR 2 -#define HWLOC_VERSION_MINOR 4 -#define HWLOC_VERSION_RELEASE 1 +#define HWLOC_VERSION_MINOR 5 +#define HWLOC_VERSION_RELEASE 0 #define HWLOC_VERSION_GREEK "" #define __hwloc_restrict diff --git a/src/3rdparty/hwloc/include/hwloc/cuda.h b/src/3rdparty/hwloc/include/hwloc/cuda.h index 582270d1..72fb8ccb 100644 --- a/src/3rdparty/hwloc/include/hwloc/cuda.h +++ b/src/3rdparty/hwloc/include/hwloc/cuda.h @@ -1,5 +1,5 @@ /* - * Copyright © 2010-2020 Inria. All rights reserved. + * Copyright © 2010-2021 Inria. All rights reserved. * Copyright © 2010-2011 Université Bordeaux * Copyright © 2011 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. 
@@ -75,7 +75,7 @@ hwloc_cuda_get_device_pci_ids(hwloc_topology_t topology __hwloc_attribute_unused /** \brief Get the CPU set of processors that are physically * close to device \p cudevice. * - * Return the CPU set describing the locality of the CUDA device \p cudevice. + * Store in \p set the CPU-set describing the locality of the CUDA device \p cudevice. * * Topology \p topology and device \p cudevice must match the local machine. * I/O devices detection and the CUDA component are not needed in the topology. @@ -120,8 +120,8 @@ hwloc_cuda_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused, /** \brief Get the hwloc PCI device object corresponding to the * CUDA device \p cudevice. * - * Return the PCI device object describing the CUDA device \p cudevice. - * Return NULL if there is none. + * \return The hwloc PCI device object describing the CUDA device \p cudevice. + * \return \c NULL if none could be found. * * Topology \p topology and device \p cudevice must match the local machine. * I/O devices detection must be enabled in topology \p topology. @@ -140,8 +140,8 @@ hwloc_cuda_get_device_pcidev(hwloc_topology_t topology, CUdevice cudevice) /** \brief Get the hwloc OS device object corresponding to CUDA device \p cudevice. * - * Return the hwloc OS device object that describes the given - * CUDA device \p cudevice. Return NULL if there is none. + * \return The hwloc OS device object that describes the given CUDA device \p cudevice. + * \return \c NULL if none could be found. * * Topology \p topology and device \p cudevice must match the local machine. * I/O devices detection and the CUDA component must be enabled in the topology. @@ -183,8 +183,8 @@ hwloc_cuda_get_device_osdev(hwloc_topology_t topology, CUdevice cudevice) /** \brief Get the hwloc OS device object corresponding to the * CUDA device whose index is \p idx. * - * Return the OS device object describing the CUDA device whose - * index is \p idx. Return NULL if there is none. 
+ * \return The hwloc OS device object describing the CUDA device whose index is \p idx. + * \return \c NULL if none could be found. * * The topology \p topology does not necessarily have to match the current * machine. For instance the topology may be an XML import of a remote host. diff --git a/src/3rdparty/hwloc/include/hwloc/cudart.h b/src/3rdparty/hwloc/include/hwloc/cudart.h index 059727ae..676cffec 100644 --- a/src/3rdparty/hwloc/include/hwloc/cudart.h +++ b/src/3rdparty/hwloc/include/hwloc/cudart.h @@ -1,5 +1,5 @@ /* - * Copyright © 2010-2020 Inria. All rights reserved. + * Copyright © 2010-2021 Inria. All rights reserved. * Copyright © 2010-2011 Université Bordeaux * Copyright © 2011 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. @@ -72,7 +72,7 @@ hwloc_cudart_get_device_pci_ids(hwloc_topology_t topology __hwloc_attribute_unus /** \brief Get the CPU set of processors that are physically * close to device \p idx. * - * Return the CPU set describing the locality of the CUDA device + * Store in \p set the CPU-set describing the locality of the CUDA device * whose index is \p idx. * * Topology \p topology and device \p idx must match the local machine. @@ -117,8 +117,8 @@ hwloc_cudart_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unuse /** \brief Get the hwloc PCI device object corresponding to the * CUDA device whose index is \p idx. * - * Return the PCI device object describing the CUDA device whose - * index is \p idx. Return NULL if there is none. + * \return The hwloc PCI device object describing the CUDA device whose index is \p idx. + * \return \c NULL if none could be found. * * Topology \p topology and device \p idx must match the local machine. * I/O devices detection must be enabled in topology \p topology. @@ -138,8 +138,8 @@ hwloc_cudart_get_device_pcidev(hwloc_topology_t topology, int idx) /** \brief Get the hwloc OS device object corresponding to the * CUDA device whose index is \p idx. 
* - * Return the OS device object describing the CUDA device whose - * index is \p idx. Return NULL if there is none. + * \return The hwloc OS device object describing the CUDA device whose index is \p idx. + * \return \c NULL if none could be found. * * The topology \p topology does not necessarily have to match the current * machine. For instance the topology may be an XML import of a remote host. diff --git a/src/3rdparty/hwloc/include/hwloc/deprecated.h b/src/3rdparty/hwloc/include/hwloc/deprecated.h index 4a231f50..f2419dd4 100644 --- a/src/3rdparty/hwloc/include/hwloc/deprecated.h +++ b/src/3rdparty/hwloc/include/hwloc/deprecated.h @@ -1,6 +1,6 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2018 Inria. All rights reserved. + * Copyright © 2009-2021 Inria. All rights reserved. * Copyright © 2009-2012 Université Bordeaux * Copyright © 2009-2010 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. @@ -30,6 +30,15 @@ extern "C" { /* backward compat with v1.10 before Node->NUMANode clarification */ #define HWLOC_OBJ_NODE HWLOC_OBJ_NUMANODE +/** \brief Add a distances structure. + * + * Superseded by hwloc_distances_add_create()+hwloc_distances_add_values()+hwloc_distances_add_commit() + * in v2.5. + */ +HWLOC_DECLSPEC int hwloc_distances_add(hwloc_topology_t topology, + unsigned nbobjs, hwloc_obj_t *objs, hwloc_uint64_t *values, + unsigned long kind, unsigned long flags) __hwloc_attribute_deprecated; + /** \brief Insert a misc object by parent. * * Identical to hwloc_topology_insert_misc_object(). diff --git a/src/3rdparty/hwloc/include/hwloc/distances.h b/src/3rdparty/hwloc/include/hwloc/distances.h index 57e53cd5..6eac94e9 100644 --- a/src/3rdparty/hwloc/include/hwloc/distances.h +++ b/src/3rdparty/hwloc/include/hwloc/distances.h @@ -1,5 +1,5 @@ /* - * Copyright © 2010-2020 Inria. All rights reserved. + * Copyright © 2010-2021 Inria. All rights reserved. * See COPYING in top-level directory. 
*/ @@ -35,9 +35,19 @@ extern "C" { * from a core in another node. * The corresponding kind is ::HWLOC_DISTANCES_KIND_FROM_OS | ::HWLOC_DISTANCES_KIND_FROM_USER. * The name of this distances structure is "NUMALatency". + * Other distances structures include "XGMIBandwidth" and "NVLinkBandwidth". * * The matrix may also contain bandwidths between random sets of objects, * possibly provided by the user, as specified in the \p kind attribute. + * + * Pointers \p objs and \p values should not be replaced, reallocated, freed, etc. + * However callers are allowed to modify \p kind as well as the contents + * of \p objs and \p values arrays. + * For instance, if there is a single NUMA node per Package, + * hwloc_get_obj_with_same_locality() may be used to convert between them + * and replace NUMA nodes in the \p objs array with the corresponding Packages. + * See also hwloc_distances_transform() for applying some transformations + * to the structure. */ struct hwloc_distances_s { unsigned nbobjs; /**< \brief Number of objects described by the distance matrix. */ @@ -91,6 +101,8 @@ enum hwloc_distances_kind_e { HWLOC_DISTANCES_KIND_MEANS_BANDWIDTH = (1UL<<3), /** \brief This distances structure covers objects of different types. + * This may apply to the "NVLinkBandwidth" structure in presence + * of a NVSwitch or POWER processor NVLink port. * \hideinitializer */ HWLOC_DISTANCES_KIND_HETEROGENEOUS_TYPES = (1UL<<4) @@ -147,6 +159,7 @@ hwloc_distances_get_by_type(hwloc_topology_t topology, hwloc_obj_type_t type, * Usually only one distances structure may match a given name. * * The name of the most common structure is "NUMALatency". + * Others include "XGMIBandwidth" and "NVLinkBandwidth". 
*/ HWLOC_DECLSPEC int hwloc_distances_get_by_name(hwloc_topology_t topology, const char *name, @@ -168,6 +181,85 @@ hwloc_distances_get_name(hwloc_topology_t topology, struct hwloc_distances_s *di HWLOC_DECLSPEC void hwloc_distances_release(hwloc_topology_t topology, struct hwloc_distances_s *distances); +/** \brief Transformations of distances structures. */ +enum hwloc_distances_transform_e { + /** \brief Remove \c NULL objects from the distances structure. + * + * Every object that was replaced with \c NULL in the \p objs array + * is removed and the \p values array is updated accordingly. + * + * At least \c 2 objects must remain, otherwise hwloc_distances_transform() + * will return \c -1 with \p errno set to \c EINVAL. + * + * \p kind will be updated with or without ::HWLOC_DISTANCES_KIND_HETEROGENEOUS_TYPES + * according to the remaining objects. + * + * \hideinitializer + */ + HWLOC_DISTANCES_TRANSFORM_REMOVE_NULL = 0, + + /** \brief Replace bandwidth values with a number of links. + * + * Usually all values will be either \c 0 (no link) or \c 1 (one link). + * However some matrices could get larger values if some pairs of + * peers are connected by different numbers of links. + * + * Values on the diagonal are set to \c 0. + * + * This transformation only applies to bandwidth matrices. + * + * \hideinitializer + */ + HWLOC_DISTANCES_TRANSFORM_LINKS = 1, + + /** \brief Merge switches with multiple ports into a single object. + * This currently only applies to NVSwitches where GPUs seem connected to different + * separate switch ports in the NVLinkBandwidth matrix. This transformation will + * replace all of them with the same port connected to all GPUs. + * Other ports are removed by applying ::HWLOC_DISTANCES_TRANSFORM_REMOVE_NULL internally. + * \hideinitializer + */ + HWLOC_DISTANCES_TRANSFORM_MERGE_SWITCH_PORTS = 2, + + /** \brief Apply a transitive closure to the matrix to connect objects across switches. 
+ * This currently only applies to GPUs and NVSwitches in the NVLinkBandwidth matrix. + * All pairs of GPUs will be reported as directly connected. + * \hideinitializer + */ + HWLOC_DISTANCES_TRANSFORM_TRANSITIVE_CLOSURE = 3 +}; + +/** \brief Apply a transformation to a distances structure. + * + * Modify a distances structure that was previously obtained with + * hwloc_distances_get() or one of its variants. + * + * This modifies the local copy of the distances structures but does + * not modify the distances information stored inside the topology + * (retrieved by another call to hwloc_distances_get() or exported to XML). + * To do so, one should add a new distances structure with same + * name, kind, objects and values (see \ref hwlocality_distances_add) + * and then remove this old one with hwloc_distances_release_remove(). + * + * \p transform must be one of the transformations listed + * in ::hwloc_distances_transform_e. + * + * These transformations may modify the contents of the \p objs or \p values arrays. + * + * \p transform_attr must be \c NULL for now. + * + * \p flags must be \c 0 for now. + * + * \note Objects in distances array \p objs may be directly modified + * in place without using hwloc_distances_transform(). + * One may use hwloc_get_obj_with_same_locality() to easily convert + * between similar objects of different types. 
+ */ +HWLOC_DECLSPEC int hwloc_distances_transform(hwloc_topology_t topology, struct hwloc_distances_s *distances, + enum hwloc_distances_transform_e transform, + void *transform_attr, + unsigned long flags); + /** @} */ @@ -215,13 +307,84 @@ hwloc_distances_obj_pair_values(struct hwloc_distances_s *distances, -/** \defgroup hwlocality_distances_add Add or remove distances between objects +/** \defgroup hwlocality_distances_add Add distances between objects + * + * The usual way to add distances is: + * \code + * hwloc_distances_add_handle_t handle; + * int err = -1; + * handle = hwloc_distances_add_create(topology, "name", kind, 0); + * if (handle) { + * err = hwloc_distances_add_values(topology, handle, nbobjs, objs, values, 0); + * if (!err) + * err = hwloc_distances_add_commit(topology, handle, flags); + * } + * \endcode + * If \p err is \c 0 at the end, then addition was successful. + * * @{ */ +/** \brief Handle to a new distances structure during its addition to the topology. */ +typedef void * hwloc_distances_add_handle_t; + +/** \brief Create a new empty distances structure. + * + * Create an empty distances structure + * to be filled with hwloc_distances_add_values() + * and then committed with hwloc_distances_add_commit(). + * + * Parameter \p name is optional, it may be \c NULL. + * Otherwise, it will be copied internally and may later be freed by the caller. + * + * \p kind specifies the kind of distance as a OR'ed set of ::hwloc_distances_kind_e. + * Kind ::HWLOC_DISTANCES_KIND_HETEROGENEOUS_TYPES will be automatically set + * according to objects having different types in hwloc_distances_add_values(). + * + * \p flags must be \c 0 for now. + * + * \return A hwloc_distances_add_handle_t that should then be passed + * to hwloc_distances_add_values() and hwloc_distances_add_commit(). + * + * \return \c NULL on error. 
+ */ +HWLOC_DECLSPEC hwloc_distances_add_handle_t +hwloc_distances_add_create(hwloc_topology_t topology, + const char *name, unsigned long kind, + unsigned long flags); + +/** \brief Specify the objects and values in a new empty distances structure. + * + * Specify the objects and values for a new distances structure + * that was returned as a handle by hwloc_distances_add_create(). + * The structure must then be committed with hwloc_distances_add_commit(). + * + * The number of objects is \p nbobjs and the array of objects is \p objs. + * Distance values are stored as a one-dimension array in \p values. + * The distance from object i to object j is in slot i*nbobjs+j. + * + * \p nbobjs must be at least 2. + * + * Arrays \p objs and \p values will be copied internally, + * they may later be freed by the caller. + * + * On error, the temporary distances structure and its content are destroyed. + * + * \p flags must be \c 0 for now. + * + * \return \c 0 on success. + * \return \c -1 on error. + */ +HWLOC_DECLSPEC int hwloc_distances_add_values(hwloc_topology_t topology, + hwloc_distances_add_handle_t handle, + unsigned nbobjs, hwloc_obj_t *objs, + hwloc_uint64_t *values, + unsigned long flags); + /** \brief Flags for adding a new distances to a topology. */ enum hwloc_distances_add_flag_e { /** \brief Try to group objects based on the newly provided distance information. + * This is ignored for distances between objects of different types. * \hideinitializer */ HWLOC_DISTANCES_ADD_FLAG_GROUP = (1UL<<0), @@ -233,23 +396,33 @@ enum hwloc_distances_add_flag_e { HWLOC_DISTANCES_ADD_FLAG_GROUP_INACCURATE = (1UL<<1) }; -/** \brief Provide a new distance matrix. +/** \brief Commit a new distances structure. * - * Provide the matrix of distances between a set of objects given by \p nbobjs - * and the \p objs array. \p nbobjs must be at least 2. - * The distances are stored as a one-dimension array in \p values. 
- * The distance from object i to object j is in slot i*nbobjs+j. + * This function finalizes the distances structure and inserts it in the topology. * - * \p kind specifies the kind of distance as a OR'ed set of ::hwloc_distances_kind_e. - * Kind ::HWLOC_DISTANCES_KIND_HETEROGENEOUS_TYPES will be automatically added - * if objects of different types are given. + * Parameter \p handle was previously returned by hwloc_distances_add_create(). + * Then objects and values were specified with hwloc_distances_add_values(). * * \p flags configures the behavior of the function using an optional OR'ed set of * ::hwloc_distances_add_flag_e. + * It may be used to request the grouping of existing objects based on distances. + * + * On error, the temporary distances structure and its content are destroyed. + * + * \return \c 0 on success. + * \return \c -1 on error. + */ +HWLOC_DECLSPEC int hwloc_distances_add_commit(hwloc_topology_t topology, + hwloc_distances_add_handle_t handle, + unsigned long flags); + +/** @} */ + + + +/** \defgroup hwlocality_distances_remove Remove distances between objects + * @{ */ -HWLOC_DECLSPEC int hwloc_distances_add(hwloc_topology_t topology, - unsigned nbobjs, hwloc_obj_t *objs, hwloc_uint64_t *values, - unsigned long kind, unsigned long flags); /** \brief Remove all distance matrices from a topology. * diff --git a/src/3rdparty/hwloc/include/hwloc/gl.h b/src/3rdparty/hwloc/include/hwloc/gl.h index 897ef784..56a402a8 100644 --- a/src/3rdparty/hwloc/include/hwloc/gl.h +++ b/src/3rdparty/hwloc/include/hwloc/gl.h @@ -1,6 +1,6 @@ /* * Copyright © 2012 Blue Brain Project, EPFL. All rights reserved. - * Copyright © 2012-2013 Inria. All rights reserved. + * Copyright © 2012-2021 Inria. All rights reserved. * See COPYING in top-level directory. */ @@ -39,9 +39,9 @@ extern "C" { /** \brief Get the hwloc OS device object corresponding to the * OpenGL display given by port and device index. 
* - * Return the OS device object describing the OpenGL display + * \return The hwloc OS device object describing the OpenGL display * whose port (server) is \p port and device (screen) is \p device. - * Return NULL if there is none. + * \return \c NULL if none could be found. * * The topology \p topology does not necessarily have to match the current * machine. For instance the topology may be an XML import of a remote host. @@ -70,9 +70,9 @@ hwloc_gl_get_display_osdev_by_port_device(hwloc_topology_t topology, /** \brief Get the hwloc OS device object corresponding to the * OpenGL display given by name. * - * Return the OS device object describing the OpenGL display + * \return The hwloc OS device object describing the OpenGL display * whose name is \p name, built as ":port.device" such as ":0.0" . - * Return NULL if there is none. + * \return \c NULL if none could be found. * * The topology \p topology does not necessarily have to match the current * machine. For instance the topology may be an XML import of a remote host. @@ -99,9 +99,10 @@ hwloc_gl_get_display_osdev_by_name(hwloc_topology_t topology, /** \brief Get the OpenGL display port and device corresponding * to the given hwloc OS object. * - * Return the OpenGL display port (server) in \p port and device (screen) + * Retrieves the OpenGL display port (server) in \p port and device (screen) * in \p screen that correspond to the given hwloc OS device object. - * Return \c -1 if there is none. + * + * \return \c -1 if none could be found. * * The topology \p topology does not necessarily have to match the current * machine. For instance the topology may be an XML import of a remote host. diff --git a/src/3rdparty/hwloc/include/hwloc/helper.h b/src/3rdparty/hwloc/include/hwloc/helper.h index 8e4d4532..f918d816 100644 --- a/src/3rdparty/hwloc/include/hwloc/helper.h +++ b/src/3rdparty/hwloc/include/hwloc/helper.h @@ -1,6 +1,6 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2020 Inria. All rights reserved. 
+ * Copyright © 2009-2021 Inria. All rights reserved. * Copyright © 2009-2012 Université Bordeaux * Copyright © 2009-2010 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. @@ -807,6 +807,49 @@ hwloc_get_obj_below_array_by_type (hwloc_topology_t topology, int nr, hwloc_obj_ return obj; } +/** \brief Return an object of a different type with same locality. + * + * If the source object \p src is a normal or memory type, + * this function returns an object of type \p type with same + * CPU and node sets, either below or above in the hierarchy. + * + * If the source object \p src is a PCI or an OS device within a PCI + * device, the function may either return that PCI device, or another + * OS device in the same PCI parent. + * This may for instance be useful for converting OS devices + * such as "nvml0" or "rsmi1" used in distance structures into + * the PCI device, or the CUDA or OpenCL OS device that corresponds + * to the same physical card. + * + * If not \c NULL, parameter \p subtype only selects objects whose + * subtype attribute exists and is \p subtype (case-insensitively), + * for instance "OpenCL" or "CUDA". + * + * If not \c NULL, parameter \p nameprefix only selects objects whose + * name attribute exists and starts with \p nameprefix (case-insensitively), + * for instance "rsmi" for matching "rsmi0". + * + * If multiple objects match, the first one is returned. + * + * This function will not walk the hierarchy across bridges since + * the PCI locality may become different. + * This function also cannot convert between normal/memory objects + * and I/O or Misc objects. + * + * \p flags must be \c 0 for now. + * + * \return An object with identical locality, + * matching \p subtype and \p nameprefix if any. + * + * \return \c NULL if no matching object could be found, + * or if the source object and target type are incompatible, + * for instance if converting between CPU and I/O objects. 
+ */ +HWLOC_DECLSPEC hwloc_obj_t +hwloc_get_obj_with_same_locality(hwloc_topology_t topology, hwloc_obj_t src, + hwloc_obj_type_t type, const char *subtype, const char *nameprefix, + unsigned long flags); + /** @} */ diff --git a/src/3rdparty/hwloc/include/hwloc/levelzero.h b/src/3rdparty/hwloc/include/hwloc/levelzero.h new file mode 100644 index 00000000..4c356fc8 --- /dev/null +++ b/src/3rdparty/hwloc/include/hwloc/levelzero.h @@ -0,0 +1,157 @@ +/* + * Copyright © 2021 Inria. All rights reserved. + * See COPYING in top-level directory. + */ + +/** \file + * \brief Macros to help interaction between hwloc and the oneAPI Level Zero interface. + * + * Applications that use both hwloc and Level Zero may want to + * include this file so as to get topology information for L0 devices. + */ + +#ifndef HWLOC_LEVELZERO_H +#define HWLOC_LEVELZERO_H + +#include "hwloc.h" +#include "hwloc/autogen/config.h" +#include "hwloc/helper.h" +#ifdef HWLOC_LINUX_SYS +#include "hwloc/linux.h" +#endif + +#include <level_zero/ze_api.h> +#include <level_zero/zes_api.h> + + +#ifdef __cplusplus +extern "C" { +#endif + + +/** \defgroup hwlocality_levelzero Interoperability with the oneAPI Level Zero interface. + * + * This interface offers ways to retrieve topology information about + * devices managed by the Level Zero API. + * + * @{ + */ + +/** \brief Get the CPU set of logical processors that are physically + * close to the Level Zero device \p device + * + * Store in \p set the CPU-set describing the locality of + * the Level Zero device \p device. + * + * Topology \p topology and device \p device must match the local machine. + * The Level Zero library must have been initialized with Sysman enabled + * (ZES_ENABLE_SYSMAN=1 in the environment). + * I/O devices detection and the Level Zero component are not needed in the + * topology. + * + * The function only returns the locality of the device. + * If more information about the device is needed, OS objects should + * be used instead, see hwloc_levelzero_get_device_osdev(). 
+ * + * This function is currently only implemented in a meaningful way for + * Linux; other systems will simply get a full cpuset. + */ +static __hwloc_inline int +hwloc_levelzero_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused, + ze_device_handle_t device, hwloc_cpuset_t set) +{ +#ifdef HWLOC_LINUX_SYS + /* If we're on Linux, use the sysfs mechanism to get the local cpus */ +#define HWLOC_LEVELZERO_DEVICE_SYSFS_PATH_MAX 128 + char path[HWLOC_LEVELZERO_DEVICE_SYSFS_PATH_MAX]; + zes_pci_properties_t pci; + zes_device_handle_t sdevice = device; + ze_result_t res; + + if (!hwloc_topology_is_thissystem(topology)) { + errno = EINVAL; + return -1; + } + + res = zesDevicePciGetProperties(sdevice, &pci); + if (res != ZE_RESULT_SUCCESS) { + errno = EINVAL; + return -1; + } + + sprintf(path, "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/local_cpus", + pci.address.domain, pci.address.bus, pci.address.device, pci.address.function); + if (hwloc_linux_read_path_as_cpumask(path, set) < 0 + || hwloc_bitmap_iszero(set)) + hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology)); +#else + /* Non-Linux systems simply get a full cpuset */ + hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology)); +#endif + return 0; +} + +/** \brief Get the hwloc OS device object corresponding to Level Zero device + * \p device. + * + * \return The hwloc OS device object that describes the given Level Zero device \p device. + * \return \c NULL if none could be found. + * + * Topology \p topology and device \p device must match the local machine. + * I/O devices detection and the Level Zero component must be enabled in the + * topology. If not, the locality of the object may still be found using + * hwloc_levelzero_get_device_cpuset(). + * + * \note The corresponding hwloc PCI device may be found by looking + * at the result parent pointer (unless PCI devices are filtered out). 
+ */ +static __hwloc_inline hwloc_obj_t +hwloc_levelzero_get_device_osdev(hwloc_topology_t topology, ze_device_handle_t device) +{ + zes_device_handle_t sdevice = device; + zes_pci_properties_t pci; + ze_result_t res; + hwloc_obj_t osdev; + + if (!hwloc_topology_is_thissystem(topology)) { + errno = EINVAL; + return NULL; + } + + res = zesDevicePciGetProperties(sdevice, &pci); + if (res != ZE_RESULT_SUCCESS) { + /* L0 was likely initialized without sysman, don't bother */ + errno = EINVAL; + return NULL; + } + + osdev = NULL; + while ((osdev = hwloc_get_next_osdev(topology, osdev)) != NULL) { + hwloc_obj_t pcidev = osdev->parent; + + if (strncmp(osdev->name, "ze", 2)) + continue; + + if (pcidev + && pcidev->type == HWLOC_OBJ_PCI_DEVICE + && pcidev->attr->pcidev.domain == pci.address.domain + && pcidev->attr->pcidev.bus == pci.address.bus + && pcidev->attr->pcidev.dev == pci.address.device + && pcidev->attr->pcidev.func == pci.address.function) + return osdev; + + /* FIXME: when we'll have serialnumber, try it in case PCI is filtered-out */ + } + + return NULL; +} + +/** @} */ + + +#ifdef __cplusplus +} /* extern "C" */ +#endif + + +#endif /* HWLOC_LEVELZERO_H */ diff --git a/src/3rdparty/hwloc/include/hwloc/nvml.h b/src/3rdparty/hwloc/include/hwloc/nvml.h index 9d578903..57f36a85 100644 --- a/src/3rdparty/hwloc/include/hwloc/nvml.h +++ b/src/3rdparty/hwloc/include/hwloc/nvml.h @@ -1,5 +1,5 @@ /* - * Copyright © 2012-2020 Inria. All rights reserved. + * Copyright © 2012-2021 Inria. All rights reserved. * See COPYING in top-level directory. */ @@ -39,7 +39,7 @@ extern "C" { /** \brief Get the CPU set of processors that are physically * close to NVML device \p device. * - * Return the CPU set describing the locality of the NVML device \p device. + * Store in \p set the CPU-set describing the locality of the NVML device \p device. * * Topology \p topology and device \p device must match the local machine. 
* I/O devices detection and the NVML component are not needed in the topology. @@ -88,8 +88,8 @@ hwloc_nvml_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused, /** \brief Get the hwloc OS device object corresponding to the * NVML device whose index is \p idx. * - * Return the OS device object describing the NVML device whose - * index is \p idx. Returns NULL if there is none. + * \return The hwloc OS device object describing the NVML device whose index is \p idx. + * \return \c NULL if none could be found. * * The topology \p topology does not necessarily have to match the current * machine. For instance the topology may be an XML import of a remote host. @@ -114,8 +114,8 @@ hwloc_nvml_get_device_osdev_by_index(hwloc_topology_t topology, unsigned idx) /** \brief Get the hwloc OS device object corresponding to NVML device \p device. * - * Return the hwloc OS device object that describes the given - * NVML device \p device. Return NULL if there is none. + * \return The hwloc OS device object that describes the given NVML device \p device. + * \return \c NULL if none could be found. * * Topology \p topology and device \p device must match the local machine. * I/O devices detection and the NVML component must be enabled in the topology. diff --git a/src/3rdparty/hwloc/include/hwloc/opencl.h b/src/3rdparty/hwloc/include/hwloc/opencl.h index 9a2fdacb..395b32e3 100644 --- a/src/3rdparty/hwloc/include/hwloc/opencl.h +++ b/src/3rdparty/hwloc/include/hwloc/opencl.h @@ -113,7 +113,7 @@ hwloc_opencl_get_device_pci_busid(cl_device_id device, /** \brief Get the CPU set of processors that are physically * close to OpenCL device \p device. * - * Return the CPU set describing the locality of the OpenCL device \p device. + * Store in \p set the CPU-set describing the locality of the OpenCL device \p device. * * Topology \p topology and device \p device must match the local machine. * I/O devices detection and the OpenCL component are not needed in the topology. 
@@ -162,10 +162,10 @@ hwloc_opencl_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unuse /** \brief Get the hwloc OS device object corresponding to the * OpenCL device for the given indexes. * - * Return the OS device object describing the OpenCL device + * \return The hwloc OS device object describing the OpenCL device * whose platform index is \p platform_index, * and whose device index within this platform if \p device_index. - * Return NULL if there is none. + * \return \c NULL if there is none. * * The topology \p topology does not necessarily have to match the current * machine. For instance the topology may be an XML import of a remote host. @@ -192,8 +192,9 @@ hwloc_opencl_get_device_osdev_by_index(hwloc_topology_t topology, /** \brief Get the hwloc OS device object corresponding to OpenCL device \p deviceX. * - * Use OpenCL device attributes to find the corresponding hwloc OS device object. - * Return NULL if there is none or if useful attributes are not available. + * \return The hwloc OS device object corresponding to the given OpenCL device \p device. + * \return \c NULL if none could be found, for instance + * if required OpenCL attributes are not available. * * This function currently only works on AMD and NVIDIA OpenCL devices that support * relevant OpenCL extensions. hwloc_opencl_get_device_osdev_by_index() diff --git a/src/3rdparty/hwloc/include/hwloc/openfabrics-verbs.h b/src/3rdparty/hwloc/include/hwloc/openfabrics-verbs.h index bbf25d0f..7cee137e 100644 --- a/src/3rdparty/hwloc/include/hwloc/openfabrics-verbs.h +++ b/src/3rdparty/hwloc/include/hwloc/openfabrics-verbs.h @@ -1,6 +1,6 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2020 Inria. All rights reserved. + * Copyright © 2009-2021 Inria. All rights reserved. * Copyright © 2009-2010 Université Bordeaux * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. 
@@ -44,7 +44,7 @@ extern "C" { /** \brief Get the CPU set of processors that are physically * close to device \p ibdev. * - * Return the CPU set describing the locality of the OpenFabrics + * Store in \p set the CPU-set describing the locality of the OpenFabrics * device \p ibdev (InfiniBand, etc). * * Topology \p topology and device \p ibdev must match the local machine. @@ -88,10 +88,11 @@ hwloc_ibv_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused, /** \brief Get the hwloc OS device object corresponding to the OpenFabrics * device named \p ibname. * - * Return the OS device object describing the OpenFabrics device + * \return The hwloc OS device object describing the OpenFabrics device * (InfiniBand, Omni-Path, usNIC, etc) whose name is \p ibname * (mlx5_0, hfi1_0, usnic_0, qib0, etc). - * Returns NULL if there is none. + * \return \c NULL if none could be found. + * * The name \p ibname is usually obtained from ibv_get_device_name(). * * The topology \p topology does not necessarily have to match the current @@ -117,8 +118,9 @@ hwloc_ibv_get_device_osdev_by_name(hwloc_topology_t topology, /** \brief Get the hwloc OS device object corresponding to the OpenFabrics * device \p ibdev. * - * Return the OS device object describing the OpenFabrics device \p ibdev - * (InfiniBand, etc). Returns NULL if there is none. + * \return The hwloc OS device object describing the OpenFabrics + * device \p ibdev (InfiniBand, etc). + * \return \c NULL if none could be found. * * Topology \p topology and device \p ibdev must match the local machine. * I/O devices detection must be enabled in the topology. diff --git a/src/3rdparty/hwloc/include/hwloc/plugins.h b/src/3rdparty/hwloc/include/hwloc/plugins.h index 06e1c3e9..6e4f1291 100644 --- a/src/3rdparty/hwloc/include/hwloc/plugins.h +++ b/src/3rdparty/hwloc/include/hwloc/plugins.h @@ -1,5 +1,5 @@ /* - * Copyright © 2013-2020 Inria. All rights reserved. + * Copyright © 2013-2021 Inria. All rights reserved. 
* Copyright © 2016 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. */ @@ -27,6 +27,9 @@ struct hwloc_backend; /** \defgroup hwlocality_disc_components Components and Plugins: Discovery components + * + * \note These structures and functions may change when ::HWLOC_COMPONENT_ABI is modified. + * * @{ */ @@ -93,6 +96,9 @@ struct hwloc_disc_component { /** \defgroup hwlocality_disc_backends Components and Plugins: Discovery backends + * + * \note These structures and functions may change when ::HWLOC_COMPONENT_ABI is modified. + * * @{ */ @@ -241,6 +247,9 @@ HWLOC_DECLSPEC int hwloc_backend_enable(struct hwloc_backend *backend); /** \defgroup hwlocality_generic_components Components and Plugins: Generic components + * + * \note These structures and functions may change when ::HWLOC_COMPONENT_ABI is modified. + * * @{ */ @@ -310,10 +319,26 @@ struct hwloc_component { /** \defgroup hwlocality_components_core_funcs Components and Plugins: Core functions to be used by components + * + * \note These structures and functions may change when ::HWLOC_COMPONENT_ABI is modified. + * * @{ */ -/** \brief Check whether insertion errors are hidden */ +/** \brief Check whether error messages are hidden. + * + * Callers should print critical error messages + * (e.g. invalid hw topo info, invalid config) + * only if this function returns strictly less than 2. + * + * Callers should print non-critical error messages + * (e.g. failure to initialize CUDA) + * if this function returns 0. + * + * This function returns 1 by default (show critical only), + * 0 in lstopo (show all), + * or anything set in HWLOC_HIDE_ERRORS in the environment. + */ HWLOC_DECLSPEC int hwloc_hide_errors(void); /** \brief Add an object to the topology. 
@@ -455,6 +480,9 @@ hwloc_plugin_check_namespace(const char *pluginname __hwloc_attribute_unused, co /** \defgroup hwlocality_components_filtering Components and Plugins: Filtering objects + * + * \note These structures and functions may change when ::HWLOC_COMPONENT_ABI is modified. + * * @{ */ @@ -472,6 +500,7 @@ hwloc_filter_check_pcidev_subtype_important(unsigned classid) || baseclass == 0x0b /* PCI_BASE_CLASS_PROCESSOR */ || classid == 0x0c04 /* PCI_CLASS_SERIAL_FIBER */ || classid == 0x0c06 /* PCI_CLASS_SERIAL_INFINIBAND */ + || baseclass == 0x06 /* PCI_BASE_CLASS_BRIDGE with non-PCI downstream. the core will drop the useless ones later */ || baseclass == 0x12 /* Processing Accelerators */); } @@ -527,6 +556,9 @@ hwloc_filter_check_keep_object(hwloc_topology_t topology, hwloc_obj_t obj) /** \defgroup hwlocality_components_pcidisc Components and Plugins: helpers for PCI discovery + * + * \note These structures and functions may change when ::HWLOC_COMPONENT_ABI is modified. + * * @{ */ @@ -578,18 +610,76 @@ HWLOC_DECLSPEC int hwloc_pcidisc_tree_attach(struct hwloc_topology *topology, st /** \defgroup hwlocality_components_pcifind Components and Plugins: finding PCI objects during other discoveries + * + * \note These structures and functions may change when ::HWLOC_COMPONENT_ABI is modified. + * * @{ */ -/** \brief Find the normal parent of a PCI bus ID. +/** \brief Find the object or a parent of a PCI bus ID. * - * Look at PCI affinity to find out where the given PCI bus ID should be attached. + * When attaching a new object (typically an OS device) whose locality + * is specified by PCI bus ID, this function returns the PCI object + * to use as a parent for attaching. * - * This function should be used to attach an I/O device under the corresponding - * PCI object (if any), or under a normal (non-I/O) object with same locality. + * If the exact PCI device with this bus ID exists, it is returned. 
+ * Otherwise (for instance if it was filtered out), the function returns + * another object with similar locality (for instance a parent bridge, + * or the local CPU Package). */ HWLOC_DECLSPEC struct hwloc_obj * hwloc_pci_find_parent_by_busid(struct hwloc_topology *topology, unsigned domain, unsigned bus, unsigned dev, unsigned func); +/** \brief Find the PCI device or bridge matching a PCI bus ID exactly. + * + * This is useful for adding specific information about some objects + * based on their PCI id. When it comes to attaching objects based on + * PCI locality, hwloc_pci_find_parent_by_busid() should be preferred. + */ +HWLOC_DECLSPEC struct hwloc_obj * hwloc_pci_find_by_busid(struct hwloc_topology *topology, unsigned domain, unsigned bus, unsigned dev, unsigned func); + +/** \brief Handle to a new distances structure during its addition to the topology. */ +typedef void * hwloc_backend_distances_add_handle_t; + +/** \brief Create a new empty distances structure. + * + * This is identical to hwloc_distances_add_create() + * but this variant is designed for backend inserting + * distances during topology discovery. + */ +HWLOC_DECLSPEC hwloc_backend_distances_add_handle_t +hwloc_backend_distances_add_create(hwloc_topology_t topology, + const char *name, unsigned long kind, + unsigned long flags); + +/** \brief Specify the objects and values in a new empty distances structure. + * + * This is similar to hwloc_distances_add_values() + * but this variant is designed for backend inserting + * distances during topology discovery. + * + * The only semantical difference is that \p objs and \p values + * are not duplicated, but directly attached to the topology. + * On success, these arrays are given to the core and should not + * ever be freed by the caller anymore. 
+ */ +HWLOC_DECLSPEC int +hwloc_backend_distances_add_values(hwloc_topology_t topology, + hwloc_backend_distances_add_handle_t handle, + unsigned nbobjs, hwloc_obj_t *objs, + hwloc_uint64_t *values, + unsigned long flags); + +/** \brief Commit a new distances structure. + * + * This is similar to hwloc_distances_add_commit() + * but this variant is designed for backend inserting + * distances during topology discovery. + */ +HWLOC_DECLSPEC int +hwloc_backend_distances_add_commit(hwloc_topology_t topology, + hwloc_backend_distances_add_handle_t handle, + unsigned long flags); + /** @} */ diff --git a/src/3rdparty/hwloc/include/hwloc/rename.h b/src/3rdparty/hwloc/include/hwloc/rename.h index c2a30485..ae439b51 100644 --- a/src/3rdparty/hwloc/include/hwloc/rename.h +++ b/src/3rdparty/hwloc/include/hwloc/rename.h @@ -1,6 +1,6 @@ /* * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. - * Copyright © 2010-2020 Inria. All rights reserved. + * Copyright © 2010-2021 Inria. All rights reserved. * See COPYING in top-level directory. 
*/ @@ -120,6 +120,9 @@ extern "C" { #define HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM HWLOC_NAME_CAPS(TOPOLOGY_FLAG_IS_THISSYSTEM) #define HWLOC_TOPOLOGY_FLAG_THISSYSTEM_ALLOWED_RESOURCES HWLOC_NAME_CAPS(TOPOLOGY_FLAG_THISSYSTEM_ALLOWED_RESOURCES) #define HWLOC_TOPOLOGY_FLAG_IMPORT_SUPPORT HWLOC_NAME_CAPS(TOPOLOGY_FLAG_IMPORT_SUPPORT) +#define HWLOC_TOPOLOGY_FLAG_RESTRICT_TO_CPUBINDING HWLOC_NAME_CAPS(TOPOLOGY_FLAG_RESTRICT_TO_CPUBINDING) +#define HWLOC_TOPOLOGY_FLAG_RESTRICT_TO_MEMBINDING HWLOC_NAME_CAPS(TOPOLOGY_FLAG_RESTRICT_TO_MEMBINDING) +#define HWLOC_TOPOLOGY_FLAG_DONT_CHANGE_BINDING HWLOC_NAME_CAPS(TOPOLOGY_FLAG_DONT_CHANGE_BINDING) #define hwloc_topology_set_pid HWLOC_NAME(topology_set_pid) #define hwloc_topology_set_synthetic HWLOC_NAME(topology_set_synthetic) @@ -356,6 +359,7 @@ extern "C" { #define hwloc_get_closest_objs HWLOC_NAME(get_closest_objs) #define hwloc_get_obj_below_by_type HWLOC_NAME(get_obj_below_by_type) #define hwloc_get_obj_below_array_by_type HWLOC_NAME(get_obj_below_array_by_type) +#define hwloc_get_obj_with_same_locality HWLOC_NAME(get_obj_with_same_locality) #define hwloc_distrib_flags_e HWLOC_NAME(distrib_flags_e) #define HWLOC_DISTRIB_FLAG_REVERSE HWLOC_NAME_CAPS(DISTRIB_FLAG_REVERSE) #define hwloc_distrib HWLOC_NAME(distrib) @@ -454,11 +458,22 @@ extern "C" { #define hwloc_distances_obj_index HWLOC_NAME(distances_obj_index) #define hwloc_distances_obj_pair_values HWLOC_NAME(distances_pair_values) +#define hwloc_distances_transform_e HWLOC_NAME(distances_transform_e) +#define HWLOC_DISTANCES_TRANSFORM_REMOVE_NULL HWLOC_NAME_CAPS(DISTANCES_TRANSFORM_REMOVE_NULL) +#define HWLOC_DISTANCES_TRANSFORM_LINKS HWLOC_NAME_CAPS(DISTANCES_TRANSFORM_LINKS) +#define HWLOC_DISTANCES_TRANSFORM_MERGE_SWITCH_PORTS HWLOC_NAME_CAPS(DISTANCES_TRANSFORM_MERGE_SWITCH_PORTS) +#define HWLOC_DISTANCES_TRANSFORM_TRANSITIVE_CLOSURE HWLOC_NAME_CAPS(DISTANCES_TRANSFORM_TRANSITIVE_CLOSURE) +#define hwloc_distances_transform HWLOC_NAME(distances_transform) + #define 
hwloc_distances_add_flag_e HWLOC_NAME(distances_add_flag_e) #define HWLOC_DISTANCES_ADD_FLAG_GROUP HWLOC_NAME_CAPS(DISTANCES_ADD_FLAG_GROUP) #define HWLOC_DISTANCES_ADD_FLAG_GROUP_INACCURATE HWLOC_NAME_CAPS(DISTANCES_ADD_FLAG_GROUP_INACCURATE) -#define hwloc_distances_add HWLOC_NAME(distances_add) +#define hwloc_distances_add_handle_t HWLOC_NAME(distances_add_handle_t) +#define hwloc_distances_add_create HWLOC_NAME(distances_add_create) +#define hwloc_distances_add_values HWLOC_NAME(distances_add_values) +#define hwloc_distances_add_commit HWLOC_NAME(distances_add_commit) + #define hwloc_distances_remove HWLOC_NAME(distances_remove) #define hwloc_distances_remove_by_depth HWLOC_NAME(distances_remove_by_depth) #define hwloc_distances_remove_by_type HWLOC_NAME(distances_remove_by_type) @@ -523,6 +538,11 @@ extern "C" { #define hwloc_linux_get_tid_last_cpu_location HWLOC_NAME(linux_get_tid_last_cpu_location) #define hwloc_linux_read_path_as_cpumask HWLOC_NAME(linux_read_file_cpumask) +/* windows.h */ + +#define hwloc_windows_get_nr_processor_groups HWLOC_NAME(windows_get_nr_processor_groups) +#define hwloc_windows_get_processor_group_cpuset HWLOC_NAME(windows_get_processor_group_cpuset) + /* openfabrics-verbs.h */ #define hwloc_ibv_get_device_cpuset HWLOC_NAME(ibv_get_device_cpuset) @@ -564,6 +584,11 @@ extern "C" { #define hwloc_rsmi_get_device_osdev HWLOC_NAME(rsmi_get_device_osdev) #define hwloc_rsmi_get_device_osdev_by_index HWLOC_NAME(rsmi_get_device_osdev_by_index) +/* levelzero.h */ + +#define hwloc_levelzero_get_device_cpuset HWLOC_NAME(levelzero_get_device_cpuset) +#define hwloc_levelzero_get_device_osdev HWLOC_NAME(levelzero_get_device_osdev) + /* gl.h */ #define hwloc_gl_get_display_osdev_by_port_device HWLOC_NAME(gl_get_display_osdev_by_port_device) @@ -620,10 +645,18 @@ extern "C" { #define hwloc_pcidisc_tree_insert_by_busid HWLOC_NAME(pcidisc_tree_insert_by_busid) #define hwloc_pcidisc_tree_attach HWLOC_NAME(pcidisc_tree_attach) +#define 
hwloc_pci_find_by_busid HWLOC_NAME(pcidisc_find_by_busid) #define hwloc_pci_find_parent_by_busid HWLOC_NAME(pcidisc_find_busid_parent) +#define hwloc_backend_distances_add_handle_t HWLOC_NAME(backend_distances_add_handle_t) +#define hwloc_backend_distances_add_create HWLOC_NAME(backend_distances_add_create) +#define hwloc_backend_distances_add_values HWLOC_NAME(backend_distances_add_values) +#define hwloc_backend_distances_add_commit HWLOC_NAME(backend_distances_add_commit) + /* hwloc/deprecated.h */ +#define hwloc_distances_add HWLOC_NAME(distances_add) + #define hwloc_topology_insert_misc_object_by_parent HWLOC_NAME(topology_insert_misc_object_by_parent) #define hwloc_obj_cpuset_snprintf HWLOC_NAME(obj_cpuset_snprintf) #define hwloc_obj_type_sscanf HWLOC_NAME(obj_type_sscanf) @@ -733,6 +766,7 @@ extern "C" { #define hwloc_cuda_component HWLOC_NAME(cuda_component) #define hwloc_gl_component HWLOC_NAME(gl_component) +#define hwloc_levelzero_component HWLOC_NAME(levelzero_component) #define hwloc_nvml_component HWLOC_NAME(nvml_component) #define hwloc_rsmi_component HWLOC_NAME(rsmi_component) #define hwloc_opencl_component HWLOC_NAME(opencl_component) @@ -772,7 +806,6 @@ extern "C" { #define hwloc_pci_discovery_init HWLOC_NAME(pci_discovery_init) #define hwloc_pci_discovery_prepare HWLOC_NAME(pci_discovery_prepare) #define hwloc_pci_discovery_exit HWLOC_NAME(pci_discovery_exit) -#define hwloc_pci_find_by_busid HWLOC_NAME(pcidisc_find_by_busid) #define hwloc_find_insert_io_parent_by_complete_cpuset HWLOC_NAME(hwloc_find_insert_io_parent_by_complete_cpuset) #define hwloc__add_info HWLOC_NAME(_add_info) @@ -816,7 +849,6 @@ extern "C" { #define hwloc_internal_distances_dup HWLOC_NAME(internal_distances_dup) #define hwloc_internal_distances_refresh HWLOC_NAME(internal_distances_refresh) #define hwloc_internal_distances_destroy HWLOC_NAME(internal_distances_destroy) - #define hwloc_internal_distances_add HWLOC_NAME(internal_distances_add) #define 
hwloc_internal_distances_add_by_index HWLOC_NAME(internal_distances_add_by_index) #define hwloc_internal_distances_invalidate_cached_objs HWLOC_NAME(hwloc_internal_distances_invalidate_cached_objs) diff --git a/src/3rdparty/hwloc/include/hwloc/rsmi.h b/src/3rdparty/hwloc/include/hwloc/rsmi.h index a6d55b3c..55aa1272 100644 --- a/src/3rdparty/hwloc/include/hwloc/rsmi.h +++ b/src/3rdparty/hwloc/include/hwloc/rsmi.h @@ -1,5 +1,5 @@ /* - * Copyright © 2012-2020 Inria. All rights reserved. + * Copyright © 2012-2021 Inria. All rights reserved. * Copyright (c) 2020, Advanced Micro Devices, Inc. All rights reserved. * Written by Advanced Micro Devices, * See COPYING in top-level directory. @@ -41,7 +41,7 @@ extern "C" { /** \brief Get the CPU set of logical processors that are physically * close to AMD GPU device whose index is \p dv_ind. * - * Return the CPU set describing the locality of the AMD GPU device + * Store in \p set the CPU-set describing the locality of the AMD GPU device * whose index is \p dv_ind. * * Topology \p topology and device \p dv_ind must match the local machine. @@ -96,8 +96,9 @@ hwloc_rsmi_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused, /** \brief Get the hwloc OS device object corresponding to the * AMD GPU device whose index is \p dv_ind. * - * Return the OS device object describing the AMD GPU device whose - * index is \p dv_ind. Returns NULL if there is none. + * \return The hwloc OS device object describing the AMD GPU device whose + * index is \p dv_ind. + * \return \c NULL if none could be found. * * The topology \p topology does not necessarily have to match the current * machine. For instance the topology may be an XML import of a remote host. @@ -124,8 +125,9 @@ hwloc_rsmi_get_device_osdev_by_index(hwloc_topology_t topology, uint32_t dv_ind) /** \brief Get the hwloc OS device object corresponding to AMD GPU device, * whose index is \p dv_ind. 
* - * Return the hwloc OS device object that describes the given - * AMD GPU, whose index is \p dv_ind Return NULL if there is none. + * \return The hwloc OS device object that describes the given + * AMD GPU, whose index is \p dv_ind. + * \return \c NULL if none could be found. * * Topology \p topology and device \p dv_ind must match the local machine. * I/O devices detection and the ROCm SMI component must be enabled in the diff --git a/src/3rdparty/hwloc/include/hwloc/windows.h b/src/3rdparty/hwloc/include/hwloc/windows.h new file mode 100644 index 00000000..dd6c7c99 --- /dev/null +++ b/src/3rdparty/hwloc/include/hwloc/windows.h @@ -0,0 +1,76 @@ +/* + * Copyright © 2021 Inria. All rights reserved. + * See COPYING in top-level directory. + */ + +/** \file + * \brief Macros to help interaction between hwloc and Windows. + * + * Applications that use hwloc on Windows may want to include this file + * for Windows specific hwloc features. + */ + +#ifndef HWLOC_WINDOWS_H +#define HWLOC_WINDOWS_H + +#include "hwloc.h" + + +#ifdef __cplusplus +extern "C" { +#endif + + +/** \defgroup hwlocality_windows Windows-specific helpers + * + * These functions query Windows processor groups. + * These groups partition the operating system into virtual sets + * of up to 64 neighbor PUs. + * Threads and processes may only be bound inside a single group. + * Although Windows processor groups may be exposed in the hwloc + * hierarchy as hwloc Groups, they are also often merged into + * existing hwloc objects such as NUMA nodes or Packages. + * This API provides explicit information about Windows processor + * groups so that applications know whether binding to a large + * set of PUs may fail because it spans over multiple Windows + * processor groups. + * + * @{ + */ + + +/** \brief Get the number of Windows processor groups + * + * \p flags must be 0 for now. + * + * \return at least \c 1 on success. 
+ * \return -1 on error, for instance if the topology does not match + * the current system (e.g. loaded from another machine through XML). + */ +HWLOC_DECLSPEC int hwloc_windows_get_nr_processor_groups(hwloc_topology_t topology, unsigned long flags); + +/** \brief Get the CPU-set of a Windows processor group. + * + * Get the set of PU included in the processor group specified + * by \p pg_index. + * \p pg_index must be between \c 0 and the value returned + * by hwloc_windows_get_nr_processor_groups() minus 1. + * + * \p flags must be 0 for now. + * + * \return \c 0 on success. + * \return \c -1 on error, for instance if \p pg_index is invalid, + * or if the topology does not match the current system (e.g. loaded + * from another machine through XML). + */ +HWLOC_DECLSPEC int hwloc_windows_get_processor_group_cpuset(hwloc_topology_t topology, unsigned pg_index, hwloc_cpuset_t cpuset, unsigned long flags); + +/** @} */ + + +#ifdef __cplusplus +} /* extern "C" */ +#endif + + +#endif /* HWLOC_WINDOWS_H */ diff --git a/src/3rdparty/hwloc/include/private/internal-components.h b/src/3rdparty/hwloc/include/private/internal-components.h index 0b82a45c..65cfdd7d 100644 --- a/src/3rdparty/hwloc/include/private/internal-components.h +++ b/src/3rdparty/hwloc/include/private/internal-components.h @@ -1,5 +1,5 @@ /* - * Copyright © 2018-2019 Inria. All rights reserved. + * Copyright © 2018-2020 Inria. All rights reserved. * * See COPYING in top-level directory. 
*/ @@ -31,6 +31,7 @@ HWLOC_DECLSPEC extern const struct hwloc_component hwloc_cuda_component; HWLOC_DECLSPEC extern const struct hwloc_component hwloc_gl_component; HWLOC_DECLSPEC extern const struct hwloc_component hwloc_nvml_component; HWLOC_DECLSPEC extern const struct hwloc_component hwloc_rsmi_component; +HWLOC_DECLSPEC extern const struct hwloc_component hwloc_levelzero_component; HWLOC_DECLSPEC extern const struct hwloc_component hwloc_opencl_component; HWLOC_DECLSPEC extern const struct hwloc_component hwloc_pci_component; diff --git a/src/3rdparty/hwloc/include/private/private.h b/src/3rdparty/hwloc/include/private/private.h index e0782659..5e216632 100644 --- a/src/3rdparty/hwloc/include/private/private.h +++ b/src/3rdparty/hwloc/include/private/private.h @@ -1,6 +1,6 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2020 Inria. All rights reserved. + * Copyright © 2009-2021 Inria. All rights reserved. * Copyright © 2009-2012, 2020 Université Bordeaux * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. * @@ -166,6 +166,7 @@ struct hwloc_topology { unsigned long kind; #define HWLOC_INTERNAL_DIST_FLAG_OBJS_VALID (1U<<0) /* if the objs array is valid below */ +#define HWLOC_INTERNAL_DIST_FLAG_NOT_COMMITTED (1U<<1) /* if the distances isn't in the list yet */ unsigned iflags; /* objects are currently stored in physical_index order */ @@ -304,11 +305,6 @@ extern void hwloc_pci_discovery_init(struct hwloc_topology *topology); extern void hwloc_pci_discovery_prepare(struct hwloc_topology *topology); extern void hwloc_pci_discovery_exit(struct hwloc_topology *topology); -/* Look for an object matching the given domain/bus/func, - * either exactly or return the smallest container bridge - */ -extern struct hwloc_obj * hwloc_pci_find_by_busid(struct hwloc_topology *topology, unsigned domain, unsigned bus, unsigned dev, unsigned func); - /* Look for an object matching complete cpuset exactly, or insert one. * Return NULL on failure. 
* Return a good fallback (object above) on failure to insert. @@ -408,10 +404,14 @@ extern void hwloc_internal_distances_prepare(hwloc_topology_t topology); extern void hwloc_internal_distances_destroy(hwloc_topology_t topology); extern int hwloc_internal_distances_dup(hwloc_topology_t new, hwloc_topology_t old); extern void hwloc_internal_distances_refresh(hwloc_topology_t topology); -extern int hwloc_internal_distances_add(hwloc_topology_t topology, const char *name, unsigned nbobjs, hwloc_obj_t *objs, uint64_t *values, unsigned long kind, unsigned long flags); -extern int hwloc_internal_distances_add_by_index(hwloc_topology_t topology, const char *name, hwloc_obj_type_t unique_type, hwloc_obj_type_t *different_types, unsigned nbobjs, uint64_t *indexes, uint64_t *values, unsigned long kind, unsigned long flags); extern void hwloc_internal_distances_invalidate_cached_objs(hwloc_topology_t topology); +/* these distances_add() functions are higher-level than those in hwloc/plugins.h + * but they may change in the future, hence they are not exported to plugins. + */ +extern int hwloc_internal_distances_add_by_index(hwloc_topology_t topology, const char *name, hwloc_obj_type_t unique_type, hwloc_obj_type_t *different_types, unsigned nbobjs, uint64_t *indexes, uint64_t *values, unsigned long kind, unsigned long flags); +extern int hwloc_internal_distances_add(hwloc_topology_t topology, const char *name, unsigned nbobjs, hwloc_obj_t *objs, uint64_t *values, unsigned long kind, unsigned long flags); + extern void hwloc_internal_memattrs_init(hwloc_topology_t topology); extern void hwloc_internal_memattrs_prepare(hwloc_topology_t topology); extern void hwloc_internal_memattrs_destroy(hwloc_topology_t topology); diff --git a/src/3rdparty/hwloc/src/components.c b/src/3rdparty/hwloc/src/components.c index 496ed232..81e3116b 100644 --- a/src/3rdparty/hwloc/src/components.c +++ b/src/3rdparty/hwloc/src/components.c @@ -1,5 +1,5 @@ /* - * Copyright © 2009-2020 Inria. 
All rights reserved. + * Copyright © 2009-2021 Inria. All rights reserved. * Copyright © 2012 Université Bordeaux * See COPYING in top-level directory. */ @@ -124,7 +124,7 @@ hwloc_dlforeachfile(const char *_paths, *colon = '\0'; if (hwloc_plugins_verbose) - fprintf(stderr, " Looking under %s\n", path); + fprintf(stderr, "hwloc: Looking under %s\n", path); dir = opendir(path); if (!dir) @@ -198,7 +198,7 @@ hwloc__dlforeach_cb(const char *filename, void *_data __hwloc_attribute_unused) char *componentsymbolname; if (hwloc_plugins_verbose) - fprintf(stderr, "Plugin dlforeach found `%s'\n", filename); + fprintf(stderr, "hwloc: Plugin dlforeach found `%s'\n", filename); basename = strrchr(filename, '/'); if (!basename) @@ -208,7 +208,7 @@ hwloc__dlforeach_cb(const char *filename, void *_data __hwloc_attribute_unused) if (hwloc_plugins_blacklist && strstr(hwloc_plugins_blacklist, basename)) { if (hwloc_plugins_verbose) - fprintf(stderr, "Plugin `%s' is blacklisted in the environment\n", basename); + fprintf(stderr, "hwloc: Plugin `%s' is blacklisted in the environment\n", basename); goto out; } @@ -216,14 +216,14 @@ hwloc__dlforeach_cb(const char *filename, void *_data __hwloc_attribute_unused) handle = hwloc_dlopenext(filename); if (!handle) { if (hwloc_plugins_verbose) - fprintf(stderr, "Failed to load plugin: %s\n", hwloc_dlerror()); + fprintf(stderr, "hwloc: Failed to load plugin: %s\n", hwloc_dlerror()); goto out; } componentsymbolname = malloc(strlen(basename)+10+1); if (!componentsymbolname) { if (hwloc_plugins_verbose) - fprintf(stderr, "Failed to allocation component `%s' symbol\n", + fprintf(stderr, "hwloc: Failed to allocation component `%s' symbol\n", basename); goto out_with_handle; } @@ -231,38 +231,38 @@ hwloc__dlforeach_cb(const char *filename, void *_data __hwloc_attribute_unused) component = hwloc_dlsym(handle, componentsymbolname); if (!component) { if (hwloc_plugins_verbose) - fprintf(stderr, "Failed to find component symbol `%s'\n", + 
fprintf(stderr, "hwloc: Failed to find component symbol `%s'\n", componentsymbolname); free(componentsymbolname); goto out_with_handle; } if (component->abi != HWLOC_COMPONENT_ABI) { if (hwloc_plugins_verbose) - fprintf(stderr, "Plugin symbol ABI %u instead of %d\n", + fprintf(stderr, "hwloc: Plugin symbol ABI %u instead of %d\n", component->abi, HWLOC_COMPONENT_ABI); free(componentsymbolname); goto out_with_handle; } if (hwloc_plugins_verbose) - fprintf(stderr, "Plugin contains expected symbol `%s'\n", + fprintf(stderr, "hwloc: Plugin contains expected symbol `%s'\n", componentsymbolname); free(componentsymbolname); if (HWLOC_COMPONENT_TYPE_DISC == component->type) { if (strncmp(basename, "hwloc_", 6)) { if (hwloc_plugins_verbose) - fprintf(stderr, "Plugin name `%s' doesn't match its type DISCOVERY\n", basename); + fprintf(stderr, "hwloc: Plugin name `%s' doesn't match its type DISCOVERY\n", basename); goto out_with_handle; } } else if (HWLOC_COMPONENT_TYPE_XML == component->type) { if (strncmp(basename, "hwloc_xml_", 10)) { if (hwloc_plugins_verbose) - fprintf(stderr, "Plugin name `%s' doesn't match its type XML\n", basename); + fprintf(stderr, "hwloc: Plugin name `%s' doesn't match its type XML\n", basename); goto out_with_handle; } } else { if (hwloc_plugins_verbose) - fprintf(stderr, "Plugin name `%s' has invalid type %u\n", + fprintf(stderr, "hwloc: Plugin name `%s' has invalid type %u\n", basename, (unsigned) component->type); goto out_with_handle; } @@ -277,7 +277,7 @@ hwloc__dlforeach_cb(const char *filename, void *_data __hwloc_attribute_unused) desc->handle = handle; desc->next = NULL; if (hwloc_plugins_verbose) - fprintf(stderr, "Plugin descriptor `%s' ready\n", basename); + fprintf(stderr, "hwloc: Plugin descriptor `%s' ready\n", basename); /* append to the list */ prevdesc = &hwloc_plugins; @@ -285,7 +285,7 @@ hwloc__dlforeach_cb(const char *filename, void *_data __hwloc_attribute_unused) prevdesc = &((*prevdesc)->next); *prevdesc = desc; if 
(hwloc_plugins_verbose) - fprintf(stderr, "Plugin descriptor `%s' queued\n", basename); + fprintf(stderr, "hwloc: Plugin descriptor `%s' queued\n", basename); return 0; out_with_handle: @@ -300,7 +300,7 @@ hwloc_plugins_exit(void) struct hwloc__plugin_desc *desc, *next; if (hwloc_plugins_verbose) - fprintf(stderr, "Closing all plugins\n"); + fprintf(stderr, "hwloc: Closing all plugins\n"); desc = hwloc_plugins; while (desc) { @@ -340,7 +340,7 @@ hwloc_plugins_init(void) hwloc_plugins = NULL; if (hwloc_plugins_verbose) - fprintf(stderr, "Starting plugin dlforeach in %s\n", path); + fprintf(stderr, "hwloc: Starting plugin dlforeach in %s\n", path); err = hwloc_dlforeachfile(path, hwloc__dlforeach_cb, NULL); if (err) goto out_with_init; @@ -364,14 +364,14 @@ hwloc_disc_component_register(struct hwloc_disc_component *component, /* check that the component name is valid */ if (!strcmp(component->name, HWLOC_COMPONENT_STOP_NAME)) { if (hwloc_components_verbose) - fprintf(stderr, "Cannot register discovery component with reserved name `" HWLOC_COMPONENT_STOP_NAME "'\n"); + fprintf(stderr, "hwloc: Cannot register discovery component with reserved name `" HWLOC_COMPONENT_STOP_NAME "'\n"); return -1; } if (strchr(component->name, HWLOC_COMPONENT_EXCLUDE_CHAR) || strchr(component->name, HWLOC_COMPONENT_PHASESEP_CHAR) || strcspn(component->name, HWLOC_COMPONENT_SEPS) != strlen(component->name)) { if (hwloc_components_verbose) - fprintf(stderr, "Cannot register discovery component with name `%s' containing reserved characters `%c" HWLOC_COMPONENT_SEPS "'\n", + fprintf(stderr, "hwloc: Cannot register discovery component with name `%s' containing reserved characters `%c" HWLOC_COMPONENT_SEPS "'\n", component->name, HWLOC_COMPONENT_EXCLUDE_CHAR); return -1; } @@ -386,8 +386,9 @@ hwloc_disc_component_register(struct hwloc_disc_component *component, |HWLOC_DISC_PHASE_MISC |HWLOC_DISC_PHASE_ANNOTATE |HWLOC_DISC_PHASE_TWEAK))) { - fprintf(stderr, "Cannot register discovery component 
`%s' with invalid phases 0x%x\n", - component->name, component->phases); + if (hwloc_hide_errors() < 2) + fprintf(stderr, "hwloc: Cannot register discovery component `%s' with invalid phases 0x%x\n", + component->name, component->phases); return -1; } @@ -398,13 +399,13 @@ hwloc_disc_component_register(struct hwloc_disc_component *component, if ((*prev)->priority < component->priority) { /* drop the existing component */ if (hwloc_components_verbose) - fprintf(stderr, "Dropping previously registered discovery component `%s', priority %u lower than new one %u\n", + fprintf(stderr, "hwloc: Dropping previously registered discovery component `%s', priority %u lower than new one %u\n", (*prev)->name, (*prev)->priority, component->priority); *prev = (*prev)->next; } else { /* drop the new one */ if (hwloc_components_verbose) - fprintf(stderr, "Ignoring new discovery component `%s', priority %u lower than previously registered one %u\n", + fprintf(stderr, "hwloc: Ignoring new discovery component `%s', priority %u lower than previously registered one %u\n", component->name, component->priority, (*prev)->priority); return -1; } @@ -412,7 +413,7 @@ hwloc_disc_component_register(struct hwloc_disc_component *component, prev = &((*prev)->next); } if (hwloc_components_verbose) - fprintf(stderr, "Registered discovery component `%s' phases 0x%x with priority %u (%s%s)\n", + fprintf(stderr, "hwloc: Registered discovery component `%s' phases 0x%x with priority %u (%s%s)\n", component->name, component->phases, component->priority, filename ? "from plugin " : "statically build", filename ? 
filename : ""); @@ -475,15 +476,16 @@ hwloc_components_init(void) /* hwloc_static_components is created by configure in static-components.h */ for(i=0; NULL != hwloc_static_components[i]; i++) { if (hwloc_static_components[i]->flags) { - fprintf(stderr, "Ignoring static component with invalid flags %lx\n", - hwloc_static_components[i]->flags); + if (hwloc_hide_errors() < 2) + fprintf(stderr, "hwloc: Ignoring static component with invalid flags %lx\n", + hwloc_static_components[i]->flags); continue; } /* initialize the component */ if (hwloc_static_components[i]->init && hwloc_static_components[i]->init(0) < 0) { if (hwloc_components_verbose) - fprintf(stderr, "Ignoring static component, failed to initialize\n"); + fprintf(stderr, "hwloc: Ignoring static component, failed to initialize\n"); continue; } /* queue ->finalize() callback if any */ @@ -503,15 +505,16 @@ hwloc_components_init(void) #ifdef HWLOC_HAVE_PLUGINS for(desc = hwloc_plugins; NULL != desc; desc = desc->next) { if (desc->component->flags) { - fprintf(stderr, "Ignoring plugin `%s' component with invalid flags %lx\n", - desc->name, desc->component->flags); + if (hwloc_hide_errors() < 2) + fprintf(stderr, "hwloc: Ignoring plugin `%s' component with invalid flags %lx\n", + desc->name, desc->component->flags); continue; } /* initialize the component */ if (desc->component->init && desc->component->init(0) < 0) { if (hwloc_components_verbose) - fprintf(stderr, "Ignoring plugin `%s', failed to initialize\n", desc->name); + fprintf(stderr, "hwloc: Ignoring plugin `%s', failed to initialize\n", desc->name); continue; } /* queue ->finalize() callback if any */ @@ -608,7 +611,7 @@ hwloc_disc_component_blacklist_one(struct hwloc_topology *topology, /* replace linuxpci and linuxio with linux (with IO phases) * for backward compatibility with pre-v2.0 and v2.0 respectively */ if (hwloc_components_verbose) - fprintf(stderr, "Replacing deprecated component `%s' with `linux' IO phases in blacklisting\n", name); + 
fprintf(stderr, "hwloc: Replacing deprecated component `%s' with `linux' IO phases in blacklisting\n", name); comp = hwloc_disc_component_find("linux", NULL); phases = HWLOC_DISC_PHASE_PCI | HWLOC_DISC_PHASE_IO | HWLOC_DISC_PHASE_MISC | HWLOC_DISC_PHASE_ANNOTATE; @@ -624,7 +627,7 @@ hwloc_disc_component_blacklist_one(struct hwloc_topology *topology, } if (hwloc_components_verbose) - fprintf(stderr, "Blacklisting component `%s` phases 0x%x\n", comp->name, phases); + fprintf(stderr, "hwloc: Blacklisting component `%s` phases 0x%x\n", comp->name, phases); for(i=0; inr_blacklisted_components; i++) { if (topology->blacklisted_components[i].component == comp) { @@ -727,7 +730,7 @@ hwloc_disc_component_try_enable(struct hwloc_topology *topology, if (hwloc_components_verbose) /* do not warn if envvar_forced since system-wide HWLOC_COMPONENTS must be silently ignored after set_xml() etc. */ - fprintf(stderr, "Excluding discovery component `%s' phases 0x%x, conflicts with excludes 0x%x\n", + fprintf(stderr, "hwloc: Excluding discovery component `%s' phases 0x%x, conflicts with excludes 0x%x\n", comp->name, comp->phases, topology->backend_excluded_phases); return -1; } @@ -735,8 +738,8 @@ hwloc_disc_component_try_enable(struct hwloc_topology *topology, backend = comp->instantiate(topology, comp, topology->backend_excluded_phases | blacklisted_phases, NULL, NULL, NULL); if (!backend) { - if (hwloc_components_verbose || envvar_forced) - fprintf(stderr, "Failed to instantiate discovery component `%s'\n", comp->name); + if (hwloc_components_verbose || (envvar_forced && hwloc_hide_errors() < 2)) + fprintf(stderr, "hwloc: Failed to instantiate discovery component `%s'\n", comp->name); return -1; } @@ -817,7 +820,7 @@ hwloc_disc_components_enable_others(struct hwloc_topology *topology) name = curenv; if (!strcmp(name, "linuxpci") || !strcmp(name, "linuxio")) { if (hwloc_components_verbose) - fprintf(stderr, "Replacing deprecated component `%s' with `linux' in envvar forcing\n", 
name); + fprintf(stderr, "hwloc: Replacing deprecated component `%s' with `linux' in envvar forcing\n", name); name = "linux"; } @@ -832,7 +835,8 @@ hwloc_disc_components_enable_others(struct hwloc_topology *topology) if (comp->phases & ~blacklisted_phases) hwloc_disc_component_try_enable(topology, comp, 1 /* envvar forced */, blacklisted_phases); } else { - fprintf(stderr, "Cannot find discovery component `%s'\n", name); + if (hwloc_hide_errors() < 2) + fprintf(stderr, "hwloc: Cannot find discovery component `%s'\n", name); } /* restore chars (the second loop below needs env to be unmodified) */ @@ -864,7 +868,7 @@ hwloc_disc_components_enable_others(struct hwloc_topology *topology) if (!(comp->phases & ~blacklisted_phases)) { if (hwloc_components_verbose) - fprintf(stderr, "Excluding blacklisted discovery component `%s' phases 0x%x\n", + fprintf(stderr, "hwloc: Excluding blacklisted discovery component `%s' phases 0x%x\n", comp->name, comp->phases); goto nextcomp; } @@ -879,7 +883,7 @@ nextcomp: /* print a summary */ int first = 1; backend = topology->backends; - fprintf(stderr, "Final list of enabled discovery components: "); + fprintf(stderr, "hwloc: Final list of enabled discovery components: "); while (backend != NULL) { fprintf(stderr, "%s%s(0x%x)", first ? 
"" : ",", backend->component->name, backend->phases); backend = backend->next; @@ -935,7 +939,7 @@ hwloc_backend_alloc(struct hwloc_topology *topology, /* filter-out component phases that are excluded */ backend->phases = component->phases & ~topology->backend_excluded_phases; if (backend->phases != component->phases && hwloc_components_verbose) - fprintf(stderr, "Trying discovery component `%s' with phases 0x%x instead of 0x%x\n", + fprintf(stderr, "hwloc: Trying discovery component `%s' with phases 0x%x instead of 0x%x\n", component->name, backend->phases, component->phases); backend->flags = 0; backend->discover = NULL; @@ -963,8 +967,9 @@ hwloc_backend_enable(struct hwloc_backend *backend) /* check backend flags */ if (backend->flags) { - fprintf(stderr, "Cannot enable discovery component `%s' phases 0x%x with unknown flags %lx\n", - backend->component->name, backend->component->phases, backend->flags); + if (hwloc_hide_errors() < 2) + fprintf(stderr, "hwloc: Cannot enable discovery component `%s' phases 0x%x with unknown flags %lx\n", + backend->component->name, backend->component->phases, backend->flags); return -1; } @@ -973,7 +978,7 @@ hwloc_backend_enable(struct hwloc_backend *backend) while (NULL != *pprev) { if ((*pprev)->component == backend->component) { if (hwloc_components_verbose) - fprintf(stderr, "Cannot enable discovery component `%s' phases 0x%x twice\n", + fprintf(stderr, "hwloc: Cannot enable discovery component `%s' phases 0x%x twice\n", backend->component->name, backend->component->phases); hwloc_backend_disable(backend); errno = EBUSY; @@ -983,7 +988,7 @@ hwloc_backend_enable(struct hwloc_backend *backend) } if (hwloc_components_verbose) - fprintf(stderr, "Enabling discovery component `%s' with phases 0x%x (among 0x%x)\n", + fprintf(stderr, "hwloc: Enabling discovery component `%s' with phases 0x%x (among 0x%x)\n", backend->component->name, backend->phases, backend->component->phases); /* enqueue at the end */ @@ -1067,7 +1072,7 @@ 
hwloc_backends_disable_all(struct hwloc_topology *topology) while (NULL != (backend = topology->backends)) { struct hwloc_backend *next = backend->next; if (hwloc_components_verbose) - fprintf(stderr, "Disabling discovery component `%s'\n", + fprintf(stderr, "hwloc: Disabling discovery component `%s'\n", backend->component->name); hwloc_backend_disable(backend); topology->backends = next; diff --git a/src/3rdparty/hwloc/src/cpukinds.c b/src/3rdparty/hwloc/src/cpukinds.c index ef6297d7..074b7a73 100644 --- a/src/3rdparty/hwloc/src/cpukinds.c +++ b/src/3rdparty/hwloc/src/cpukinds.c @@ -343,7 +343,8 @@ enum hwloc_cpukinds_ranking { HWLOC_CPUKINDS_RANKING_DEFAULT, /* forced + frequency on ARM, forced + coretype_frequency otherwise */ HWLOC_CPUKINDS_RANKING_NO_FORCED_EFFICIENCY, /* default without forced */ HWLOC_CPUKINDS_RANKING_FORCED_EFFICIENCY, - HWLOC_CPUKINDS_RANKING_CORETYPE_FREQUENCY, + HWLOC_CPUKINDS_RANKING_CORETYPE_FREQUENCY, /* either coretype or frequency or both */ + HWLOC_CPUKINDS_RANKING_CORETYPE_FREQUENCY_STRICT, /* both coretype and frequency are required */ HWLOC_CPUKINDS_RANKING_CORETYPE, HWLOC_CPUKINDS_RANKING_FREQUENCY, HWLOC_CPUKINDS_RANKING_FREQUENCY_MAX, @@ -358,9 +359,9 @@ hwloc__cpukinds_try_rank_by_info(struct hwloc_topology *topology, { unsigned i; - if (HWLOC_CPUKINDS_RANKING_CORETYPE_FREQUENCY == heuristics) { - hwloc_debug("Trying to rank cpukinds by coretype+frequency...\n"); - /* we need intel_core_type + (base or max freq) for all kinds */ + if (HWLOC_CPUKINDS_RANKING_CORETYPE_FREQUENCY_STRICT == heuristics) { + hwloc_debug("Trying to rank cpukinds by coretype+frequency_strict...\n"); + /* we need intel_core_type AND (base or max freq) for all kinds */ if (!summary->have_intel_core_type || (!summary->have_max_freq && !summary->have_base_freq)) return -1; @@ -373,6 +374,21 @@ hwloc__cpukinds_try_rank_by_info(struct hwloc_topology *topology, kind->ranking_value = (summary->summaries[i].intel_core_type << 20) + 
summary->summaries[i].max_freq; } + } else if (HWLOC_CPUKINDS_RANKING_CORETYPE_FREQUENCY == heuristics) { + hwloc_debug("Trying to rank cpukinds by coretype+frequency...\n"); + /* we need intel_core_type OR (base or max freq) for all kinds */ + if (!summary->have_intel_core_type + && (!summary->have_max_freq && !summary->have_base_freq)) + return -1; + /* rank first by coretype (Core>>Atom) then by frequency, base if available, max otherwise */ + for(i=0; inr_cpukinds; i++) { + struct hwloc_internal_cpukind_s *kind = &topology->cpukinds[i]; + if (summary->have_base_freq) + kind->ranking_value = (summary->summaries[i].intel_core_type << 20) + summary->summaries[i].base_freq; + else + kind->ranking_value = (summary->summaries[i].intel_core_type << 20) + summary->summaries[i].max_freq; + } + } else if (HWLOC_CPUKINDS_RANKING_CORETYPE == heuristics) { hwloc_debug("Trying to rank cpukinds by coretype...\n"); /* we need intel_core_type */ @@ -469,6 +485,8 @@ hwloc_internal_cpukinds_rank(struct hwloc_topology *topology) heuristics = HWLOC_CPUKINDS_RANKING_NONE; else if (!strcmp(env, "coretype+frequency")) heuristics = HWLOC_CPUKINDS_RANKING_CORETYPE_FREQUENCY; + else if (!strcmp(env, "coretype+frequency_strict")) + heuristics = HWLOC_CPUKINDS_RANKING_CORETYPE_FREQUENCY_STRICT; else if (!strcmp(env, "coretype")) heuristics = HWLOC_CPUKINDS_RANKING_CORETYPE; else if (!strcmp(env, "frequency")) @@ -481,16 +499,14 @@ hwloc_internal_cpukinds_rank(struct hwloc_topology *topology) heuristics = HWLOC_CPUKINDS_RANKING_FORCED_EFFICIENCY; else if (!strcmp(env, "no_forced_efficiency")) heuristics = HWLOC_CPUKINDS_RANKING_NO_FORCED_EFFICIENCY; - else if (!hwloc_hide_errors()) - fprintf(stderr, "Failed to recognize HWLOC_CPUKINDS_RANKING value %s\n", env); + else if (hwloc_hide_errors() < 2) + fprintf(stderr, "hwloc: Failed to recognize HWLOC_CPUKINDS_RANKING value %s\n", env); } if (heuristics == HWLOC_CPUKINDS_RANKING_DEFAULT || heuristics == 
HWLOC_CPUKINDS_RANKING_NO_FORCED_EFFICIENCY) { /* default is forced_efficiency first */ struct hwloc_cpukinds_info_summary summary; - enum hwloc_cpukinds_ranking subheuristics; - const char *arch; if (heuristics == HWLOC_CPUKINDS_RANKING_DEFAULT) hwloc_debug("Using default ranking strategy...\n"); @@ -508,16 +524,7 @@ hwloc_internal_cpukinds_rank(struct hwloc_topology *topology) goto failed; hwloc__cpukinds_summarize_info(topology, &summary); - arch = hwloc_obj_get_info_by_name(topology->levels[0][0], "Architecture"); - /* TODO: rather coretype_frequency only on x86/Intel? */ - if (arch && (!strncmp(arch, "arm", 3) || !strncmp(arch, "aarch", 5))) - /* then frequency on ARM */ - subheuristics = HWLOC_CPUKINDS_RANKING_FREQUENCY; - else - /* or coretype+frequency otherwise */ - subheuristics = HWLOC_CPUKINDS_RANKING_CORETYPE_FREQUENCY; - - err = hwloc__cpukinds_try_rank_by_info(topology, subheuristics, &summary); + err = hwloc__cpukinds_try_rank_by_info(topology, HWLOC_CPUKINDS_RANKING_CORETYPE_FREQUENCY, &summary); free(summary.summaries); if (!err) goto ready; diff --git a/src/3rdparty/hwloc/src/distances.c b/src/3rdparty/hwloc/src/distances.c index c4854956..252c253e 100644 --- a/src/3rdparty/hwloc/src/distances.c +++ b/src/3rdparty/hwloc/src/distances.c @@ -1,5 +1,5 @@ /* - * Copyright © 2010-2020 Inria. All rights reserved. + * Copyright © 2010-2021 Inria. All rights reserved. * Copyright © 2011-2012 Université Bordeaux * Copyright © 2011 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. 
@@ -17,6 +17,37 @@ static struct hwloc_internal_distances_s * hwloc__internal_distances_from_public(hwloc_topology_t topology, struct hwloc_distances_s *distances); +static void +hwloc__groups_by_distances(struct hwloc_topology *topology, unsigned nbobjs, struct hwloc_obj **objs, uint64_t *values, unsigned long kind, unsigned nbaccuracies, float *accuracies, int needcheck); + +static void +hwloc_internal_distances_restrict(hwloc_obj_t *objs, + uint64_t *indexes, + hwloc_obj_type_t *different_types, + uint64_t *values, + unsigned nbobjs, unsigned disappeared); + +static void +hwloc_internal_distances_print_matrix(struct hwloc_internal_distances_s *dist) +{ + unsigned nbobjs = dist->nbobjs; + hwloc_obj_t *objs = dist->objs; + hwloc_uint64_t *values = dist->values; + int gp = !HWLOC_DIST_TYPE_USE_OS_INDEX(dist->unique_type); + unsigned i, j; + + fprintf(stderr, "%s", gp ? "gp_index" : "os_index"); + for(j=0; jgp_index : objs[j]->os_index)); + fprintf(stderr, "\n"); + for(i=0; igp_index : objs[i]->os_index)); + for(j=0; jname); + free(dist->indexes); + free(dist->objs); + free(dist->different_types); + free(dist->values); + free(dist); +} -/* insert a distance matrix in the topology. - * the caller gives us the distances and objs pointers, we'll free them later. +/* prepare a distances handle for later commit in the topology. + * we duplicate the caller's name. 
*/ -static int -hwloc_internal_distances__add(hwloc_topology_t topology, const char *name, - hwloc_obj_type_t unique_type, hwloc_obj_type_t *different_types, - unsigned nbobjs, hwloc_obj_t *objs, uint64_t *indexes, uint64_t *values, - unsigned long kind, unsigned iflags) +hwloc_backend_distances_add_handle_t +hwloc_backend_distances_add_create(hwloc_topology_t topology, + const char *name, unsigned long kind, unsigned long flags) { struct hwloc_internal_distances_s *dist; - if (different_types) { - kind |= HWLOC_DISTANCES_KIND_HETEROGENEOUS_TYPES; /* the user isn't forced to give it */ - } else if (kind & HWLOC_DISTANCES_KIND_HETEROGENEOUS_TYPES) { + if (flags) { errno = EINVAL; goto err; } @@ -273,110 +310,54 @@ hwloc_internal_distances__add(hwloc_topology_t topology, const char *name, if (!dist) goto err; - if (name) + if (name) { dist->name = strdup(name); /* ignore failure */ - - dist->unique_type = unique_type; - dist->different_types = different_types; - dist->nbobjs = nbobjs; - dist->kind = kind; - dist->iflags = iflags; - - assert(!!(iflags & HWLOC_INTERNAL_DIST_FLAG_OBJS_VALID) == !!objs); - - if (!objs) { - assert(indexes); - /* we only have indexes, we'll refresh objs from there */ - dist->indexes = indexes; - dist->objs = calloc(nbobjs, sizeof(hwloc_obj_t)); - if (!dist->objs) + if (!dist->name) goto err_with_dist; - - } else { - unsigned i; - assert(!indexes); - /* we only have objs, generate the indexes arrays so that we can refresh objs later */ - dist->objs = objs; - dist->indexes = malloc(nbobjs * sizeof(*dist->indexes)); - if (!dist->indexes) - goto err_with_dist; - if (HWLOC_DIST_TYPE_USE_OS_INDEX(dist->unique_type)) { - for(i=0; iindexes[i] = objs[i]->os_index; - } else { - for(i=0; iindexes[i] = objs[i]->gp_index; - } } - dist->values = values; + dist->kind = kind; + dist->iflags = HWLOC_INTERNAL_DIST_FLAG_NOT_COMMITTED; + + dist->unique_type = HWLOC_OBJ_TYPE_NONE; + dist->different_types = NULL; + dist->nbobjs = 0; + dist->indexes = NULL; + 
dist->objs = NULL; + dist->values = NULL; dist->id = topology->next_dist_id++; - - if (topology->last_dist) - topology->last_dist->next = dist; - else - topology->first_dist = dist; - dist->prev = topology->last_dist; - dist->next = NULL; - topology->last_dist = dist; - return 0; + return dist; err_with_dist: - if (name) - free(dist->name); - free(dist); + hwloc_backend_distances_add__cancel(dist); err: - free(different_types); - free(objs); - free(indexes); - free(values); - return -1; + return NULL; } -int hwloc_internal_distances_add_by_index(hwloc_topology_t topology, const char *name, - hwloc_obj_type_t unique_type, hwloc_obj_type_t *different_types, unsigned nbobjs, uint64_t *indexes, uint64_t *values, - unsigned long kind, unsigned long flags) +/* attach objects and values to a distances handle. + * on success, objs and values arrays are attached and will be freed with the distances. + * on failure, the handle is freed. + */ +int +hwloc_backend_distances_add_values(hwloc_topology_t topology __hwloc_attribute_unused, + hwloc_backend_distances_add_handle_t handle, + unsigned nbobjs, hwloc_obj_t *objs, + hwloc_uint64_t *values, + unsigned long flags) { - unsigned iflags = 0; /* objs not valid */ - - if (nbobjs < 2) { - errno = EINVAL; - goto err; - } - - /* cannot group without objects, - * and we don't group from XML anyway since the hwloc that generated the XML should have grouped already. 
- */ - if (flags & HWLOC_DISTANCES_ADD_FLAG_GROUP) { - errno = EINVAL; - goto err; - } - - return hwloc_internal_distances__add(topology, name, unique_type, different_types, nbobjs, NULL, indexes, values, kind, iflags); - - err: - free(indexes); - free(values); - free(different_types); - return -1; -} - -static void -hwloc_internal_distances_restrict(hwloc_obj_t *objs, - uint64_t *indexes, - uint64_t *values, - unsigned nbobjs, unsigned disappeared); - -int hwloc_internal_distances_add(hwloc_topology_t topology, const char *name, - unsigned nbobjs, hwloc_obj_t *objs, uint64_t *values, - unsigned long kind, unsigned long flags) -{ - hwloc_obj_type_t unique_type, *different_types; + struct hwloc_internal_distances_s *dist = handle; + hwloc_obj_type_t unique_type, *different_types = NULL; + hwloc_uint64_t *indexes = NULL; unsigned i, disappeared = 0; - unsigned iflags = HWLOC_INTERNAL_DIST_FLAG_OBJS_VALID; - if (nbobjs < 2) { + if (dist->nbobjs || !(dist->iflags & HWLOC_INTERNAL_DIST_FLAG_NOT_COMMITTED)) { + /* target distances is already set */ + errno = EINVAL; + goto err; + } + + if (flags || nbobjs < 2 || !objs || !values) { errno = EINVAL; goto err; } @@ -389,15 +370,18 @@ int hwloc_internal_distances_add(hwloc_topology_t topology, const char *name, /* some objects are NULL */ if (disappeared == nbobjs) { /* nothing left, drop the matrix */ - free(objs); - free(values); - return 0; + errno = ENOENT; + goto err; } /* restrict the matrix */ - hwloc_internal_distances_restrict(objs, NULL, values, nbobjs, disappeared); + hwloc_internal_distances_restrict(objs, NULL, NULL, values, nbobjs, disappeared); nbobjs -= disappeared; } + indexes = malloc(nbobjs * sizeof(*indexes)); + if (!indexes) + goto err; + unique_type = objs[0]->type; for(i=1; itype != unique_type) { @@ -408,16 +392,108 @@ int hwloc_internal_distances_add(hwloc_topology_t topology, const char *name, /* heterogeneous types */ different_types = malloc(nbobjs * sizeof(*different_types)); if 
(!different_types) - goto err; + goto err_with_indexes; for(i=0; itype; - - } else { - /* homogeneous types */ - different_types = NULL; } - if (topology->grouping && (flags & HWLOC_DISTANCES_ADD_FLAG_GROUP) && !different_types) { + dist->nbobjs = nbobjs; + dist->objs = objs; + dist->iflags |= HWLOC_INTERNAL_DIST_FLAG_OBJS_VALID; + dist->indexes = indexes; + dist->unique_type = unique_type; + dist->different_types = different_types; + dist->values = values; + + if (different_types) + dist->kind |= HWLOC_DISTANCES_KIND_HETEROGENEOUS_TYPES; + + if (HWLOC_DIST_TYPE_USE_OS_INDEX(dist->unique_type)) { + for(i=0; iindexes[i] = objs[i]->os_index; + } else { + for(i=0; iindexes[i] = objs[i]->gp_index; + } + + return 0; + + err_with_indexes: + free(indexes); + err: + hwloc_backend_distances_add__cancel(dist); + return -1; +} + +/* attach objects and values to a distance handle. + * on success, objs and values arrays are attached and will be freed with the distances. + * on failure, the handle is freed. 
+ */ +static int +hwloc_backend_distances_add_values_by_index(hwloc_topology_t topology __hwloc_attribute_unused, + hwloc_backend_distances_add_handle_t handle, + unsigned nbobjs, hwloc_obj_type_t unique_type, hwloc_obj_type_t *different_types, hwloc_uint64_t *indexes, + hwloc_uint64_t *values) +{ + struct hwloc_internal_distances_s *dist = handle; + hwloc_obj_t *objs; + + if (dist->nbobjs || !(dist->iflags & HWLOC_INTERNAL_DIST_FLAG_NOT_COMMITTED)) { + /* target distances is already set */ + errno = EINVAL; + goto err; + } + if (nbobjs < 2 || !indexes || !values || (unique_type == HWLOC_OBJ_TYPE_NONE && !different_types)) { + errno = EINVAL; + goto err; + } + + objs = malloc(nbobjs * sizeof(*objs)); + if (!objs) + goto err; + + dist->nbobjs = nbobjs; + dist->objs = objs; + dist->indexes = indexes; + dist->unique_type = unique_type; + dist->different_types = different_types; + dist->values = values; + + if (different_types) + dist->kind |= HWLOC_DISTANCES_KIND_HETEROGENEOUS_TYPES; + + return 0; + + err: + hwloc_backend_distances_add__cancel(dist); + return -1; +} + +/* commit a distances handle. + * on failure, the handle is freed with its objects and values arrays. + */ +int +hwloc_backend_distances_add_commit(hwloc_topology_t topology, + hwloc_backend_distances_add_handle_t handle, + unsigned long flags) +{ + struct hwloc_internal_distances_s *dist = handle; + + if (!dist->nbobjs || !(dist->iflags & HWLOC_INTERNAL_DIST_FLAG_NOT_COMMITTED)) { + /* target distances not ready for commit */ + errno = EINVAL; + goto err; + } + + if ((flags & HWLOC_DISTANCES_ADD_FLAG_GROUP) && !dist->objs) { + /* cannot group without objects, + * and we don't group from XML anyway since the hwloc that generated the XML should have grouped already. 
+ */ + errno = EINVAL; + goto err; + } + + if (topology->grouping && (flags & HWLOC_DISTANCES_ADD_FLAG_GROUP) && !dist->different_types) { float full_accuracy = 0.f; float *accuracies; unsigned nbaccuracies; @@ -431,26 +507,94 @@ int hwloc_internal_distances_add(hwloc_topology_t topology, const char *name, } if (topology->grouping_verbose) { - unsigned j; - int gp = !HWLOC_DIST_TYPE_USE_OS_INDEX(unique_type); fprintf(stderr, "Trying to group objects using distance matrix:\n"); - fprintf(stderr, "%s", gp ? "gp_index" : "os_index"); - for(j=0; jgp_index : objs[j]->os_index)); - fprintf(stderr, "\n"); - for(i=0; igp_index : objs[i]->os_index)); - for(j=0; jnbobjs, dist->objs, dist->values, + dist->kind, nbaccuracies, accuracies, 1 /* check the first matrix */); } - return hwloc_internal_distances__add(topology, name, unique_type, different_types, nbobjs, objs, NULL, values, kind, iflags); + if (topology->last_dist) + topology->last_dist->next = dist; + else + topology->first_dist = dist; + dist->prev = topology->last_dist; + dist->next = NULL; + topology->last_dist = dist; + + dist->iflags &= ~HWLOC_INTERNAL_DIST_FLAG_NOT_COMMITTED; + return 0; + + err: + hwloc_backend_distances_add__cancel(dist); + return -1; +} + +/* all-in-one backend function not exported to plugins, only used by XML for now */ +int hwloc_internal_distances_add_by_index(hwloc_topology_t topology, const char *name, + hwloc_obj_type_t unique_type, hwloc_obj_type_t *different_types, unsigned nbobjs, uint64_t *indexes, uint64_t *values, + unsigned long kind, unsigned long flags) +{ + hwloc_backend_distances_add_handle_t handle; + int err; + + handle = hwloc_backend_distances_add_create(topology, name, kind, 0); + if (!handle) + goto err; + + err = hwloc_backend_distances_add_values_by_index(topology, handle, + nbobjs, unique_type, different_types, indexes, + values); + if (err < 0) + goto err; + + /* arrays are now attached to the handle */ + indexes = NULL; + different_types = NULL; + values = NULL; 
+ + err = hwloc_backend_distances_add_commit(topology, handle, flags); + if (err < 0) + goto err; + + return 0; + + err: + free(indexes); + free(different_types); + free(values); + return -1; +} + +/* all-in-one backend function not exported to plugins, used by OS backends */ +int hwloc_internal_distances_add(hwloc_topology_t topology, const char *name, + unsigned nbobjs, hwloc_obj_t *objs, uint64_t *values, + unsigned long kind, unsigned long flags) +{ + hwloc_backend_distances_add_handle_t handle; + int err; + + handle = hwloc_backend_distances_add_create(topology, name, kind, 0); + if (!handle) + goto err; + + err = hwloc_backend_distances_add_values(topology, handle, + nbobjs, objs, + values, + 0); + if (err < 0) + goto err; + + /* arrays are now attached to the handle */ + objs = NULL; + values = NULL; + + err = hwloc_backend_distances_add_commit(topology, handle, flags); + if (err < 0) + goto err; + + return 0; err: free(objs); @@ -458,44 +602,54 @@ int hwloc_internal_distances_add(hwloc_topology_t topology, const char *name, return -1; } +/******************************** + * User API for adding distances + */ + #define HWLOC_DISTANCES_KIND_FROM_ALL (HWLOC_DISTANCES_KIND_FROM_OS|HWLOC_DISTANCES_KIND_FROM_USER) #define HWLOC_DISTANCES_KIND_MEANS_ALL (HWLOC_DISTANCES_KIND_MEANS_LATENCY|HWLOC_DISTANCES_KIND_MEANS_BANDWIDTH) -#define HWLOC_DISTANCES_KIND_ALL (HWLOC_DISTANCES_KIND_FROM_ALL|HWLOC_DISTANCES_KIND_MEANS_ALL) +#define HWLOC_DISTANCES_KIND_ALL (HWLOC_DISTANCES_KIND_FROM_ALL|HWLOC_DISTANCES_KIND_MEANS_ALL|HWLOC_DISTANCES_KIND_HETEROGENEOUS_TYPES) #define HWLOC_DISTANCES_ADD_FLAG_ALL (HWLOC_DISTANCES_ADD_FLAG_GROUP|HWLOC_DISTANCES_ADD_FLAG_GROUP_INACCURATE) -/* The actual function exported to the user - */ -int hwloc_distances_add(hwloc_topology_t topology, - unsigned nbobjs, hwloc_obj_t *objs, hwloc_uint64_t *values, - unsigned long kind, unsigned long flags) +void * hwloc_distances_add_create(hwloc_topology_t topology, + const char *name, unsigned long 
kind, + unsigned long flags) +{ + if (!topology->is_loaded) { + errno = EINVAL; + return NULL; + } + if (topology->adopted_shmem_addr) { + errno = EPERM; + return NULL; + } + if ((kind & ~HWLOC_DISTANCES_KIND_ALL) + || hwloc_weight_long(kind & HWLOC_DISTANCES_KIND_FROM_ALL) != 1 + || hwloc_weight_long(kind & HWLOC_DISTANCES_KIND_MEANS_ALL) != 1) { + errno = EINVAL; + return NULL; + } + + return hwloc_backend_distances_add_create(topology, name, kind, flags); +} + +int hwloc_distances_add_values(hwloc_topology_t topology, + void *handle, + unsigned nbobjs, hwloc_obj_t *objs, + hwloc_uint64_t *values, + unsigned long flags) { unsigned i; uint64_t *_values; hwloc_obj_t *_objs; int err; - if (nbobjs < 2 || !objs || !values || !topology->is_loaded) { - errno = EINVAL; - return -1; - } - if (topology->adopted_shmem_addr) { - errno = EPERM; - return -1; - } - if ((kind & ~HWLOC_DISTANCES_KIND_ALL) - || hwloc_weight_long(kind & HWLOC_DISTANCES_KIND_FROM_ALL) != 1 - || hwloc_weight_long(kind & HWLOC_DISTANCES_KIND_MEANS_ALL) != 1 - || (flags & ~HWLOC_DISTANCES_ADD_FLAG_ALL)) { - errno = EINVAL; - return -1; - } - /* no strict need to check for duplicates, things shouldn't break */ for(i=1; iindexes, dist->values, nbobjs, disappeared); + hwloc_internal_distances_restrict(objs, dist->indexes, dist->different_types, dist->values, nbobjs, disappeared); dist->nbobjs -= disappeared; } @@ -1087,3 +1300,210 @@ hwloc__groups_by_distances(struct hwloc_topology *topology, out_with_groupids: free(groupids); } + +static int +hwloc__distances_transform_remove_null(struct hwloc_distances_s *distances) +{ + hwloc_uint64_t *values = distances->values; + hwloc_obj_t *objs = distances->objs; + unsigned i, nb, nbobjs = distances->nbobjs; + hwloc_obj_type_t unique_type; + + for(i=0, nb=0; inbobjs = nb; + + /* update HWLOC_DISTANCES_KIND_HETEROGENEOUS_TYPES for convenience */ + unique_type = objs[0]->type; + for(i=1; itype != unique_type) { + unique_type = HWLOC_OBJ_TYPE_NONE; + break; + } + if 
(unique_type == HWLOC_OBJ_TYPE_NONE) + distances->kind |= HWLOC_DISTANCES_KIND_HETEROGENEOUS_TYPES; + else + distances->kind &= ~HWLOC_DISTANCES_KIND_HETEROGENEOUS_TYPES; + + return 0; +} + +static int +hwloc__distances_transform_links(struct hwloc_distances_s *distances) +{ + /* FIXME: we should look for the greatest common denominator + * but we just use the smallest positive value, that's enough for current use-cases. + * We'll return -1 in other cases. + */ + hwloc_uint64_t divider, *values = distances->values; + unsigned i, nbobjs = distances->nbobjs; + + if (!(distances->kind & HWLOC_DISTANCES_KIND_MEANS_BANDWIDTH)) { + errno = EINVAL; + return -1; + } + + for(i=0; isubtype && !strcmp(obj->subtype, "NVSwitch"); +} + +static int +hwloc__distances_transform_merge_switch_ports(hwloc_topology_t topology, + struct hwloc_distances_s *distances) +{ + struct hwloc_internal_distances_s *dist = hwloc__internal_distances_from_public(topology, distances); + hwloc_obj_t *objs = distances->objs; + hwloc_uint64_t *values = distances->values; + unsigned first, i, j, nbobjs = distances->nbobjs; + + if (strcmp(dist->name, "NVLinkBandwidth")) { + errno = EINVAL; + return -1; + } + + /* find the first port */ + first = (unsigned) -1; + for(i=0; iobjs; + hwloc_uint64_t *values = distances->values; + unsigned nbobjs = distances->nbobjs; + unsigned i, j, k; + + if (strcmp(dist->name, "NVLinkBandwidth")) { + errno = EINVAL; + return -1; + } + + for(i=0; i bw_sw2j ? 
bw_sw2j : bw_i2sw; + } + } + + return 0; +} + +int +hwloc_distances_transform(hwloc_topology_t topology, + struct hwloc_distances_s *distances, + enum hwloc_distances_transform_e transform, + void *transform_attr, + unsigned long flags) +{ + if (flags || transform_attr) { + errno = EINVAL; + return -1; + } + + switch (transform) { + case HWLOC_DISTANCES_TRANSFORM_REMOVE_NULL: + return hwloc__distances_transform_remove_null(distances); + case HWLOC_DISTANCES_TRANSFORM_LINKS: + return hwloc__distances_transform_links(distances); + case HWLOC_DISTANCES_TRANSFORM_MERGE_SWITCH_PORTS: + { + int err; + err = hwloc__distances_transform_merge_switch_ports(topology, distances); + if (!err) + err = hwloc__distances_transform_remove_null(distances); + return err; + } + case HWLOC_DISTANCES_TRANSFORM_TRANSITIVE_CLOSURE: + return hwloc__distances_transform_transitive_closure(topology, distances); + default: + errno = EINVAL; + return -1; + } +} diff --git a/src/3rdparty/hwloc/src/pci-common.c b/src/3rdparty/hwloc/src/pci-common.c index 1149113b..24626860 100644 --- a/src/3rdparty/hwloc/src/pci-common.c +++ b/src/3rdparty/hwloc/src/pci-common.c @@ -1,5 +1,5 @@ /* - * Copyright © 2009-2020 Inria. All rights reserved. + * Copyright © 2009-2021 Inria. All rights reserved. * See COPYING in top-level directory. 
*/ @@ -146,8 +146,9 @@ hwloc_pci_discovery_prepare(struct hwloc_topology *topology) } free(buffer); } else { - fprintf(stderr, "Ignoring HWLOC_PCI_LOCALITY file `%s' too large (%lu bytes)\n", - env, (unsigned long) st.st_size); + if (hwloc_hide_errors() < 2) + fprintf(stderr, "hwloc/pci: Ignoring HWLOC_PCI_LOCALITY file `%s' too large (%lu bytes)\n", + env, (unsigned long) st.st_size); } } close(fd); @@ -206,8 +207,11 @@ hwloc_pci_traverse_print_cb(void * cbdata __hwloc_attribute_unused, else hwloc_debug("%s Bridge [%04x:%04x]", busid, pcidev->attr->pcidev.vendor_id, pcidev->attr->pcidev.device_id); - hwloc_debug(" to %04x:[%02x:%02x]\n", - pcidev->attr->bridge.downstream.pci.domain, pcidev->attr->bridge.downstream.pci.secondary_bus, pcidev->attr->bridge.downstream.pci.subordinate_bus); + if (pcidev->attr->bridge.downstream_type == HWLOC_OBJ_BRIDGE_PCI) + hwloc_debug(" to %04x:[%02x:%02x]\n", + pcidev->attr->bridge.downstream.pci.domain, pcidev->attr->bridge.downstream.pci.secondary_bus, pcidev->attr->bridge.downstream.pci.subordinate_bus); + else + assert(0); } else hwloc_debug("%s Device [%04x:%04x (%04x:%04x) rev=%02x class=%04x]\n", busid, pcidev->attr->pcidev.vendor_id, pcidev->attr->pcidev.device_id, @@ -251,11 +255,11 @@ hwloc_pci_compare_busids(struct hwloc_obj *a, struct hwloc_obj *b) if (a->attr->pcidev.domain > b->attr->pcidev.domain) return HWLOC_PCI_BUSID_HIGHER; - if (a->type == HWLOC_OBJ_BRIDGE + if (a->type == HWLOC_OBJ_BRIDGE && a->attr->bridge.downstream_type == HWLOC_OBJ_BRIDGE_PCI && b->attr->pcidev.bus >= a->attr->bridge.downstream.pci.secondary_bus && b->attr->pcidev.bus <= a->attr->bridge.downstream.pci.subordinate_bus) return HWLOC_PCI_BUSID_SUPERSET; - if (b->type == HWLOC_OBJ_BRIDGE + if (b->type == HWLOC_OBJ_BRIDGE && b->attr->bridge.downstream_type == HWLOC_OBJ_BRIDGE_PCI && a->attr->pcidev.bus >= b->attr->bridge.downstream.pci.secondary_bus && a->attr->pcidev.bus <= b->attr->bridge.downstream.pci.subordinate_bus) return 
HWLOC_PCI_BUSID_INCLUDED; @@ -302,7 +306,7 @@ hwloc_pci_add_object(struct hwloc_obj *parent, struct hwloc_obj **parent_io_firs new->next_sibling = *curp; *curp = new; new->parent = parent; - if (new->type == HWLOC_OBJ_BRIDGE) { + if (new->type == HWLOC_OBJ_BRIDGE && new->attr->bridge.downstream_type == HWLOC_OBJ_BRIDGE_PCI) { /* look at remaining siblings and move some below new */ childp = &new->io_first_child; curp = &new->next_sibling; @@ -329,7 +333,7 @@ hwloc_pci_add_object(struct hwloc_obj *parent, struct hwloc_obj **parent_io_firs } case HWLOC_PCI_BUSID_EQUAL: { static int reported = 0; - if (!reported && !hwloc_hide_errors()) { + if (!reported && hwloc_hide_errors() < 2) { fprintf(stderr, "*********************************************************\n"); fprintf(stderr, "* hwloc %s received invalid PCI information.\n", HWLOC_VERSION); fprintf(stderr, "*\n"); @@ -411,7 +415,7 @@ hwloc_pcidisc_add_hostbridges(struct hwloc_topology *topology, dstnextp = &child->next_sibling; /* compute hostbridge secondary/subordinate buses */ - if (child->type == HWLOC_OBJ_BRIDGE + if (child->type == HWLOC_OBJ_BRIDGE && child->attr->bridge.downstream_type == HWLOC_OBJ_BRIDGE_PCI && child->attr->bridge.downstream.pci.subordinate_bus > current_subordinate) current_subordinate = child->attr->bridge.downstream.pci.subordinate_bus; @@ -486,7 +490,8 @@ hwloc__pci_find_busid_parent(struct hwloc_topology *topology, struct hwloc_pcide if (env) { static int reported = 0; if (!topology->pci_has_forced_locality && !reported) { - fprintf(stderr, "Environment variable %s is deprecated, please use HWLOC_PCI_LOCALITY instead.\n", env); + if (!hwloc_hide_errors()) + fprintf(stderr, "hwloc/pci: Environment variable %s is deprecated, please use HWLOC_PCI_LOCALITY instead.\n", env); reported = 1; } if (*env) { @@ -565,7 +570,7 @@ hwloc_pcidisc_tree_attach(struct hwloc_topology *topology, struct hwloc_obj *tre assert(pciobj->type == HWLOC_OBJ_PCI_DEVICE || (pciobj->type == HWLOC_OBJ_BRIDGE && 
pciobj->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_PCI)); - if (obj->type == HWLOC_OBJ_BRIDGE) { + if (obj->type == HWLOC_OBJ_BRIDGE && obj->attr->bridge.downstream_type == HWLOC_OBJ_BRIDGE_PCI) { domain = obj->attr->bridge.downstream.pci.domain; bus_min = obj->attr->bridge.downstream.pci.secondary_bus; bus_max = obj->attr->bridge.downstream.pci.subordinate_bus; diff --git a/src/3rdparty/hwloc/src/topology-windows.c b/src/3rdparty/hwloc/src/topology-windows.c index b6458b6f..d67c6b99 100644 --- a/src/3rdparty/hwloc/src/topology-windows.c +++ b/src/3rdparty/hwloc/src/topology-windows.c @@ -1,6 +1,6 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2020 Inria. All rights reserved. + * Copyright © 2009-2021 Inria. All rights reserved. * Copyright © 2009-2012, 2020 Université Bordeaux * Copyright © 2011 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. @@ -11,6 +11,7 @@ #include "private/autogen/config.h" #include "hwloc.h" +#include "hwloc/windows.h" #include "private/private.h" #include "private/debug.h" @@ -190,9 +191,6 @@ typedef struct _PROCESSOR_NUMBER { typedef WORD (WINAPI *PFN_GETACTIVEPROCESSORGROUPCOUNT)(void); static PFN_GETACTIVEPROCESSORGROUPCOUNT GetActiveProcessorGroupCountProc; -static unsigned long nr_processor_groups = 1; -static unsigned long max_numanode_index = 0; - typedef WORD (WINAPI *PFN_GETACTIVEPROCESSORCOUNT)(WORD); static PFN_GETACTIVEPROCESSORCOUNT GetActiveProcessorCountProc; @@ -270,9 +268,6 @@ static void hwloc_win_get_function_ptrs(void) (PFN_VIRTUALFREEEX) GetProcAddress(kernel32, "VirtualFreeEx"); } - if (GetActiveProcessorGroupCountProc) - nr_processor_groups = GetActiveProcessorGroupCountProc(); - if (!QueryWorkingSetExProc) { HMODULE psapi = LoadLibrary("psapi.dll"); if (psapi) @@ -363,6 +358,171 @@ static int hwloc_bitmap_to_single_ULONG_PTR(hwloc_const_bitmap_t set, unsigned * return 0; } +/********************** + * Processor Groups + */ + +static unsigned long max_numanode_index = 0; + 
+static unsigned long nr_processor_groups = 1; +static hwloc_cpuset_t * processor_group_cpusets = NULL; + +static void +hwloc_win_get_processor_groups(void) +{ + PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX procInfoTotal, tmpprocInfoTotal, procInfo; + DWORD length; + unsigned i; + + hwloc_debug("querying windows processor groups\n"); + + if (!GetActiveProcessorGroupCountProc || !GetLogicalProcessorInformationExProc) + goto error; + + nr_processor_groups = GetActiveProcessorGroupCountProc(); + if (!nr_processor_groups) + goto error; + + hwloc_debug("found %lu windows processor groups\n", nr_processor_groups); + + if (nr_processor_groups > 1 && SIZEOF_VOID_P == 4) { + if (!hwloc_hide_errors()) + fprintf(stderr, "hwloc: multiple processor groups found on 32bits Windows, topology may be invalid/incomplete.\n"); + } + + length = 0; + procInfoTotal = NULL; + + while (1) { + if (GetLogicalProcessorInformationExProc(RelationGroup, procInfoTotal, &length)) + break; + if (GetLastError() != ERROR_INSUFFICIENT_BUFFER) + goto error; + tmpprocInfoTotal = realloc(procInfoTotal, length); + if (!tmpprocInfoTotal) + goto error_with_procinfo; + procInfoTotal = tmpprocInfoTotal; + } + + processor_group_cpusets = calloc(nr_processor_groups, sizeof(*processor_group_cpusets)); + if (!processor_group_cpusets) + goto error_with_procinfo; + + for (procInfo = procInfoTotal; + (void*) procInfo < (void*) ((uintptr_t) procInfoTotal + length); + procInfo = (void*) ((uintptr_t) procInfo + procInfo->Size)) { + unsigned id; + + assert(procInfo->Relationship == RelationGroup); + + for (id = 0; id < procInfo->Group.ActiveGroupCount; id++) { + KAFFINITY mask; + hwloc_bitmap_t set; + + set = hwloc_bitmap_alloc(); + if (!set) + goto error_with_cpusets; + + mask = procInfo->Group.GroupInfo[id].ActiveProcessorMask; + hwloc_debug("group %u %d cpus mask %lx\n", id, + procInfo->Group.GroupInfo[id].ActiveProcessorCount, mask); + /* KAFFINITY is ULONG_PTR */ + hwloc_bitmap_set_ith_ULONG_PTR(set, id, mask); + /* 
FIXME: what if running 32bits on a 64bits windows with 64-processor groups? + * ULONG_PTR is 32bits, so half the group is invisible? + * maybe scale id to id*8/sizeof(ULONG_PTR) so that groups are 64-PU aligned? + */ + hwloc_debug_2args_bitmap("group %u %d bitmap %s\n", id, procInfo->Group.GroupInfo[id].ActiveProcessorCount, set); + processor_group_cpusets[id] = set; + } + } + + free(procInfoTotal); + return; + + error_with_cpusets: + for(i=0; iis_loaded || !topology->is_thissystem) { + errno = EINVAL; + return -1; + } + + if (flags) { + errno = EINVAL; + return -1; + } + + return nr_processor_groups; +} + +int +hwloc_windows_get_processor_group_cpuset(hwloc_topology_t topology, unsigned pg_index, hwloc_cpuset_t cpuset, unsigned long flags) +{ + if (!topology->is_loaded || !topology->is_thissystem) { + errno = EINVAL; + return -1; + } + + if (!cpuset) { + errno = EINVAL; + return -1; + } + + if (flags) { + errno = EINVAL; + return -1; + } + + if (pg_index >= nr_processor_groups) { + errno = ENOENT; + return -1; + } + + if (!processor_group_cpusets) { + assert(nr_processor_groups == 1); + /* we found no processor groups, return the entire topology as a single one */ + hwloc_bitmap_copy(cpuset, topology->levels[0][0]->cpuset); + return 0; + } + + if (!processor_group_cpusets[pg_index]) { + errno = ENOENT; + return -1; + } + + hwloc_bitmap_copy(cpuset, processor_group_cpusets[pg_index]); + return 0; +} + /************************************************************** * hwloc PU numbering with respect to Windows processor groups * @@ -1328,11 +1488,13 @@ hwloc_set_windows_hooks(struct hwloc_binding_hooks *hooks, static int hwloc_windows_component_init(unsigned long flags __hwloc_attribute_unused) { hwloc_win_get_function_ptrs(); + hwloc_win_get_processor_groups(); return 0; } static void hwloc_windows_component_finalize(unsigned long flags __hwloc_attribute_unused) { + hwloc_win_free_processor_groups(); } static struct hwloc_backend * diff --git 
a/src/3rdparty/hwloc/src/topology-x86.c b/src/3rdparty/hwloc/src/topology-x86.c index 71e396e2..c326371b 100644 --- a/src/3rdparty/hwloc/src/topology-x86.c +++ b/src/3rdparty/hwloc/src/topology-x86.c @@ -7,11 +7,14 @@ * * This backend is only used when the operating system does not export * the necessary hardware topology information to user-space applications. - * Currently, only the FreeBSD backend relies on this x86 backend. + * Currently, FreeBSD and NetBSD only add PUs and then fallback to this + * backend for CPU/Cache discovery. * * Other backends such as Linux have their own way to retrieve various * pieces of hardware topology information from the operating system * on various architectures, without having to use this x86-specific code. + * But this backend is still used after them to annotate some objects with + * additional details (CPU info in Package, Inclusiveness in Caches). */ #include "private/autogen/config.h" @@ -1257,7 +1260,8 @@ static int look_procs(struct hwloc_backend *backend, struct procinfo *infos, unsigned long flags, unsigned highest_cpuid, unsigned highest_ext_cpuid, unsigned *features, enum cpuid_type cpuid_type, int (*get_cpubind)(hwloc_topology_t topology, hwloc_cpuset_t set, int flags), - int (*set_cpubind)(hwloc_topology_t topology, hwloc_const_cpuset_t set, int flags)) + int (*set_cpubind)(hwloc_topology_t topology, hwloc_const_cpuset_t set, int flags), + hwloc_bitmap_t restrict_set) { struct hwloc_x86_backend_data_s *data = backend->private_data; struct hwloc_topology *topology = backend->topology; @@ -1277,6 +1281,12 @@ look_procs(struct hwloc_backend *backend, struct procinfo *infos, unsigned long for (i = 0; i < nbprocs; i++) { struct cpuiddump *src_cpuiddump = NULL; + + if (restrict_set && !hwloc_bitmap_isset(restrict_set, i)) { + /* skip this CPU outside of the binding mask */ + continue; + } + if (data->src_cpuiddump_path) { src_cpuiddump = cpuiddump_read(data->src_cpuiddump_path, i); if (!src_cpuiddump) @@ -1410,6 +1420,7 
@@ static int hwloc_look_x86(struct hwloc_backend *backend, unsigned long flags) { struct hwloc_x86_backend_data_s *data = backend->private_data; + struct hwloc_topology *topology = backend->topology; unsigned nbprocs = data->nbprocs; unsigned eax, ebx, ecx = 0, edx; unsigned i; @@ -1425,9 +1436,21 @@ int hwloc_look_x86(struct hwloc_backend *backend, unsigned long flags) struct hwloc_topology_membind_support memsupport __hwloc_attribute_unused; int (*get_cpubind)(hwloc_topology_t topology, hwloc_cpuset_t set, int flags) = NULL; int (*set_cpubind)(hwloc_topology_t topology, hwloc_const_cpuset_t set, int flags) = NULL; + hwloc_bitmap_t restrict_set = NULL; struct cpuiddump *src_cpuiddump = NULL; int ret = -1; + /* check if binding works */ + memset(&hooks, 0, sizeof(hooks)); + support.membind = &memsupport; + /* We could just copy the main hooks (except in some corner cases), + * but the current overhead is negligible, so just always reget them. + */ + hwloc_set_native_binding_hooks(&hooks, &support); + /* in theory, those are only needed if !data->src_cpuiddump_path || HWLOC_TOPOLOGY_FLAG_RESTRICT_TO_BINDING + * but that's the vast majority of cases anyway, and the overhead is very small. + */ + if (data->src_cpuiddump_path) { /* Just read cpuid from the dump (implies !topology->is_thissystem by default) */ src_cpuiddump = cpuiddump_read(data->src_cpuiddump_path, 0); @@ -1440,13 +1463,6 @@ int hwloc_look_x86(struct hwloc_backend *backend, unsigned long flags) * we may still force use this backend when debugging with !thissystem. */ - /* check if binding works */ - memset(&hooks, 0, sizeof(hooks)); - support.membind = &memsupport; - /* We could just copy the main hooks (except in some corner cases), - * but the current overhead is negligible, so just always reget them. 
- */ - hwloc_set_native_binding_hooks(&hooks, &support); if (hooks.get_thisthread_cpubind && hooks.set_thisthread_cpubind) { get_cpubind = hooks.get_thisthread_cpubind; set_cpubind = hooks.set_thisthread_cpubind; @@ -1466,6 +1482,20 @@ int hwloc_look_x86(struct hwloc_backend *backend, unsigned long flags) } } + if (topology->flags & HWLOC_TOPOLOGY_FLAG_RESTRICT_TO_CPUBINDING) { + restrict_set = hwloc_bitmap_alloc(); + if (!restrict_set) + goto out; + if (hooks.get_thisproc_cpubind) + hooks.get_thisproc_cpubind(topology, restrict_set, 0); + else if (hooks.get_thisthread_cpubind) + hooks.get_thisthread_cpubind(topology, restrict_set, 0); + if (hwloc_bitmap_iszero(restrict_set)) { + hwloc_bitmap_free(restrict_set); + restrict_set = NULL; + } + } + if (!src_cpuiddump && !hwloc_have_x86_cpuid()) goto out; @@ -1530,7 +1560,7 @@ int hwloc_look_x86(struct hwloc_backend *backend, unsigned long flags) ret = look_procs(backend, infos, flags, highest_cpuid, highest_ext_cpuid, features, cpuid_type, - get_cpubind, set_cpubind); + get_cpubind, set_cpubind, restrict_set); if (!ret) /* success, we're done */ goto out_with_os_state; @@ -1555,6 +1585,7 @@ out_with_infos: } out: + hwloc_bitmap_free(restrict_set); if (src_cpuiddump) cpuiddump_free(src_cpuiddump); return ret; @@ -1571,6 +1602,11 @@ hwloc_x86_discover(struct hwloc_backend *backend, struct hwloc_disc_status *dsta assert(dstatus->phase == HWLOC_DISC_PHASE_CPU); + if (topology->flags & HWLOC_TOPOLOGY_FLAG_DONT_CHANGE_BINDING) { + /* TODO: Things would work if there's a single PU, no need to rebind */ + return 0; + } + if (getenv("HWLOC_X86_TOPOEXT_NUMANODES")) { flags |= HWLOC_X86_DISC_FLAG_TOPOEXT_NUMANODES; } diff --git a/src/3rdparty/hwloc/src/topology-xml.c b/src/3rdparty/hwloc/src/topology-xml.c index 6aacc052..87e91010 100644 --- a/src/3rdparty/hwloc/src/topology-xml.c +++ b/src/3rdparty/hwloc/src/topology-xml.c @@ -1,6 +1,6 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2020 Inria. All rights reserved. 
+ * Copyright © 2009-2021 Inria. All rights reserved. * Copyright © 2009-2011, 2020 Université Bordeaux * Copyright © 2009-2018 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. @@ -192,8 +192,9 @@ hwloc__xml_import_object_attr(struct hwloc_topology *topology, || lvalue == HWLOC_OBJ_CACHE_INSTRUCTION) obj->attr->cache.type = (hwloc_obj_cache_type_t) lvalue; else - fprintf(stderr, "%s: ignoring invalid cache_type attribute %lu\n", - state->global->msgprefix, lvalue); + if (hwloc__xml_verbose()) + fprintf(stderr, "%s: ignoring invalid cache_type attribute %lu\n", + state->global->msgprefix, lvalue); } else if (hwloc__xml_verbose()) fprintf(stderr, "%s: ignoring cache_type attribute for non-cache object type\n", state->global->msgprefix); @@ -262,8 +263,8 @@ hwloc__xml_import_object_attr(struct hwloc_topology *topology, #ifndef HWLOC_HAVE_32BITS_PCI_DOMAIN } else if (domain > 0xffff) { static int warned = 0; - if (!warned && !hwloc_hide_errors()) - fprintf(stderr, "Ignoring PCI device with non-16bit domain.\nPass --enable-32bits-pci-domain to configure to support such devices\n(warning: it would break the library ABI, don't enable unless really needed).\n"); + if (!warned && hwloc_hide_errors() < 2) + fprintf(stderr, "hwloc/xml: Ignoring PCI device with non-16bit domain.\nPass --enable-32bits-pci-domain to configure to support such devices\n(warning: it would break the library ABI, don't enable unless really needed).\n"); warned = 1; *ignore = 1; #endif @@ -337,6 +338,7 @@ hwloc__xml_import_object_attr(struct hwloc_topology *topology, } else { obj->attr->bridge.upstream_type = (hwloc_obj_bridge_type_t) upstream_type; obj->attr->bridge.downstream_type = (hwloc_obj_bridge_type_t) downstream_type; + /* FIXME verify that upstream/downstream type is valid */ }; break; } @@ -361,12 +363,13 @@ hwloc__xml_import_object_attr(struct hwloc_topology *topology, #ifndef HWLOC_HAVE_32BITS_PCI_DOMAIN } else if (domain > 0xffff) { static int warned = 0; - 
if (!warned && !hwloc_hide_errors()) - fprintf(stderr, "Ignoring bridge to PCI with non-16bit domain.\nPass --enable-32bits-pci-domain to configure to support such devices\n(warning: it would break the library ABI, don't enable unless really needed).\n"); + if (!warned && hwloc_hide_errors() < 2) + fprintf(stderr, "hwloc/xml: Ignoring bridge to PCI with non-16bit domain.\nPass --enable-32bits-pci-domain to configure to support such devices\n(warning: it would break the library ABI, don't enable unless really needed).\n"); warned = 1; *ignore = 1; #endif } else { + /* FIXME verify that downstream type vs pci info are valid */ obj->attr->bridge.downstream.pci.domain = domain; obj->attr->bridge.downstream.pci.secondary_bus = secbus; obj->attr->bridge.downstream.pci.subordinate_bus = subbus; @@ -1232,7 +1235,7 @@ hwloc__xml_import_object(hwloc_topology_t topology, /* next should be before cur */ if (!childrengotignored) { static int reported = 0; - if (!reported && !hwloc_hide_errors()) { + if (!reported && hwloc_hide_errors() < 2) { hwloc__xml_import_report_outoforder(topology, next, cur); reported = 1; } @@ -1565,7 +1568,7 @@ hwloc__xml_v2import_distances(hwloc_topology_t topology, } } - hwloc_internal_distances_add_by_index(topology, name, unique_type, different_types, nbobjs, indexes, u64values, kind, 0); + hwloc_internal_distances_add_by_index(topology, name, unique_type, different_types, nbobjs, indexes, u64values, kind, 0 /* assume grouping was applied when this matrix was discovered before exporting to XML */); /* prevent freeing below */ indexes = NULL; @@ -2647,7 +2650,8 @@ hwloc__xml_export_object_contents (hwloc__xml_export_state_t state, hwloc_topolo logical_to_v2array = malloc(nbobjs * sizeof(*logical_to_v2array)); if (!logical_to_v2array) { - fprintf(stderr, "xml/export/v1: failed to allocated logical_to_v2array\n"); + if (!hwloc_hide_errors()) + fprintf(stderr, "hwloc/xml/export/v1: failed to allocated logical_to_v2array\n"); continue; } diff --git 
a/src/3rdparty/hwloc/src/topology.c b/src/3rdparty/hwloc/src/topology.c index 3944f3c1..01e5a863 100644 --- a/src/3rdparty/hwloc/src/topology.c +++ b/src/3rdparty/hwloc/src/topology.c @@ -52,6 +52,42 @@ #include #endif +/* + * Define ZES_ENABLE_SYSMAN=1 early so that the LevelZero backend gets Sysman enabled. + * Use the constructor if supported and/or the Windows DllMain callback. + * Do it in the main hwloc library instead of the levelzero component because + * the latter could be loaded later as a plugin. + * + * L0 seems to be using getenv() to check this variable on Windows + * (at least in the Intel Compute-Runtime of March 2021), + * so use putenv() to set the variable. + * + * For the record, Get/SetEnvironmentVariable() is not exactly the same as getenv/putenv(): + * - getenv() doesn't see what was set with SetEnvironmentVariable() + * - GetEnvironmentVariable() doesn't see putenv() in cygwin (while it does in MSVC and MinGW). + * Hence, if L0 ever switches from getenv() to GetEnvironmentVariable(), + * it will break in cygwin, we'll have to use both putenv() and SetEnvironmentVariable(). + * Hopefully L0 will be provide a way to enable Sysman without env vars before it happens. + */ +#ifdef HWLOC_HAVE_ATTRIBUTE_CONSTRUCTOR +static void hwloc_constructor(void) __attribute__((constructor)); +static void hwloc_constructor(void) +{ + if (!getenv("ZES_ENABLE_SYSMAN")) + putenv((char *) "ZES_ENABLE_SYSMAN=1"); +} +#endif +#ifdef HWLOC_WIN_SYS +BOOL WINAPI DllMain(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpReserved) +{ + if (fdwReason == DLL_PROCESS_ATTACH) { + if (!getenv("ZES_ENABLE_SYSMAN")) + putenv((char *) "ZES_ENABLE_SYSMAN=1"); + } + return TRUE; +} +#endif + unsigned hwloc_get_api_version(void) { return HWLOC_API_VERSION; @@ -64,7 +100,7 @@ int hwloc_topology_abi_check(hwloc_topology_t topology) int hwloc_hide_errors(void) { - static int hide = 0; + static int hide = 1; /* only show critical errors by default. 
lstopo will show others */ static int checked = 0; if (!checked) { const char *envvar = getenv("HWLOC_HIDE_ERRORS"); @@ -106,7 +142,7 @@ static void report_insert_error(hwloc_obj_t new, hwloc_obj_t old, const char *ms { static int reported = 0; - if (reason && !reported && !hwloc_hide_errors()) { + if (reason && !reported && hwloc_hide_errors() < 2) { char newstr[512]; char oldstr[512]; report_insert_error_format_obj(newstr, sizeof(newstr), new); @@ -2307,9 +2343,15 @@ hwloc__filter_bridges(hwloc_topology_t topology, hwloc_obj_t root, unsigned dept child->attr->bridge.depth = depth; - if (child->type == HWLOC_OBJ_BRIDGE - && filter == HWLOC_TYPE_FILTER_KEEP_IMPORTANT - && !child->io_first_child) { + /* remove bridges that have no child, + * and pci-to-non-pci bridges (pcidev) that no child either. + * keep NVSwitch since they may be used in NVLink matrices. + */ + if (filter == HWLOC_TYPE_FILTER_KEEP_IMPORTANT + && !child->io_first_child + && (child->type == HWLOC_OBJ_BRIDGE + || (child->type == HWLOC_OBJ_PCI_DEVICE && (child->attr->pcidev.class_id >> 8) == 0x06 + && (!child->subtype || strcmp(child->subtype, "NVSwitch"))))) { unlink_and_free_single_object(pchild); topology->modified = 1; } @@ -3088,7 +3130,8 @@ hwloc_connect_levels(hwloc_topology_t topology) tmpnbobjs = realloc(topology->level_nbobjects, 2 * topology->nb_levels_allocated * sizeof(*topology->level_nbobjects)); if (!tmplevels || !tmpnbobjs) { - fprintf(stderr, "hwloc failed to realloc level arrays to %u\n", topology->nb_levels_allocated * 2); + if (hwloc_hide_errors() < 2) + fprintf(stderr, "hwloc: failed to realloc level arrays to %u\n", topology->nb_levels_allocated * 2); /* if one realloc succeeded, make sure the caller will free the new buffer */ if (tmplevels) @@ -3470,15 +3513,18 @@ hwloc_discover(struct hwloc_topology *topology, hwloc_debug("%s", "\nRemoving empty objects\n"); remove_empty(topology, &topology->levels[0][0]); if (!topology->levels[0][0]) { - fprintf(stderr, "Topology became 
empty, aborting!\n"); + if (hwloc_hide_errors() < 2) + fprintf(stderr, "hwloc: Topology became empty, aborting!\n"); return -1; } if (hwloc_bitmap_iszero(topology->levels[0][0]->cpuset)) { - fprintf(stderr, "Topology does not contain any PU, aborting!\n"); + if (hwloc_hide_errors() < 2) + fprintf(stderr, "hwloc: Topology does not contain any PU, aborting!\n"); return -1; } if (hwloc_bitmap_iszero(topology->levels[0][0]->nodeset)) { - fprintf(stderr, "Topology does not contain any NUMA node, aborting!\n"); + if (hwloc_hide_errors() < 2) + fprintf(stderr, "hwloc: Topology does not contain any NUMA node, aborting!\n"); return -1; } hwloc_debug_print_objects(0, topology->levels[0][0]); @@ -3716,7 +3762,18 @@ hwloc_topology_set_flags (struct hwloc_topology *topology, unsigned long flags) return -1; } - if (flags & ~(HWLOC_TOPOLOGY_FLAG_INCLUDE_DISALLOWED|HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM|HWLOC_TOPOLOGY_FLAG_THISSYSTEM_ALLOWED_RESOURCES|HWLOC_TOPOLOGY_FLAG_IMPORT_SUPPORT)) { + if (flags & ~(HWLOC_TOPOLOGY_FLAG_INCLUDE_DISALLOWED|HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM|HWLOC_TOPOLOGY_FLAG_THISSYSTEM_ALLOWED_RESOURCES|HWLOC_TOPOLOGY_FLAG_IMPORT_SUPPORT|HWLOC_TOPOLOGY_FLAG_RESTRICT_TO_CPUBINDING|HWLOC_TOPOLOGY_FLAG_RESTRICT_TO_MEMBINDING|HWLOC_TOPOLOGY_FLAG_DONT_CHANGE_BINDING)) { + errno = EINVAL; + return -1; + } + + if ((flags & (HWLOC_TOPOLOGY_FLAG_RESTRICT_TO_CPUBINDING|HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM)) == HWLOC_TOPOLOGY_FLAG_RESTRICT_TO_CPUBINDING) { + /* RESTRICT_TO_CPUBINDING requires THISSYSTEM for binding */ + errno = EINVAL; + return -1; + } + if ((flags & (HWLOC_TOPOLOGY_FLAG_RESTRICT_TO_MEMBINDING|HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM)) == HWLOC_TOPOLOGY_FLAG_RESTRICT_TO_MEMBINDING) { + /* RESTRICT_TO_MEMBINDING requires THISSYSTEM for binding */ errno = EINVAL; return -1; } @@ -4003,6 +4060,31 @@ hwloc_topology_load (struct hwloc_topology *topology) topology->is_loaded = 1; + if (topology->flags & HWLOC_TOPOLOGY_FLAG_RESTRICT_TO_CPUBINDING) { + /* FIXME: filter 
directly in backends during the discovery. + * Only x86 does it because binding may cause issues on Windows. + */ + hwloc_bitmap_t set = hwloc_bitmap_alloc(); + if (set) { + err = hwloc_get_cpubind(topology, set, HWLOC_CPUBIND_STRICT); + if (!err) + hwloc_topology_restrict(topology, set, 0); + hwloc_bitmap_free(set); + } + } + if (topology->flags & HWLOC_TOPOLOGY_FLAG_RESTRICT_TO_MEMBINDING) { + /* FIXME: filter directly in backends during the discovery. + */ + hwloc_bitmap_t set = hwloc_bitmap_alloc(); + hwloc_membind_policy_t policy; + if (set) { + err = hwloc_get_membind(topology, set, &policy, HWLOC_MEMBIND_STRICT | HWLOC_MEMBIND_BYNODESET); + if (!err) + hwloc_topology_restrict(topology, set, HWLOC_RESTRICT_FLAG_BYNODESET); + hwloc_bitmap_free(set); + } + } + if (topology->backend_phases & HWLOC_DISC_PHASE_TWEAK) { dstatus.phase = HWLOC_DISC_PHASE_TWEAK; hwloc_discover_by_phase(topology, &dstatus, "TWEAK"); diff --git a/src/3rdparty/hwloc/src/traversal.c b/src/3rdparty/hwloc/src/traversal.c index f9076ab5..6765d702 100644 --- a/src/3rdparty/hwloc/src/traversal.c +++ b/src/3rdparty/hwloc/src/traversal.c @@ -1,6 +1,6 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2020 Inria. All rights reserved. + * Copyright © 2009-2021 Inria. All rights reserved. * Copyright © 2009-2010, 2020 Université Bordeaux * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. 
@@ -395,6 +395,8 @@ hwloc_type_sscanf(const char *string, hwloc_obj_type_t *typep, } else if (hwloc__type_match(string, "pcibridge", 5)) { type = HWLOC_OBJ_BRIDGE; ubtype = HWLOC_OBJ_BRIDGE_PCI; + /* if downstream_type can ever be non-PCI, we'll have to make strings more precise, + * or relax the hwloc_type_sscanf test */ } else if (hwloc__type_match(string, "pcidev", 3)) { type = HWLOC_OBJ_PCI_DEVICE; @@ -448,7 +450,9 @@ hwloc_type_sscanf(const char *string, hwloc_obj_type_t *typep, attrp->group.depth = depthattr; } else if (type == HWLOC_OBJ_BRIDGE && attrsize >= sizeof(attrp->bridge)) { attrp->bridge.upstream_type = ubtype; - attrp->bridge.downstream_type = HWLOC_OBJ_BRIDGE_PCI; /* nothing else so far */ + attrp->bridge.downstream_type = HWLOC_OBJ_BRIDGE_PCI; + /* if downstream_type can ever be non-PCI, we'll have to make strings more precise, + * or relax the hwloc_type_sscanf test */ } else if (type == HWLOC_OBJ_OS_DEVICE && attrsize >= sizeof(attrp->osdev)) { attrp->osdev.type = ostype; } @@ -531,6 +535,9 @@ hwloc_obj_type_snprintf(char * __hwloc_restrict string, size_t size, hwloc_obj_t else return hwloc_snprintf(string, size, "%s", hwloc_obj_type_string(type)); case HWLOC_OBJ_BRIDGE: + /* if downstream_type can ever be non-PCI, we'll have to make strings more precise, + * or relax the hwloc_type_sscanf test */ + assert(obj->attr->bridge.downstream_type == HWLOC_OBJ_BRIDGE_PCI); return hwloc_snprintf(string, size, obj->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_PCI ? 
"PCIBridge" : "HostBridge"); case HWLOC_OBJ_PCI_DEVICE: return hwloc_snprintf(string, size, "PCI"); @@ -648,8 +655,11 @@ hwloc_obj_attr_snprintf(char * __hwloc_restrict string, size_t size, hwloc_obj_t } else *up = '\0'; /* downstream is_PCI */ - snprintf(down, sizeof(down), "buses=%04x:[%02x-%02x]", - obj->attr->bridge.downstream.pci.domain, obj->attr->bridge.downstream.pci.secondary_bus, obj->attr->bridge.downstream.pci.subordinate_bus); + if (obj->attr->bridge.downstream_type == HWLOC_OBJ_BRIDGE_PCI) { + snprintf(down, sizeof(down), "buses=%04x:[%02x-%02x]", + obj->attr->bridge.downstream.pci.domain, obj->attr->bridge.downstream.pci.secondary_bus, obj->attr->bridge.downstream.pci.subordinate_bus); + } else + assert(0); if (*up) res = hwloc_snprintf(string, size, "%s%s%s", up, separator, down); else @@ -736,3 +746,92 @@ int hwloc_bitmap_singlify_per_core(hwloc_topology_t topology, hwloc_bitmap_t cpu } return 0; } + /* Find an object of the requested type that shares the locality of src. Normal or memory src: returns the first object of that type whose cpuset and nodeset both equal those of src; the requested type must itself be normal or memory. I/O src: only OS devices and PCI devices are handled, for src and for the requested type alike; src is walked up through its OS-device ancestors to a container, which is returned when a PCI device is requested, otherwise the first matching OS device in the container's io_first_child sibling list is returned. A non-NULL subtype must equal the candidate's subtype under strcasecmp; a non-NULL nameprefix is compared against the candidate's name with hwloc_strncasecmp over strlen(nameprefix) characters; a candidate lacking the corresponding field never matches. flags must be 0. Returns NULL with errno set to EINVAL (non-zero flags, unsupported src/type combination, src that is neither normal, memory nor I/O) or ENOENT (no match). */ +hwloc_obj_t +hwloc_get_obj_with_same_locality(hwloc_topology_t topology, hwloc_obj_t src, + hwloc_obj_type_t type, const char *subtype, const char *nameprefix, + unsigned long flags) +{ + if (flags) { /* no flag value is accepted: anything non-zero is rejected */ + errno = EINVAL; + return NULL; + } + + if (hwloc_obj_type_is_normal(src->type) || hwloc_obj_type_is_memory(src->type)) { + /* normal/memory type, look for normal/memory type with same sets */ + hwloc_obj_t obj; + + if (!hwloc_obj_type_is_normal(type) && !hwloc_obj_type_is_memory(type)) { + errno = EINVAL; + return NULL; + } + + obj = NULL; /* NOTE(review): presumably NULL makes hwloc_get_next_obj_by_type start from the first object of that type - confirm */ + while ((obj = hwloc_get_next_obj_by_type(topology, type, obj)) != NULL) { + if (!hwloc_bitmap_isequal(src->cpuset, obj->cpuset) + || !hwloc_bitmap_isequal(src->nodeset, obj->nodeset)) + continue; + if (subtype && (!obj->subtype || strcasecmp(subtype, obj->subtype))) + continue; + if (nameprefix && (!obj->name || hwloc_strncasecmp(nameprefix, obj->name, strlen(nameprefix)))) + continue; + return obj; /* first object passing every filter wins */ + } + errno = ENOENT; + return NULL; + + } else if (hwloc_obj_type_is_io(src->type)) { + /* I/O device, look for 
PCI/OS in same PCI */ + hwloc_obj_t pci; + + if ((src->type != HWLOC_OBJ_OS_DEVICE && src->type != HWLOC_OBJ_PCI_DEVICE) + || (type != HWLOC_OBJ_OS_DEVICE && type != HWLOC_OBJ_PCI_DEVICE)) { + errno = EINVAL; + return NULL; + } + + /* walk up to find the container */ + pci = src; + while (pci->type == HWLOC_OBJ_OS_DEVICE) + pci = pci->parent; /* NOTE(review): assumes every OS device has a non-NULL parent - confirm */ + + if (type == HWLOC_OBJ_PCI_DEVICE) { + if (pci->type != HWLOC_OBJ_PCI_DEVICE) { + errno = ENOENT; + return NULL; + } + if (subtype && (!pci->subtype || strcasecmp(subtype, pci->subtype))) { + errno = ENOENT; + return NULL; + } + if (nameprefix && (!pci->name || hwloc_strncasecmp(nameprefix, pci->name, strlen(nameprefix)))) { + errno = ENOENT; + return NULL; + } + return pci; + + } else { + /* find a matching osdev child */ + assert(type == HWLOC_OBJ_OS_DEVICE); + /* FIXME: won't work if we ever store osdevs in osdevs */ + hwloc_obj_t child; + for(child = pci->io_first_child; child; child = child->next_sibling) { + if (child->type != HWLOC_OBJ_OS_DEVICE) + /* FIXME: should never occur currently */ + continue; + if (subtype && (!child->subtype || strcasecmp(subtype, child->subtype))) + continue; + if (nameprefix && (!child->name || hwloc_strncasecmp(nameprefix, child->name, strlen(nameprefix)))) + continue; + return child; + } + } + errno = ENOENT; /* only reached when the OS-device search found nothing: every path of the PCI-device branch returns */ + return NULL; + + } else { + /* nothing for Misc */ + errno = EINVAL; + return NULL; + } +}