Script 'mail_helper' called by obssrc Hello community, here is the log from the commit of package openucx for openSUSE:Factory checked in at 2023-07-26 13:22:10 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/openucx (Old) and /work/SRC/openSUSE:Factory/.openucx.new.15225 (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "openucx" Wed Jul 26 13:22:10 2023 rev:27 rq:1100646 version:1.14.1 Changes: -------- --- /work/SRC/openSUSE:Factory/openucx/openucx.changes 2023-04-01 23:26:52.987300557 +0200 +++ /work/SRC/openSUSE:Factory/.openucx.new.15225/openucx.changes 2023-07-26 13:22:25.935479120 +0200 @@ -1,0 +2,16 @@ +Tue Jul 25 13:08:44 UTC 2023 - Nicolas Morey <nicolas.mo...@suse.com> + +- Update to v1.14.1 + - Fixed ROCm to prevent the locking of host pinned memory + - Added CUDA 12 based UCX builds to the release flow + - Increased the maximal number of endpoint configurations + - Fixed filter for a slow-lanes in selection logic + - Fixed TCP transport bandwidth calculation + - Fixed device detection for ROCM + - Fixed compatibility with CUDA 12 + - Fixed rendezvous threshold for multi-path configurations + - Fixed error message in case of static link + - Fixed BlueField-3 detection + - Multiple fixes for Azure CI pipeline + +------------------------------------------------------------------- Old: ---- ucx-1.14.0.tar.gz New: ---- ucx-1.14.1.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ openucx.spec ++++++ --- /var/tmp/diff_new_pack.NaLvZ5/_old 2023-07-26 13:22:26.807484056 +0200 +++ /var/tmp/diff_new_pack.NaLvZ5/_new 2023-07-26 13:22:26.811484079 +0200 @@ -20,7 +20,7 @@ %define version_suf %{nil} Name: openucx -Version: 1.14.0 +Version: 1.14.1 Release: 0 Summary: Communication layer for Message Passing (MPI) License: BSD-3-Clause ++++++ ucx-1.14.0.tar.gz -> ucx-1.14.1.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/ucx-1.14.0/NEWS new/ucx-1.14.1/NEWS --- old/ucx-1.14.0/NEWS 2023-03-13 21:22:52.000000000 +0100 +++ new/ucx-1.14.1/NEWS 2023-05-22 10:48:57.000000000 +0200 @@ -11,6 +11,20 @@ ### Features: ### Bugfixes: +## 1.14.1 (May 22, 2023) +### Bugfixes: +* Fixed ROCm to prevent the locking of host pinned memory +* Added CUDA 12 based UCX builds to the release flow +* Increased the maximal number of endpoint configurations +* Fixed filter for a slow-lanes in selection logic +* Fixed TCP transport bandwidth calculation +* Fixed device detection for ROCM +* Fixed compatibility with CUDA 12 +* Fixed rendezvous threshold for multi-path configurations +* Fixed error message in case of static link +* Fixed BlueField-3 detection +* Multiple fixes for Azure CI pipeline + ## 1.14.0 (March 13, 2023) ### Features: #### Core diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/ucx-1.14.0/bindings/java/src/test/java/org/openucx/jucx/UcpEndpointTest.java new/ucx-1.14.1/bindings/java/src/test/java/org/openucx/jucx/UcpEndpointTest.java --- old/ucx-1.14.0/bindings/java/src/test/java/org/openucx/jucx/UcpEndpointTest.java 2023-01-26 22:33:58.000000000 +0100 +++ new/ucx-1.14.1/bindings/java/src/test/java/org/openucx/jucx/UcpEndpointTest.java 2023-04-12 16:42:15.000000000 +0200 @@ -735,7 +735,8 @@ sendData.getMemory().getAddress(), 2L, UcpConstants.UCP_AM_FLAG_PERSISTENT_DATA, null, new UcpRequestParams().setMemoryType(memType).setMemoryHandle(sendData.getMemory())); - while (!Arrays.stream(requests).allMatch(r -> (r != null) && r.isCompleted())) { + while (!Arrays.stream(requests).allMatch(r -> (r != null) && r.isCompleted()) || + (persistantAmData.get() == null)) { worker1.progress(); worker2.progress(); } diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/ucx-1.14.0/cmake/ucx-config-version.cmake new/ucx-1.14.1/cmake/ucx-config-version.cmake --- old/ucx-1.14.0/cmake/ucx-config-version.cmake 2023-03-13 21:24:15.000000000 +0100 +++ new/ucx-1.14.1/cmake/ucx-config-version.cmake 2023-05-22 10:53:08.000000000 +0200 @@ -10,7 +10,7 @@ # the requested version string are exactly the same and it sets # PACKAGE_VERSION_COMPATIBLE if the current version is >= requested version. -set(PACKAGE_VERSION 1.14.0) +set(PACKAGE_VERSION 1.14.1) if (PACKAGE_FIND_VERSION_RANGE) # Package version must be in the requested version range diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/ucx-1.14.0/configure new/ucx-1.14.1/configure --- old/ucx-1.14.0/configure 2023-03-13 21:23:46.000000000 +0100 +++ new/ucx-1.14.1/configure 2023-05-22 10:52:42.000000000 +0200 @@ -4291,7 +4291,7 @@ if test x"${GITBIN}" = x"yes"; then : # remove preceding "refs/heads/" (11 characters) for symbolic ref - SCM_VERSION=ae505b9 + SCM_VERSION=04897a0 else @@ -5250,7 +5250,7 @@ MAJOR_VERSION=1 MINOR_VERSION=14 -PATCH_VERSION=0 +PATCH_VERSION=1 EXTRA_VERSION= VERSION=$MAJOR_VERSION.$MINOR_VERSION.$PATCH_VERSION$EXTRA_VERSION SOVERSION=0:0:0 diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/ucx-1.14.0/configure.ac new/ucx-1.14.1/configure.ac --- old/ucx-1.14.0/configure.ac 2023-03-13 21:22:52.000000000 +0100 +++ new/ucx-1.14.1/configure.ac 2023-05-22 10:48:57.000000000 +0200 @@ -10,7 +10,7 @@ define([ucx_ver_major], 1) # Major version. Usually does not change. define([ucx_ver_minor], 14) # Minor version. Increased for each release. -define([ucx_ver_patch], 0) # Patch version. Increased for a bugfix release. +define([ucx_ver_patch], 1) # Patch version. Increased for a bugfix release. define([ucx_ver_extra], ) # Extra version string. Empty for a general release. define([ts], esyscmd([sh -c "date +%Y%m%d%H%M%S"])) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/ucx-1.14.0/debian/changelog new/ucx-1.14.1/debian/changelog --- old/ucx-1.14.0/debian/changelog 2023-03-13 21:24:14.000000000 +0100 +++ new/ucx-1.14.1/debian/changelog 2023-05-22 10:53:08.000000000 +0200 @@ -1,4 +1,4 @@ -ucx (1.14.ae505b9) unstable; urgency=low +ucx (1.14.04897a0) unstable; urgency=low * Initial release (Closes: #nnnn) <nnnn is the bug number of your ITP> diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/ucx-1.14.0/src/ucm/util/sys.c new/ucx-1.14.1/src/ucm/util/sys.c --- old/ucx-1.14.0/src/ucm/util/sys.c 2023-03-13 21:22:52.000000000 +0100 +++ new/ucx-1.14.1/src/ucm/util/sys.c 2023-05-22 10:48:57.000000000 +0200 @@ -322,7 +322,7 @@ (void)dlerror(); dl = dlopen(info.dli_fname, flags); if (dl == NULL) { - ucm_warn("failed to load '%s': %s", info.dli_fname, dlerror()); + ucm_diag("failed to load '%s': %s", info.dli_fname, dlerror()); continue; } diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/ucx-1.14.0/src/ucp/core/ucp_ep.c new/ucx-1.14.1/src/ucp/core/ucp_ep.c --- old/ucx-1.14.0/src/ucp/core/ucp_ep.c 2023-03-13 21:22:52.000000000 +0100 +++ new/ucx-1.14.1/src/ucp/core/ucp_ep.c 2023-05-22 10:48:57.000000000 +0200 @@ -1864,8 +1864,9 @@ ucp_ep_thresh_params_t *params, int eager) { - ucp_context_h context = worker->context; - ucp_md_map_t md_map = 0; + ucp_context_h context = worker->context; + uint8_t num_paths[UCP_MAX_RESOURCES] = {}; + ucp_md_map_t md_map = 0; ucp_lane_index_t lane; ucp_rsc_index_t rsc_index; ucp_md_index_t md_index; @@ -1880,6 +1881,11 @@ memset(params, 0, sizeof(*params)); for (i = 0; (i < UCP_MAX_LANES) && (lanes[i] != UCP_NULL_LANE); i++) { + rsc_index = config->key.lanes[lanes[i]].rsc_index; + ++num_paths[rsc_index]; + } + + for (i = 0; (i < UCP_MAX_LANES) && (lanes[i] != UCP_NULL_LANE); i++) { lane = lanes[i]; rsc_index = config->key.lanes[lane].rsc_index; if (rsc_index == UCP_NULL_RESOURCE) { @@ -1901,7 +1907,9 @@ } } - bw = ucp_tl_iface_bandwidth(context, &iface_attr->bandwidth); + bw = ucp_tl_iface_bandwidth(context, &iface_attr->bandwidth) / + num_paths[rsc_index]; + if (eager && (iface_attr->cap.am.max_bcopy > 0)) { /* Eager protocol has overhead for each fragment */ perf_attr.field_mask = UCT_PERF_ATTR_FIELD_OPERATION | diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/ucx-1.14.0/src/ucp/core/ucp_ep.inl new/ucx-1.14.1/src/ucp/core/ucp_ep.inl --- old/ucx-1.14.0/src/ucp/core/ucp_ep.inl 2023-03-13 21:22:52.000000000 +0100 +++ new/ucx-1.14.1/src/ucp/core/ucp_ep.inl 2023-04-03 16:15:14.000000000 +0200 @@ -20,7 +20,7 @@ static inline ucp_ep_config_t *ucp_ep_config(ucp_ep_h ep) { ucs_assert(ep->cfg_index != UCP_WORKER_CFG_INDEX_NULL); - return &ep->worker->ep_config[ep->cfg_index]; + return &ucs_array_elem(&ep->worker->ep_config, ep->cfg_index); } static UCS_F_ALWAYS_INLINE uct_ep_h ucp_ep_get_fast_lane(ucp_ep_h ep, diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/ucx-1.14.0/src/ucp/core/ucp_rkey.inl new/ucx-1.14.1/src/ucp/core/ucp_rkey.inl --- old/ucx-1.14.0/src/ucp/core/ucp_rkey.inl 2023-03-13 21:22:52.000000000 +0100 +++ new/ucx-1.14.1/src/ucp/core/ucp_rkey.inl 2023-05-22 10:48:57.000000000 +0200 @@ -53,7 +53,8 @@ ucp_ep_rkey_unpack_reachable(ucp_ep_h ep, const void *buffer, size_t length, ucp_rkey_h *rkey_p) { - ucp_ep_config_t *config = &ep->worker->ep_config[ep->cfg_index]; + ucp_ep_config_t *config = &ucs_array_elem(&ep->worker->ep_config, + ep->cfg_index); return ucp_ep_rkey_unpack_internal(ep, buffer, length, config->key.reachable_md_map, rkey_p); } diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/ucx-1.14.0/src/ucp/core/ucp_types.h new/ucx-1.14.1/src/ucp/core/ucp_types.h --- old/ucx-1.14.0/src/ucp/core/ucp_types.h 2023-03-13 21:22:52.000000000 +0100 +++ new/ucx-1.14.1/src/ucp/core/ucp_types.h 2023-05-22 10:48:57.000000000 +0200 @@ -56,7 +56,7 @@ /* Worker configuration index for endpoint and rkey */ typedef uint8_t ucp_worker_cfg_index_t; -#define UCP_WORKER_MAX_EP_CONFIG 64 +#define UCP_WORKER_MAX_EP_CONFIG UINT8_MAX #define UCP_WORKER_MAX_RKEY_CONFIG 128 #define UCP_WORKER_CFG_INDEX_NULL UINT8_MAX diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/ucx-1.14.0/src/ucp/core/ucp_version.c new/ucx-1.14.1/src/ucp/core/ucp_version.c --- old/ucx-1.14.0/src/ucp/core/ucp_version.c 2023-03-13 21:24:14.000000000 +0100 +++ new/ucx-1.14.1/src/ucp/core/ucp_version.c 2023-05-22 10:53:08.000000000 +0200 @@ -9,10 +9,10 @@ { *major_version = 1; *minor_version = 14; - *release_number = 0; + *release_number = 1; } const char *ucp_get_version_string() { - return "1.14.0"; + return "1.14.1"; } diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/ucx-1.14.0/src/ucp/core/ucp_worker.c new/ucx-1.14.1/src/ucp/core/ucp_worker.c --- old/ucx-1.14.0/src/ucp/core/ucp_worker.c 2023-03-13 21:22:52.000000000 +0100 +++ new/ucx-1.14.1/src/ucp/core/ucp_worker.c 2023-05-22 10:48:57.000000000 +0200 @@ -1975,28 +1975,31 @@ "empty endpoint configurations are not allowed"); /* Search for the given key in the ep_config array */ - for (ep_cfg_index = 0; ep_cfg_index < worker->ep_config_count; - ++ep_cfg_index) { - if (ucp_ep_config_is_equal(&worker->ep_config[ep_cfg_index].key, key)) { + ucs_array_for_each(ep_config, &worker->ep_config) { + if (ucp_ep_config_is_equal(&ep_config->key, key)) { + ep_cfg_index = ep_config - worker->ep_config.buffer; goto out; } } - if (worker->ep_config_count >= UCP_WORKER_MAX_EP_CONFIG) { + /* Create new configuration */ + ucs_array_append(ep_config_arr, &worker->ep_config, + return UCS_ERR_NO_MEMORY); + if (ucs_array_length(&worker->ep_config) >= UCP_WORKER_MAX_EP_CONFIG) { + ucs_array_pop_back(&worker->ep_config); ucs_error("too many ep configurations: %d (max: %d)", - worker->ep_config_count, UCP_WORKER_MAX_EP_CONFIG); + ucs_array_length(&worker->ep_config), + UCP_WORKER_MAX_EP_CONFIG); return UCS_ERR_EXCEEDS_LIMIT; } - /* Create new configuration */ - ep_cfg_index = worker->ep_config_count; - ep_config = &worker->ep_config[ep_cfg_index]; - status = ucp_ep_config_init(worker, ep_config, key); + ep_config = ucs_array_last(&worker->ep_config); + status = ucp_ep_config_init(worker, ep_config, key); if (status != UCS_OK) { return status; } - ++worker->ep_config_count; + ep_cfg_index = ucs_array_length(&worker->ep_config) - 1; if (ep_init_flags & UCP_EP_INIT_FLAG_INTERNAL) { /* Do not initialize short protocol thresholds for internal endpoints, @@ -2040,7 +2043,8 @@ const ucs_sys_dev_distance_t *lanes_distance, ucp_worker_cfg_index_t *cfg_index_p) { - const ucp_ep_config_t *ep_config = &worker->ep_config[key->ep_cfg_index]; + const ucp_ep_config_t *ep_config = &ucs_array_elem(&worker->ep_config, + key->ep_cfg_index); ucp_worker_cfg_index_t rkey_cfg_index; ucp_rkey_config_t *rkey_config; ucp_lane_index_t lane; @@ -2131,15 +2135,17 @@ static void ucp_worker_destroy_configs(ucp_worker_h worker) { - unsigned i; + ucp_ep_config_t *ep_config; + ucp_rkey_config_t *rkey_config; - for (i = 0; i < worker->ep_config_count; ++i) { - ucp_ep_config_cleanup(worker, &worker->ep_config[i]); + ucs_array_for_each(ep_config, &worker->ep_config) { + ucp_ep_config_cleanup(worker, ep_config); } - worker->ep_config_count = 0; + ucs_array_cleanup_dynamic(&worker->ep_config); - for (i = 0; i < worker->rkey_config_count; ++i) { - ucp_proto_select_cleanup(&worker->rkey_config[i].proto_select); + ucs_carray_for_each(rkey_config, worker->rkey_config, + worker->rkey_config_count) { + ucp_proto_select_cleanup(&rkey_config->proto_select); } worker->rkey_config_count = 0; } @@ -2250,7 +2256,6 @@ worker->flush_ops_count = 0; worker->inprogress = 0; worker->rkey_config_count = 0; - worker->ep_config_count = 0; worker->num_active_ifaces = 0; worker->num_ifaces = 0; worker->am_message_id = ucs_generate_uuid(0); @@ -2333,6 +2338,8 @@ goto err_destroy_ep_map; } + ucs_array_init_dynamic(&worker->ep_config); + /* Create statistics */ status = UCS_STATS_NODE_ALLOC(&worker->stats, &ucp_worker_stats_class, ucs_stats_get_root(), "-%p", worker); diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/ucx-1.14.0/src/ucp/core/ucp_worker.h new/ucx-1.14.1/src/ucp/core/ucp_worker.h --- old/ucx-1.14.0/src/ucp/core/ucp_worker.h 2023-03-13 21:22:52.000000000 +0100 +++ new/ucx-1.14.1/src/ucp/core/ucp_worker.h 2023-05-22 10:48:57.000000000 +0200 @@ -23,6 +23,8 @@ #include <ucs/datastruct/ptr_map.h> #include <ucs/arch/bitops.h> +#include <ucs/datastruct/array.inl> + /* The size of the private buffer in UCT descriptor headroom, which UCP may * use for its own needs. This size does not include ucp_recv_desc_t length, @@ -221,6 +223,9 @@ KHASH_TYPE(ucp_worker_mpool_hash, ucp_worker_mpool_key_t, ucs_mpool_t); typedef khash_t(ucp_worker_mpool_hash) ucp_worker_mpool_hash_t; +/* EP configurations storage */ +UCS_ARRAY_DECLARE_TYPE(ep_config_arr, unsigned, ucp_ep_config_t); + /** * UCP worker iface, which encapsulates UCT iface, its attributes and * some auxiliary info needed for tag matching offloads. @@ -324,8 +329,7 @@ UCS_PTR_MAP_T(request) request_map; /* UCP requests key to ptr mapping */ - unsigned ep_config_count; /* Current number of ep configurations */ - ucp_ep_config_t ep_config[UCP_WORKER_MAX_EP_CONFIG]; + ucs_array_t(ep_config_arr) ep_config; /* EP configurations storage */ unsigned rkey_config_count; /* Current number of rkey configurations */ ucp_rkey_config_t rkey_config[UCP_WORKER_MAX_RKEY_CONFIG]; diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/ucx-1.14.0/src/ucp/core/ucp_worker.inl new/ucx-1.14.1/src/ucp/core/ucp_worker.inl --- old/ucx-1.14.0/src/ucp/core/ucp_worker.inl 2023-03-13 21:22:52.000000000 +0100 +++ new/ucx-1.14.1/src/ucp/core/ucp_worker.inl 2023-05-22 10:48:57.000000000 +0200 @@ -22,6 +22,10 @@ ucp_worker_cfg_index_t, 1, ucp_rkey_config_hash_func, ucp_rkey_config_is_equal); +/* EP configurations storage */ +UCS_ARRAY_IMPL(ep_config_arr, unsigned, ucp_ep_config_t, + static UCS_F_ALWAYS_INLINE); + /** * Resolve remote key configuration key to a remote key configuration index. * diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/ucx-1.14.0/src/ucp/proto/proto_select.c new/ucx-1.14.1/src/ucp/proto/proto_select.c --- old/ucx-1.14.0/src/ucp/proto/proto_select.c 2023-03-13 21:22:52.000000000 +0100 +++ new/ucx-1.14.1/src/ucp/proto/proto_select.c 2023-05-22 10:48:57.000000000 +0200 @@ -216,7 +216,8 @@ init_params.select_param = select_param; init_params.ep_cfg_index = ep_cfg_index; init_params.rkey_cfg_index = rkey_cfg_index; - init_params.ep_config_key = &worker->ep_config[ep_cfg_index].key; + init_params.ep_config_key = &ucs_array_elem(&worker->ep_config, + ep_cfg_index).key; if (rkey_cfg_index == UCP_WORKER_CFG_INDEX_NULL) { init_params.rkey_config_key = NULL; @@ -818,7 +819,7 @@ if (rkey_cfg_index == UCP_WORKER_CFG_INDEX_NULL) { *new_rkey_cfg_index = UCP_WORKER_CFG_INDEX_NULL; - return &worker->ep_config[ep_cfg_index].proto_select; + return &ucs_array_elem(&worker->ep_config, ep_cfg_index).proto_select; } else { rkey_config_key = worker->rkey_config[rkey_cfg_index].key; @@ -844,7 +845,8 @@ .priv = proto_config->priv, .worker = worker, .select_param = &proto_config->select_param, - .ep_config_key = &worker->ep_config[proto_config->ep_cfg_index].key, + .ep_config_key = &ucs_array_elem(&worker->ep_config, + proto_config->ep_cfg_index).key, .msg_length = msg_length }; diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/ucx-1.14.0/src/ucp/rndv/proto_rndv.c new/ucx-1.14.1/src/ucp/rndv/proto_rndv.c --- old/ucx-1.14.0/src/ucp/rndv/proto_rndv.c 2023-03-13 21:22:52.000000000 +0100 +++ new/ucx-1.14.1/src/ucp/rndv/proto_rndv.c 2023-05-22 10:48:57.000000000 +0200 @@ -98,7 +98,8 @@ const ucp_ep_config_t *ep_config; uint64_t remote_md_map; - ep_config = &worker->ep_config[params->super.super.ep_cfg_index]; + ep_config = &ucs_array_elem(&worker->ep_config, + params->super.super.ep_cfg_index); remote_md_map = 0; ucs_carray_for_each(lane_cfg, ep_config->key.lanes, @@ -127,7 +128,8 @@ { ucp_worker_h worker = params->super.super.worker; ucp_worker_cfg_index_t ep_cfg_index = params->super.super.ep_cfg_index; - const ucp_ep_config_t *ep_config = &worker->ep_config[ep_cfg_index]; + const ucp_ep_config_t *ep_config = &ucs_array_elem(&worker->ep_config, + ep_cfg_index); ucs_sys_dev_distance_t lanes_distance[UCP_MAX_LANES]; const ucp_proto_select_elem_t *select_elem; ucp_rkey_config_key_t rkey_config_key; diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/ucx-1.14.0/src/ucp/wireup/select.c new/ucx-1.14.1/src/ucp/wireup/select.c --- old/ucx-1.14.0/src/ucp/wireup/select.c 2023-03-13 21:22:52.000000000 +0100 +++ new/ucx-1.14.1/src/ucp/wireup/select.c 2023-05-22 10:48:57.000000000 +0200 @@ -14,6 +14,7 @@ #include "address.h" #include <ucs/algorithm/qsort_r.h> +#include <ucs/datastruct/array.inl> #include <ucs/datastruct/queue.h> #include <ucs/sys/sock.h> #include <ucp/core/ucp_ep.inl> @@ -98,6 +99,8 @@ ucp_tl_bitmap_t tl_bitmap; /* TL bitmap of selected resources */ } ucp_wireup_select_context_t; +UCS_ARRAY_DEFINE_INLINE(select_info, unsigned, ucp_wireup_select_info_t); + static const char *ucp_wireup_cmpt_flags[] = { [ucs_ilog2(UCT_COMPONENT_FLAG_RKEY_PTR)] = "obtain remote memory pointer", }; @@ -1392,6 +1395,70 @@ return size / t * 1e-5; } +static double ucp_wireup_get_lane_bw(ucp_worker_h worker, + const ucp_wireup_select_info_t *sinfo, + const ucp_address_entry_t *address) +{ + ucp_context_h context = worker->context; + const uct_iface_attr_t *iface_attr; + double bw_local, bw_remote; + + iface_attr = ucp_worker_iface_get_attr(worker, sinfo->rsc_index); + bw_local = ucp_tl_iface_bandwidth(context, &iface_attr->bandwidth); + bw_remote = address[sinfo->addr_index].iface_attr.bandwidth; + + return ucs_min(bw_local, bw_remote); +} + +static unsigned +ucp_wireup_add_fast_lanes(ucp_worker_h worker, + const ucp_wireup_select_params_t *select_params, + const ucs_array_t(select_info) *sinfo_array, + ucp_lane_type_t lane_type, + ucp_wireup_select_context_t *select_ctx) +{ + ucp_lane_index_t num_lanes = 0; + double max_bw = 0; + ucp_context_h context = worker->context; + const double max_ratio = 1. / context->config.ext.multi_lane_max_ratio; + const ucp_address_entry_t *address_list; + ucs_status_t status; + double lane_bw; + const ucp_wireup_select_info_t *sinfo; + + address_list = select_params->address->address_list; + + /* Iterate over all elements and calculate max BW */ + ucs_array_for_each(sinfo, sinfo_array) { + lane_bw = ucp_wireup_get_lane_bw(worker, sinfo, address_list); + max_bw = ucs_max(lane_bw, max_bw); + } + + /* Compare each element to max BW and filter only fast lanes */ + ucs_array_for_each(sinfo, sinfo_array) { + lane_bw = ucp_wireup_get_lane_bw(worker, sinfo, address_list); + + if (lane_bw < (max_bw * max_ratio)) { + ucs_trace(UCT_TL_RESOURCE_DESC_FMT + " : bandwidth %.2f lower than %.2f x %.2f, dropping lane", + UCT_TL_RESOURCE_DESC_ARG( + &context->tl_rscs[sinfo->rsc_index].tl_rsc), + lane_bw, max_ratio, max_bw); + continue; + } + + status = ucp_wireup_add_lane(select_params, sinfo, lane_type, + num_lanes == 0, select_ctx); + if (status != UCS_OK) { + break; + } + + num_lanes++; + } + + return num_lanes; +} + static unsigned ucp_wireup_add_bw_lanes(const ucp_wireup_select_params_t *select_params, ucp_wireup_select_bw_info_t *bw_info, @@ -1400,21 +1467,19 @@ { ucp_ep_h ep = select_params->ep; ucp_context_h context = ep->worker->context; - ucp_wireup_select_info_t sinfo = {0}; ucp_wireup_dev_usage_count dev_count = {}; + UCS_ARRAY_DEFINE_ONSTACK(sinfo_array, select_info, UCP_MAX_LANES); const uct_iface_attr_t *iface_attr; const ucp_address_entry_t *ae; ucs_status_t status; - unsigned num_lanes; uint64_t local_dev_bitmap; uint64_t remote_dev_bitmap; ucp_rsc_index_t dev_index; ucp_md_map_t md_map; ucp_rsc_index_t rsc_index; unsigned addr_index; - int show_error; + ucp_wireup_select_info_t *sinfo; - num_lanes = 0; md_map = bw_info->md_map; local_dev_bitmap = bw_info->local_dev_bitmap; remote_dev_bitmap = bw_info->remote_dev_bitmap; @@ -1423,30 +1488,23 @@ /* lookup for requested number of lanes or limit of MD map * (we have to limit MD's number to avoid malloc in * memory registration) */ - while ((num_lanes < bw_info->max_lanes) && + while ((ucs_array_length(&sinfo_array) < bw_info->max_lanes) && (ucs_popcount(md_map) < UCP_MAX_OP_MDS)) { if (excl_lane == UCP_NULL_LANE) { + sinfo = ucs_array_append_fixed(select_info, &sinfo_array); status = ucp_wireup_select_transport(select_ctx, select_params, &bw_info->criteria, tl_bitmap, UINT64_MAX, local_dev_bitmap, - remote_dev_bitmap, 0, &sinfo); - if (status != UCS_OK) { - break; - } - - rsc_index = sinfo.rsc_index; - addr_index = sinfo.addr_index; - dev_index = context->tl_rscs[rsc_index].dev_index; - sinfo.path_index = dev_count.local[dev_index]; - show_error = (num_lanes == 0); - status = ucp_wireup_add_lane(select_params, &sinfo, - bw_info->criteria.lane_type, - show_error, select_ctx); + remote_dev_bitmap, 0, sinfo); if (status != UCS_OK) { + ucs_array_pop_back(&sinfo_array); break; } - num_lanes++; + rsc_index = sinfo->rsc_index; + addr_index = sinfo->addr_index; + dev_index = context->tl_rscs[rsc_index].dev_index; + sinfo->path_index = dev_count.local[dev_index]; } else { /* disqualify/count lane_desc_idx */ addr_index = select_ctx->lane_descs[excl_lane].addr_index; @@ -1476,7 +1534,8 @@ bw_info->criteria.arg = NULL; /* To suppress compiler warning */ - return num_lanes; + return ucp_wireup_add_fast_lanes(ep->worker, select_params, &sinfo_array, + bw_info->criteria.lane_type, select_ctx); } static ucs_status_t diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/ucx-1.14.0/src/uct/api/version.h new/ucx-1.14.1/src/uct/api/version.h --- old/ucx-1.14.0/src/uct/api/version.h 2023-03-13 21:24:14.000000000 +0100 +++ new/ucx-1.14.1/src/uct/api/version.h 2023-05-22 10:53:08.000000000 +0200 @@ -10,10 +10,10 @@ #define UCT_VERNO_MAJOR 1 #define UCT_VERNO_MINOR 14 -#define UCT_VERNO_PATCH 0 +#define UCT_VERNO_PATCH 1 #define UCT_VERNO_EXTRA "" -#define UCT_VERNO_STRING "1.14.0" -#define UCT_SCM_VERSION "ae505b9" +#define UCT_VERNO_STRING "1.14.1" +#define UCT_SCM_VERSION "04897a0" #define UCT_SCM_BRANCH "" #define UCT_MINOR_BIT (16UL) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/ucx-1.14.0/src/uct/cuda/base/cuda_md.c new/ucx-1.14.1/src/uct/cuda/base/cuda_md.c --- old/ucx-1.14.0/src/uct/cuda/base/cuda_md.c 2023-03-13 21:22:52.000000000 +0100 +++ new/ucx-1.14.1/src/uct/cuda/base/cuda_md.c 2023-05-22 10:48:57.000000000 +0200 @@ -231,6 +231,17 @@ /* Get fxn ptr for cuMemGetHandleForAddressRange in case installed libcuda * does not have the definition for it even though 11.7 header includes the * declaration and avoid link error */ +#if CUDA_VERSION >= 12000 + CUdriverProcAddressQueryResult proc_addr_res; + cu_err = cuGetProcAddress("cuMemGetHandleForAddressRange", + (void**)&get_handle_func, 12000, + CU_GET_PROC_ADDRESS_DEFAULT, &proc_addr_res); + if ((cu_err != CUDA_SUCCESS) || + (proc_addr_res != CU_GET_PROC_ADDRESS_SUCCESS)) { + ucs_debug("cuMemGetHandleForAddressRange not found"); + return UCT_DMABUF_FD_INVALID; + } +#else cu_err = cuGetProcAddress("cuMemGetHandleForAddressRange", (void**)&get_handle_func, 11070, CU_GET_PROC_ADDRESS_DEFAULT); @@ -238,6 +249,7 @@ ucs_debug("cuMemGetHandleForAddressRange not found"); return UCT_DMABUF_FD_INVALID; } +#endif cu_err = get_handle_func((void*)&fd, (uintptr_t)addr_mem_info->base_address, addr_mem_info->alloc_length, diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/ucx-1.14.0/src/uct/ib/base/ib_device.c new/ucx-1.14.1/src/uct/ib/base/ib_device.c --- old/ucx-1.14.0/src/uct/ib/base/ib_device.c 2023-03-13 21:22:52.000000000 +0100 +++ new/ucx-1.14.1/src/uct/ib/base/ib_device.c 2023-05-22 10:48:58.000000000 +0200 @@ -145,6 +145,9 @@ {"BlueField 2", {0x15b3, 0xa2d6}, UCT_IB_DEVICE_FLAG_MELLANOX | UCT_IB_DEVICE_FLAG_MLX5_PRM | UCT_IB_DEVICE_FLAG_DC_V2, 61}, + {"BlueField 3", {0x15b3, 0xa2dc}, + UCT_IB_DEVICE_FLAG_MELLANOX | UCT_IB_DEVICE_FLAG_MLX5_PRM | + UCT_IB_DEVICE_FLAG_DC_V2, 61}, {"Generic HCA", {0, 0}, 0, 0}, {NULL} }; diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/ucx-1.14.0/src/uct/rocm/base/rocm_base.c new/ucx-1.14.1/src/uct/rocm/base/rocm_base.c --- old/ucx-1.14.0/src/uct/rocm/base/rocm_base.c 2023-03-13 21:22:52.000000000 +0100 +++ new/ucx-1.14.1/src/uct/rocm/base/rocm_base.c 2023-05-22 10:48:58.000000000 +0200 @@ -175,8 +175,12 @@ *base_size = info.sizeInBytes; } if (dev_type != NULL) { - status = hsa_agent_get_info(info.agentOwner, HSA_AGENT_INFO_DEVICE, - dev_type); + if (info.type == HSA_EXT_POINTER_TYPE_UNKNOWN) { + *dev_type = HSA_DEVICE_TYPE_CPU; + } else { + status = hsa_agent_get_info(info.agentOwner, HSA_AGENT_INFO_DEVICE, + dev_type); + } } return status; diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/ucx-1.14.0/src/uct/rocm/copy/rocm_copy_ep.c new/ucx-1.14.1/src/uct/rocm/copy/rocm_copy_ep.c --- old/ucx-1.14.0/src/uct/rocm/copy/rocm_copy_ep.c 2023-03-13 21:22:52.000000000 +0100 +++ new/ucx-1.14.1/src/uct/rocm/copy/rocm_copy_ep.c 2023-05-22 10:48:58.000000000 +0200 @@ -119,11 +119,14 @@ return UCS_ERR_IO_ERROR; } - if ((remote_addr_mem_type == HSA_EXT_POINTER_TYPE_HSA) && - (dev_type == HSA_DEVICE_TYPE_GPU)) { - /* UCS_MEMORY_TYPE_ROCM */ + if (remote_addr_mem_type == HSA_EXT_POINTER_TYPE_HSA) { remote_addr_mod = (void*)remote_addr; - agent = tmp_agent; + if (dev_type == HSA_DEVICE_TYPE_GPU) { + /* UCS_MEMORY_TYPE_ROCM */ + agent = tmp_agent; + } else { + remote_addr_is_host = 1; + } } else if ((remote_addr_mem_type == HSA_EXT_POINTER_TYPE_LOCKED) && (size == dev_size)) { /* locked host memory, e.g. hipHostRegister, OR previously registered */ @@ -152,11 +155,14 @@ return UCS_ERR_IO_ERROR; } - if ((iov_buffer_mem_type == HSA_EXT_POINTER_TYPE_HSA) && - (dev_type == HSA_DEVICE_TYPE_GPU)) { - /* UCS_MEMORY_TYPE_ROCM */ + if (iov_buffer_mem_type == HSA_EXT_POINTER_TYPE_HSA) { iov_buffer_mod = iov->buffer; - agent = tmp_agent; + if (dev_type == HSA_DEVICE_TYPE_GPU) { + /* UCS_MEMORY_TYPE_ROCM */ + agent = tmp_agent; + } else { + iov_buffer_is_host = 1; + } } else if ((iov_buffer_mem_type == HSA_EXT_POINTER_TYPE_LOCKED) && (size == dev_size)) { /* locked host memory (e.g. hipHostRegister) OR previously registered */ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/ucx-1.14.0/src/uct/tcp/tcp.h new/ucx-1.14.1/src/uct/tcp/tcp.h --- old/ucx-1.14.0/src/uct/tcp/tcp.h 2023-03-13 21:22:52.000000000 +0100 +++ new/ucx-1.14.1/src/uct/tcp/tcp.h 2023-04-03 16:15:14.000000000 +0200 @@ -407,6 +407,7 @@ unsigned syn_cnt; /* Number of SYN retransmits that TCP should send * before aborting the attempt to connect. * It cannot exceed 255. */ + double max_bw; /* Upper bound to TCP iface bandwidth */ struct { ucs_time_t idle; /* The time the connection needs to remain * idle before TCP starts sending keepalive @@ -447,6 +448,7 @@ uct_iface_mpool_config_t tx_mpool; uct_iface_mpool_config_t rx_mpool; ucs_range_spec_t port_range; + double max_bw; struct { ucs_time_t idle; unsigned long cnt; diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/ucx-1.14.0/src/uct/tcp/tcp_iface.c new/ucx-1.14.1/src/uct/tcp/tcp_iface.c --- old/ucx-1.14.0/src/uct/tcp/tcp_iface.c 2023-03-13 21:22:52.000000000 +0100 +++ new/ucx-1.14.1/src/uct/tcp/tcp_iface.c 2023-05-22 10:48:58.000000000 +0200 @@ -17,6 +17,7 @@ #include <sys/poll.h> #include <netinet/tcp.h> #include <dirent.h> +#include <float.h> #define UCT_TCP_IFACE_NETDEV_DIR "/sys/class/net" @@ -86,6 +87,10 @@ "let the operating system select the port number.", ucs_offsetof(uct_tcp_iface_config_t, port_range), UCS_CONFIG_TYPE_RANGE_SPEC}, + {"MAX_BW", "2200MBs", + "Upper bound to TCP iface bandwidth. 'auto' means BW is unlimited.", + ucs_offsetof(uct_tcp_iface_config_t, max_bw), UCS_CONFIG_TYPE_BW}, + #ifdef UCT_TCP_EP_KEEPALIVE {"KEEPIDLE", UCS_PP_MAKE_STRING(UCT_TCP_EP_DEFAULT_KEEPALIVE_IDLE) "s", "The time the connection needs to remain idle before TCP starts sending " @@ -221,7 +226,7 @@ ucs_status_t status; int is_default; char sysfs_path[PATH_MAX]; - double pci_bw, network_bw; + double pci_bw, network_bw, calculated_bw; uct_base_iface_query(&iface->super, attr); @@ -233,7 +238,10 @@ ucs_snprintf_safe(sysfs_path, PATH_MAX, "%s/%s/device", UCT_TCP_IFACE_NETDEV_DIR, iface->if_name); pci_bw = ucs_topo_get_pci_bw(iface->if_name, sysfs_path); - attr->bandwidth.shared = ucs_min(pci_bw, network_bw); + calculated_bw = ucs_min(pci_bw, network_bw); + + /* Bandwidth is bounded by TCP stack computation time */ + attr->bandwidth.shared = ucs_min(calculated_bw, iface->config.max_bw); attr->ep_addr_len = sizeof(uct_tcp_ep_addr_t); attr->iface_addr_len = sizeof(uct_tcp_iface_addr_t); @@ -664,6 +672,10 @@ ucs_time_from_sec(UCT_TCP_EP_DEFAULT_KEEPALIVE_IDLE); } + self->config.max_bw = UCS_CONFIG_DBL_IS_AUTO(config->max_bw) ? + DBL_MAX : + config->max_bw; + if (self->config.tx_seg_size > self->config.rx_seg_size) { ucs_error("RX segment size (%zu) must be >= TX segment size (%zu)", self->config.rx_seg_size, self->config.tx_seg_size); diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/ucx-1.14.0/test/gtest/ucp/test_ucp_proto.cc new/ucx-1.14.1/test/gtest/ucp/test_ucp_proto.cc --- old/ucx-1.14.0/test/gtest/ucp/test_ucp_proto.cc 2023-03-13 21:22:52.000000000 +0100 +++ new/ucx-1.14.1/test/gtest/ucp/test_ucp_proto.cc 2023-05-22 10:48:58.000000000 +0200 @@ -128,9 +128,11 @@ ucp_worker_cfg_index_t ep_cfg_index = sender().ep()->cfg_index; ucp_worker_cfg_index_t rkey_cfg_index = UCP_WORKER_CFG_INDEX_NULL; - auto select_elem = ucp_proto_select_lookup( - worker, &worker->ep_config[ep_cfg_index].proto_select, ep_cfg_index, - rkey_cfg_index, &select_param, 0); + auto proto_select = &ucs_array_elem(&worker->ep_config, + ep_cfg_index).proto_select; + auto select_elem = ucp_proto_select_lookup(worker, proto_select, + ep_cfg_index, rkey_cfg_index, + &select_param, 0); EXPECT_NE(nullptr, select_elem); ucp_ep_print_info(sender().ep(), stdout); diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/ucx-1.14.0/test/gtest/ucp/test_ucp_worker.cc new/ucx-1.14.1/test/gtest/ucp/test_ucp_worker.cc --- old/ucx-1.14.0/test/gtest/ucp/test_ucp_worker.cc 2023-03-13 21:22:52.000000000 +0100 +++ new/ucx-1.14.1/test/gtest/ucp/test_ucp_worker.cc 2023-05-22 10:48:58.000000000 +0200 @@ -209,6 +209,14 @@ } flush_req = sender().flush_worker_nb(0); + /* In some cases, there will be nothing to flush, so we need to skip + * the progress loop */ + if ((flush_req == NULL) && + (get_variant_value() & TEST_DISCARD_DISABLED)) { + UCS_TEST_MESSAGE << "all EPs returned UCS_OK in 'flush_worker_nb'"; + goto out; + } + ASSERT_FALSE(flush_req == NULL); ASSERT_TRUE(UCS_PTR_IS_PTR(flush_req)); diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/ucx-1.14.0/ucx.spec.in new/ucx-1.14.1/ucx.spec.in --- old/ucx-1.14.0/ucx.spec.in 2023-03-13 21:22:52.000000000 +0100 +++ new/ucx-1.14.1/ucx.spec.in 2023-05-22 10:48:58.000000000 +0200 @@ -349,6 +349,8 @@ %endif %changelog +* Tue Mar 14 2023 Yossi Itigin <yos...@mellanox.com> 1.14.1-1 +- Bump version to 1.14.1 * Sat Apr 16 2022 Yossi Itigin <yos...@mellanox.com> 1.14.0-1 - Bump version to 1.14.0 * Wed Nov 10 2021 Yossi Itigin <yos...@mellanox.com> 1.13.0-1