Script 'mail_helper' called by obssrc
Hello community,
here is the log from the commit of package google-guest-configs for
openSUSE:Factory checked in at 2026-01-13 21:34:44
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/google-guest-configs (Old)
and /work/SRC/openSUSE:Factory/.google-guest-configs.new.1928 (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "google-guest-configs"
Tue Jan 13 21:34:44 2026 rev:37 rq:1327003 version:20260112.00
Changes:
--------
---
/work/SRC/openSUSE:Factory/google-guest-configs/google-guest-configs.changes
2025-11-11 19:20:44.521978532 +0100
+++
/work/SRC/openSUSE:Factory/.google-guest-configs.new.1928/google-guest-configs.changes
2026-01-13 21:35:09.821481137 +0100
@@ -1,0 +2,18 @@
+Tue Jan 13 08:42:31 UTC 2026 - John Paul Adrian Glaubitz <[email protected]>
+
+- Update to version 20260112.00
+ * Make c4x a "multinic accelerator platform"
+ * Merge pull request #140 from a-r-n:xps-many-numa
+ * set_multiqueue xps: stop assuming 2 numa nodes
+ * Merge pull request #137 from a-r-n:a4x-pick
+ * Add IDPF irq setting; improve a4x-max performance
+ * Merge pull request #133 from a-r-n:master
+ * Allow test injection of the root directory and metadata server endpoint
+  * Add NIC naming support for ConnectX VFs in baremetal
+  * Bugfix for IDPF where only the rename got skipped
+ * add a4x-max to google_set_multiqueue is_multinic_accelerator_platform
+ * remove unnecessary link up and down
+ * fix inconsistent NIC index between smart NICs and GPU NICs.
+- Mark %{_modprobedir}/gce-blacklist.conf as %config(noreplace) (bsc#1198323)
+
+-------------------------------------------------------------------
Old:
----
google-guest-configs-20251014.00.tar.gz
New:
----
google-guest-configs-20260112.00.tar.gz
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Other differences:
------------------
++++++ google-guest-configs.spec ++++++
--- /var/tmp/diff_new_pack.YPEHA5/_old 2026-01-13 21:35:10.749519467 +0100
+++ /var/tmp/diff_new_pack.YPEHA5/_new 2026-01-13 21:35:10.749519467 +0100
@@ -1,7 +1,7 @@
#
# spec file for package google-guest-configs
#
-# Copyright (c) 2025 SUSE LLC
+# Copyright (c) 2026 SUSE LLC and contributors
#
# All modifications and additions to the file contributed by third parties
# remain the property of their copyright owners, unless otherwise agreed
@@ -23,7 +23,7 @@
%define _udevdir %(pkg-config --variable udev_dir udev)
%endif
Name: google-guest-configs
-Version: 20251014.00
+Version: 20260112.00
Release: 0
Summary: Google Cloud Guest Configs
License: Apache-2.0
@@ -87,7 +87,7 @@
%endif
%attr(0755, root, root) %{_bindir}/gce-nic-naming
%dir %{_sysconfdir}/rsyslog.d
-%{_modprobedir}/gce-blacklist.conf
+%config(noreplace) %{_modprobedir}/gce-blacklist.conf
%config %{_sysconfdir}/rsyslog.d/*
%config %{_sysconfdir}/sysconfig/network/scripts/*
%{_sysctldir}/*
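
The %config(noreplace) change above means a locally edited gce-blacklist.conf is kept on package upgrade and the packaged version is written next to it as an .rpmnew file instead of overwriting it. A minimal sketch for checking this after an update, assuming %{_modprobedir} expands to /usr/lib/modprobe.d on the installed system:

  # List the files rpm now tracks as config files for the package, then look
  # for an .rpmnew copy left behind by %config(noreplace) after an upgrade.
  rpm -qc google-guest-configs
  find /usr/lib/modprobe.d -name 'gce-blacklist.conf.rpmnew' 2>/dev/null
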
++++++ google-guest-configs-20251014.00.tar.gz ->
google-guest-configs-20260112.00.tar.gz ++++++
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn'
'--exclude=.svnignore' old/guest-configs-20251014.00/src/usr/bin/gce-nic-naming
new/guest-configs-20260112.00/src/usr/bin/gce-nic-naming
--- old/guest-configs-20251014.00/src/usr/bin/gce-nic-naming 2025-10-14
20:26:30.000000000 +0200
+++ new/guest-configs-20260112.00/src/usr/bin/gce-nic-naming 2026-01-10
02:55:06.000000000 +0100
@@ -24,17 +24,26 @@
# Path to the PCI bus devices
declare PCI_BUS_PATH='/sys/bus/pci/devices'
declare SYS_PREPEND_PATH='/sys'
-# 0x15b3:0x101e is the vendor and device ID for Mellanox CX7
# 0x8086:0x145c is the vendor and device ID for Intel IDPF VF
# 0x8086:0x1452 is the vendor and device ID for Intel NIC
-readonly ETHERNET_DEVICES_VENDORS=('15b3:101e' '8086:145c' '8086:1452')
-# 0x10de is the vendor ID for Nvidia
-readonly GPU_DEVICES_VENDORS=('10de' '10de')
+readonly ETHERNET_DEVICES_VENDORS=('8086:145c' '8086:1452')
+# 0x8086:0x145c is the vendor and device ID for Intel IDPF VF for RDMA
+readonly IRDMA_DEVICES_VENDORS=('8086:145c')
+# 0x15b3:0x101e is the vendor and device ID for Mellanox CX7
+# 0x15b3:0x1023 is the vendor and device ID for Mellanox CX8
+readonly GPU_NIC_DEVICES_VENDORS=('15b3:101e' '15b3:1023')
+# 0x10de:0x2330 is the vendor and device ID for Nvidia H100 -> A3 (not using this script)
+# 0x10de:0x2335 is the vendor and device ID for Nvidia H200 -> A3U
+# 0x10de:0x2901 is the vendor and device ID for Nvidia B200 -> A4
+# 0x10de:0x2941 is the vendor and device ID for Nvidia GB200 -> A4X
+# 0x10de:0x31c2 is the vendor and device ID for Nvidia GB300 -> A4X Metal
+readonly GPU_DEVICES_VENDORS=('10de:2330' '10de:2335' '10de:2901' '10de:2941' '10de:31c2')
# PCI BUS ID path is in the format of 0000:00:04.0
readonly PCI_ID_REGEX='[0-9a-fA-F]+:[0-9a-fA-F]+:[0-9a-fA-F]+\.[0-9a-fA-F]+'
# Array of devices by with vendor info
declare -a ethernet_devices
+declare -a gpu_nic_devices
declare -a accelerator_devices
# Ratio of Processor to Ethernet devices
@@ -107,16 +116,16 @@
###############################
# Determine the ratio of Processor to Ethernet devices
# Globals:
-# ethernet_devices: Array of ethernet devices
-# accelerator_devices: Array of processor devices
+# gpu_nic_devices: Array of gpu nic devices
+# accelerator_devices: Array of accelerator devices
# ethernet_to_accelerator_ratio: Ratio of Processor to Ethernet devices
# accelerator_to_ethernet_ratio: Ratio of Ethernet to Processor devices
# Arguments:
# None
##############################
function determine_index_ratios() {
- ethernet_to_accelerator_ratio="$(( ${#ethernet_devices[@]} /
${#accelerator_devices[@]} ))"
- accelerator_to_ethernet_ratio="$(( ${#accelerator_devices[@]} /
${#ethernet_devices[@]} ))"
+ ethernet_to_accelerator_ratio="$(( ${#gpu_nic_devices[@]} /
${#accelerator_devices[@]} ))"
+ accelerator_to_ethernet_ratio="$(( ${#accelerator_devices[@]} /
${#gpu_nic_devices[@]} ))"
debug "ethernet_to_accelerator_ratio: ${ethernet_to_accelerator_ratio}"
debug "accelerator_to_ethernet_ratio: ${accelerator_to_ethernet_ratio}"
}
@@ -127,11 +136,13 @@
# Globals:
# ETHERNET_DEVICES_VENDORS: Array of ethernet device vendors
# relevant to intent based naming
+# GPU_NIC_DEVICES_VENDORS: Array of gpu nic device vendors
+# relevant to intent based naming
# GPU_DEVICES_VENDORS: Array of gpu device vendors relevant to intent based
# naming
# ethernet_devices: Array of ethernet devices
+# gpu_nic_devices: Array of gpu nic devices
# accelerator_devices: Array of processor devices
-# ethernet_to_accelerator_ratio: Ratio of Processor to Ethernet devices
# Arguments:
# $1: Name reference to the array of ethernet devices
# $2: Name reference to the array of processor devices
@@ -139,7 +150,9 @@
###############################
function list_devices() {
local -n ethernet_map="$1"
- local -n accelerator_map="$2"
+ local -n gpu_nic_map="$2"
+ local -n accelerator_map="$3"
+ shift
shift
shift
local paths=("$@")
@@ -159,13 +172,25 @@
if [[ " ${ETHERNET_DEVICES_VENDORS[*]} " =~
[[:space:]]${device_id}[[:space:]] ]]; then
ethernet_map[${int_id}]=${device_id}
fi
+ if [[ " ${GPU_NIC_DEVICES_VENDORS[*]} " =~
[[:space:]]${device_id}[[:space:]] ]]; then
+ local physfn_path="${pci_device}/physfn"
+    # Skip CX VFs in baremetal; this works around PCI topology changes
+    # causing inconsistent NIC names.
+    # Note: a CX-7 VF in a VM has no physfn, so it is not affected.
+ if [[ -d "${physfn_path}" ]]; then
+ continue
+ fi
- if [[ " ${GPU_DEVICES_VENDORS[*]} " =~ [[:space:]]${vendor}[[:space:]] ]];
then
- accelerator_map[${int_id}]=${vendor}
+ gpu_nic_map[${int_id}]=${device_id}
+ fi
+ if [[ " ${GPU_DEVICES_VENDORS[*]} " =~ [[:space:]]${device_id}[[:space:]]
]]; then
+ accelerator_map[${int_id}]=${device_id}
fi
done
debug $(printf "Generated ethernet_devices_map: %s %s" "${!ethernet_map[*]}"
\
"${ethernet_map[*]}")
+ debug $(printf "Generated gpu_nic_devices_map: %s %s" "${!gpu_nic_map[*]}" \
+ "${gpu_nic_map[*]}")
debug $(printf "Generated accelerator_devices_map: %s %s"
"${!accelerator_map[*]}" \
"${accelerator_map[*]}")
}
@@ -176,7 +201,8 @@
# Runs recursively up device tree starting from DEVPATH
# Globals:
# bus_specific_ethernet_device_map: Array of ethernet devices
-# bus_specific_accelerator_device_map: Array of processor devices
+# bus_specific_gpu_nic_device_map: Array of gpu nic devices
+# bus_specific_accelerator_device_map: Array of accelerator devices
# ethernet_to_accelerator_ratio: Ratio of Processor to Ethernet devices
# accelerator_to_ethernet_ratio: Ratio of Ethernet to Processor devices
# Arguments:
@@ -187,8 +213,10 @@
# Clear the arrays
unset bus_specific_ethernet_device_map
+ unset bus_specific_gpu_nic_device_map
unset bus_specific_accelerator_device_map
declare -ga bus_specific_ethernet_device_map
+ declare -ga bus_specific_gpu_nic_device_map
declare -ga bus_specific_accelerator_device_map
if [[ "${path}" == "" ]]; then
@@ -212,13 +240,12 @@
if [[ ${line_count} -gt 1 ]]; then
# build array
# NOTE: Calling in a $() will not modify the array in this instance
- list_devices bus_specific_ethernet_device_map
bus_specific_accelerator_device_map "${output}"
-
- if [[ ${#bus_specific_ethernet_device_map[@]} -ne 0 ]] && [[
${#bus_specific_accelerator_device_map[@]} -ne 0 ]]; then
+ list_devices bus_specific_ethernet_device_map
bus_specific_gpu_nic_device_map bus_specific_accelerator_device_map "${output}"
+ if [[ ${#bus_specific_gpu_nic_device_map[@]} -ne 0 ]] && [[
${#bus_specific_accelerator_device_map[@]} -ne 0 ]]; then
- local -i eth_to_acc_ratio="$(( ${#bus_specific_ethernet_device_map[@]} /
${#bus_specific_accelerator_device_map[@]} ))"
- local -i acc_to_eth_ratio="$((
${#bus_specific_accelerator_device_map[@]} /
${#bus_specific_ethernet_device_map[@]} ))"
+ local -i eth_to_acc_ratio="$(( ${#bus_specific_gpu_nic_device_map[@]} /
${#bus_specific_accelerator_device_map[@]} ))"
+ local -i acc_to_eth_ratio="$((
${#bus_specific_accelerator_device_map[@]} /
${#bus_specific_gpu_nic_device_map[@]} ))"
if [[ ${eth_to_acc_ratio} -eq ${ethernet_to_accelerator_ratio} ]] || [[
${acc_to_eth_ratio} -eq ${accelerator_to_ethernet_ratio} ]]; then
return
@@ -310,7 +337,7 @@
################################
# Get the index of the accelerator device based on the ethernet device index
# Globals:
-# bus_specific_ethernet_device_map: Array of ethernet devices
+# bus_specific_gpu_nic_device_map: Array of gpu nic devices
# bus_specific_accelerator_device_map: Array of processor devices
# ethernet_to_accelerator_ratio: Ratio of Processor to Ethernet devices
# accelerator_to_ethernet_ratio: Ratio of Ethernet to Processor devices
@@ -321,7 +348,7 @@
#################################
function get_accelerator_index() {
local -i eth_id=${1}
- local -i eth_index="$(get_index ${eth_id}
"${!bus_specific_ethernet_device_map[@]}")"
+ local -i eth_index="$(get_index ${eth_id}
"${!bus_specific_gpu_nic_device_map[@]}")"
if [[ ${ethernet_to_accelerator_ratio} != 0 ]]; then
local -i gpu_index=$((${eth_index} / ${ethernet_to_accelerator_ratio}))
@@ -343,11 +370,11 @@
# For the case where there are equal or more ethernet devices than
# accelerator devices
# Globals:
-# ETHERNET_DEVICES_VENDORS: Array of ethernet device vendors
+# GPU_NIC_DEVICES_VENDORS: Array of gpu nic device vendors
# relevant to intent based naming
# GPU_DEVICES_VENDORS: Array of gpu device vendors relevant to intent based
# naming
-# ethernet_devices: Array of ethernet devices
+# gpu_nic_devices: Array of gpu nic devices
# accelerator_devices: Array of processor devices
# Environment Variables:
# DEVPATH: Path of the device to name that includes PCI bus id
@@ -367,7 +394,7 @@
# Search for relevant vendors in arrays and generate name based on results
# Minimum requirement is the vendor of the ethernet device in relevant list
# Add additional cases here for new hardware vendors or name cases
- if [[ " ${ETHERNET_DEVICES_VENDORS[*]} " =~ \
+ if [[ " ${GPU_NIC_DEVICES_VENDORS[*]} " =~ \
[[:space:]]${eth_device_vendor}[[:space:]] ]]; then
name_builder="rdma$((${rdma_index}))"
else
@@ -388,11 +415,11 @@
#
# For the case where there are more accelerator devices than ethernet devices
# Globals:
-# ETHERNET_DEVICES_VENDORS: Array of ethernet device vendors
+# GPU_NIC_DEVICES_VENDORS: Array of gpu nic device vendors
# relevant to intent based naming
# GPU_DEVICES_VENDORS: Array of gpu device vendors relevant to intent based
# naming
-# ethernet_devices: Array of ethernet devices
+# gpu_nic_devices: Array of gpu nic devices
# accelerator_devices: Array of processor devices
# Environment Variables:
# DEVPATH: Path of the device to name that includes PCI bus id
@@ -413,7 +440,7 @@
# Search for relevant vendors in arrays and generate name based on results
# Minimum requirement is the vendor of the ethernet device in relevant list
# Add additional cases here for new hardware vendors or name cases
- if [[ " ${ETHERNET_DEVICES_VENDORS[*]} " =~ \
+ if [[ " ${GPU_NIC_DEVICES_VENDORS[*]} " =~ \
[[:space:]]${eth_device_vendor}[[:space:]] ]]; then
name_builder="rdma$((${rdma_index}))"
fi
@@ -430,83 +457,198 @@
echo ${name_builder}
}
-###############################
-# Generate the name of the device based on the vendor
-# Globals:
-# ETHERNET_DEVICES_VENDORS: Array of ethernet device vendors
-# relevant to intent based naming
-# GPU_DEVICES_VENDORS: Array of gpu device vendors relevant to intent based
-# naming
-# ethernet_devices: Array of ethernet devices
-# accelerator_devices: Array of processor devices
+function run_ip_cmd() {
+ debug "Running command: ip $@"
+ if [[ "$TEST" != "test" ]]; then
+ ip "$@"
+ fi
+}
+
+#################################
+# Generate the ethernet name of the device based on the vendor
+#
# Arguments:
-# $1: Path of the device to name that includes PCI bus id
+# $1: Vendor of the ethernet device
+# $2: Index of the ethernet device in ethernet_devices array
+# $3: Device path of the ethernet device
# Outputs:
-# Name of the device
-###############################
-function generate_name() {
- local device_path=$1
- local -i int_id=$(get_id_from_path "${device_path}")
- debug "Path discovered int_id: ${int_id}"
- # Pass the array of keys to find index of this device
- local -i eth_index=$(get_index ${int_id} "${!ethernet_devices[@]}")
- debug "ethernet device index: ${eth_index}"
- if [[ ${eth_index} -eq -1 ]]; then
- error_and_exit "Device not found in ethernet devices"
- fi
+# Name of the device (eth0, eth2, rdma4, etc.)
+#################################
+function generate_ethernet_name() {
+ local int_id=$1
+ local eth_index=$2
+ local device_path=$3
local eth_device_vendor="${ethernet_devices[${int_id}]}"
local name_builder=""
# Diorite NIC
if [[ ${eth_device_vendor} == "8086:1452" ]]; then
+ # Pass the array of keys to find index of this device
+ local -i eth_index=$(get_index ${int_id} "${!ethernet_devices[@]}")
local old_name=$(basename ${device_path})
local new_name="eth${eth_index}"
name_builder="$new_name"
# Temporarily rename to avoid naming collisions: udev will overwrite the
# temporary name with the correct, final name
- if [[ "$new_name" != "$old_name" ]]; then
- notice "Renaming ${old_name} to ${new_name}"
- /sbin/ip link set $new_name down
- /sbin/ip link set $new_name name "${new_name}tmp"
- /sbin/ip link set "${new_name}tmp" up
+ if [[ "$new_name" != "$old_name" ]]; then # IDPF NIC
+ notice "Renaming ${old_name} to ${new_name}"
+ run_ip_cmd link set $new_name name "${new_name}tmp"
fi
- elif [[ ${accelerator_devices[*]} == "" ]] ; then
- if [[ " ${ETHERNET_DEVICES_VENDORS[*]} " =~ \
- [[:space:]]${eth_device_vendor}[[:space:]] ]]; then
- if [[ "${SUBSYSTEM}" == "net" ]] && [[ -d
"${SYS_PREPEND_PATH}${DEVPATH}/device/${RDMA_TEST_FOLDER}" ]]; then
- name_builder="rdma${eth_index}"
- elif [[ "${SUBSYSTEM}" == "net" ]] && [[ -d
"${SYS_PREPEND_PATH}${DEVPATH}/device" ]]; then
- # If this is a VF device and not an RDMA we do not want this device
- # to claim the eth0 name so give it a PCI path based name instead.
- #
- notice "Non RDMA VF NIC. Setting name to path name"
- name_builder=$ID_NET_NAME_PATH
- else
- # If device path is empty it indicates other changes happening so this
script will skip
- error_and_exit "DEVPATH provided is empty, skipping naming.
Path:${SYS_PREPEND_PATH}${DEVPATH}"
- fi
+ elif [[ " ${IRDMA_DEVICES_VENDORS[*]} " =~
[[:space:]]${eth_device_vendor}[[:space:]] ]]; then # RDMA NIC
+ if [[ "${SUBSYSTEM}" == "net" ]] && [[ -d
"${device_path}/device/${RDMA_TEST_FOLDER}" ]]; then
+ name_builder="rdma${eth_index}"
+ elif [[ "${SUBSYSTEM}" == "net" ]] && [[ -d "${device_path}/device" ]];
then
+ # If this is a VF device and not an RDMA we do not want this device
+ # to claim the eth0 name so give it a PCI path based name instead.
+ #
+ notice "Non RDMA VF NIC. Setting name to path name"
+ name_builder=$ID_NET_NAME_PATH
else
- error_and_exit "Device is not for intent based name: "\
- "Device vendors: eth:${eth_device_vendor}"
+ # If device path is empty it indicates other changes happening so this
script will skip
+ error_and_exit "DEVPATH provided is empty, skipping naming.
Path:${device_path}"
fi
- elif [[ ${ethernet_to_accelerator_ratio} != 0 ]]; then
- gather_lowest_denominator_devices "${SYS_PREPEND_PATH}${DEVPATH}"
- local -i gpu_index=$(get_accelerator_index ${int_id})
- name_builder=$(name_ethernet_greater_equal_gpu "${eth_device_vendor}"
${eth_index} ${gpu_index})
+ else
+ error_and_exit "Ethernet device is not for intent based name: "\
+ "Device vendors: eth:${eth_device_vendor}"
+ fi
+
+ notice "eth nic name: ${name_builder} for device_path: ${device_path}"
+ echo ${name_builder}
+}
+
+#################################
+# Generate the gpu nic name of the device based on the vendor
+#
+# Arguments:
+# $1: Vendor of the gpu nic device
+# $2: Index of the gpu nic device in gpu_nic_devices array
+# $3: Device path of the gpu nic device
+# Outputs:
+# Name of the device (gpu0rdma1, gpu1rdma0, etc.)
+# In case of CX VF representation it will return (gpu0rdma2_rep, etc.)
+#################################
+function generate_gpu_nic_name() {
+ local int_id=$1
+ local gpu_nic_index=$2
+ local device_path=$3
+
+ local gpu_nic_device_vendor="${gpu_nic_devices[${int_id}]}"
+ local name_builder=""
+
+ local -i gpu_index=-1
+ if [[ ${ethernet_to_accelerator_ratio} != 0 ]]; then
+ gather_lowest_denominator_devices "${device_path}"
+ gpu_index=$(get_accelerator_index ${int_id})
+ name_builder=$(name_ethernet_greater_equal_gpu "${gpu_nic_device_vendor}"
${gpu_nic_index} ${gpu_index})
+ elif [[ ${accelerator_to_ethernet_ratio} != 0 ]]; then
+ gather_lowest_denominator_devices "${device_path}"
+ gpu_index=$(get_accelerator_index ${int_id})
+ name_builder=$(name_gpu_greater_ethernet "${gpu_nic_device_vendor}"
${gpu_nic_index} ${gpu_index})
+ else
+ error_and_exit "No index ratios found"
+ fi
+
+ # Handle CX VF representation
+ local phys_port_name=$(cat "${device_path}/phys_port_name" 2>/dev/null)
+ if [[ "${phys_port_name}" == *"vf"* ]]; then
+ local -i vf_index=${phys_port_name##*vf}
+ local -i eth_index=$(( vf_index + 2 )) # slot 0&1 are PF, VF starts at 2
+ name_builder="gpu${gpu_index}rdma${eth_index}_rep"
+ fi
+
+ notice "gpu nic name: ${name_builder} for device_path: ${device_path}"
+ echo ${name_builder}
+}
+
+#################################
+# Generate the gpu nic name for the VF device based on the vendor
+#
+# Arguments:
+# $1: Vendor of the gpu nic device
+# $2: Device path of the gpu nic VF device
+# $3: Device path of the gpu nic PF device
+# Outputs:
+# Name of the device (gpu0rdma2, gpu1rdma3, etc.)
+#################################
+function generate_gpu_nic_name_has_physfn() {
+ local -i int_id=$1
+ local vf_path=$2
+ local physfn_path=$3
+ local -i physfn_int_id=$(get_id_from_path "${physfn_path}")
+
+ local vf_vendor=$(cat "${vf_path}/device/vendor")
+ local vf_device=$(cat "${vf_path}/device/device")
+ local vf_device_id="${vf_vendor#0x}:${vf_device#0x}"
+ if [[ ! " ${GPU_NIC_DEVICES_VENDORS[*]} " =~
[[:space:]]${vf_device_id}[[:space:]] ]]; then
+ error_and_exit "VF device is not for intent based name: ${path}"
+ fi
+
+ # Get the index of the accelerator device based on the NIC pf device index
+ local -i gpu_index=-1
+ if [[ ${ethernet_to_accelerator_ratio} != 0 ]]; then
+ gather_lowest_denominator_devices "${physfn_path}"
+ gpu_index=$(get_accelerator_index ${physfn_int_id})
elif [[ ${accelerator_to_ethernet_ratio} != 0 ]]; then
- gather_lowest_denominator_devices "${SYS_PREPEND_PATH}${DEVPATH}"
- local -i gpu_index=$(get_accelerator_index ${int_id})
- name_builder=$(name_gpu_greater_ethernet "${eth_device_vendor}"
${eth_index} ${gpu_index})
+ gather_lowest_denominator_devices "${physfn_path}"
+ gpu_index=$(get_accelerator_index ${physfn_int_id})
else
error_and_exit "No index ratios found"
fi
- notice "Device name: ${name_builder}"
+ # using the slot number of the VF as the rdma index
+ local -i rdma_index=$(( int_id % 16 ))
+ name_builder="gpu${gpu_index}rdma${rdma_index}"
+
+ notice "gpu nic name: ${name_builder} for device_path: ${device_path}"
echo ${name_builder}
}
+###############################
+# Generate the name of the device based on the vendor
+# Globals:
+# GPU_NIC_DEVICES_VENDORS: Array of gpu nic device vendors
+# relevant to intent based naming
+# gpu_nic_devices: Array of gpu nic devices
+# ethernet_devices: Array of ethernet devices
+# accelerator_devices: Array of processor devices
+# Arguments:
+# $1: Path of the device to name that includes PCI bus id
+# Outputs:
+# Name of the device
+###############################
+function generate_name() {
+ local device_path=$1
+ local -i int_id=$(get_id_from_path "${device_path}")
+ debug "Path discovered int_id: ${int_id}"
+
+ # Pass the array of keys to find index of this device
+ local -i eth_index=$(get_index ${int_id} "${!ethernet_devices[@]}")
+ debug "ethernet device index: ${eth_index}"
+ if [[ ${eth_index} -ne -1 ]]; then
+ echo $(generate_ethernet_name ${int_id} ${eth_index} ${device_path})
+ return
+ fi
+
+ local -i gpu_nic_index=$(get_index ${int_id} "${!gpu_nic_devices[@]}")
+ debug "gpu_nic device index: ${gpu_nic_index}"
+ if [[ ${gpu_nic_index} -ne -1 ]]; then
+ echo $(generate_gpu_nic_name ${int_id} ${gpu_nic_index} ${device_path})
+ return
+ fi
+
+ # If device is not in ethernet_devices or gpu_nic_devices, check if it is a
+ # CX VF in baremetal.
+ local physfn_path=$(readlink -f "${device_path}/device/physfn")
+ if [[ -d "${physfn_path}" ]]; then
+ echo $(generate_gpu_nic_name_has_physfn ${int_id} ${device_path}
${physfn_path})
+ return
+ fi
+
+ error_and_exit "Device is not for intent based name: ${device_path}"
+}
+
# Intel VF driver and needs special handling to determine if name should be
# RDMA related
if [[ "${DEVICE_DRIVER}" == "idpf" ]]; then
@@ -520,10 +662,10 @@
notice "Triggered for non Net Device"
fi
# Note can not use "" around ls path here or it errors out
- list_devices ethernet_devices accelerator_devices "$(ls -d
"${PCI_BUS_PATH}"/*)"
+ list_devices ethernet_devices gpu_nic_devices accelerator_devices "$(ls -d
"${PCI_BUS_PATH}"/*)"
# Check if any devices were found
- if [[ "${ethernet_devices[*]}" == "" ]]; then
+ if [[ "${gpu_nic_devices[*]}" == "" && "${ethernet_devices[*]}" == "" ]];
then
error_and_exit "No network devices found"
fi
@@ -531,7 +673,7 @@
determine_index_ratios
fi
- generated_name=$(generate_name "${DEVPATH}")
+ generated_name=$(generate_name "${SYS_PREPEND_PATH}${DEVPATH}")
if [[ "$SUBSYSTEM" == "net" ]]; then
echo ${generated_name}
@@ -542,9 +684,7 @@
if [[ -d "$dev_path/net/" ]]; then
current_net_iface="$(ls $dev_path/net/)"
notice "Renaming iface ${current_net_iface} to ${generated_name}"
- /usr/sbin/ip link set dev ${current_net_iface} down
- /usr/sbin/ip link set dev ${current_net_iface} name ${generated_name}
- /usr/sbin/ip link set dev ${generated_name} up
+ run_ip_cmd link set dev ${current_net_iface} name ${generated_name}
fi
fi
fi
\ No newline at end of file
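
For orientation, the reworked gce-nic-naming above classifies devices by the combined vendor:device ID read from sysfs (e.g. 15b3:101e for a ConnectX-7 GPU NIC) and, in baremetal, skips VFs that expose a physfn link. A minimal standalone sketch of that lookup, with an illustrative PCI address and the ID list taken from the new GPU_NIC_DEVICES_VENDORS:

  # Hedged sketch: build the vendor:device key the script matches against its
  # vendor arrays; the PCI address below is only an example.
  dev=/sys/bus/pci/devices/0000:00:04.0
  vendor=$(cat "${dev}/vendor")    # e.g. 0x15b3
  device=$(cat "${dev}/device")    # e.g. 0x101e
  id="${vendor#0x}:${device#0x}"   # -> 15b3:101e
  gpu_nic_ids=('15b3:101e' '15b3:1023')
  if [[ -d "${dev}/physfn" ]]; then
    echo "${dev} is a VF (has physfn); the script skips it in baremetal"
  elif [[ " ${gpu_nic_ids[*]} " =~ [[:space:]]${id}[[:space:]] ]]; then
    echo "${dev} is treated as a GPU NIC (${id})"
  fi
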
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn'
'--exclude=.svnignore'
old/guest-configs-20251014.00/src/usr/bin/google_set_multiqueue
new/guest-configs-20260112.00/src/usr/bin/google_set_multiqueue
--- old/guest-configs-20251014.00/src/usr/bin/google_set_multiqueue
2025-10-14 20:26:30.000000000 +0200
+++ new/guest-configs-20260112.00/src/usr/bin/google_set_multiqueue
2026-01-10 02:55:06.000000000 +0100
@@ -28,14 +28,33 @@
# each CPU a dedicated TX and RX network queue, while ensuring that all packets
# from a single flow are delivered to the same CPU.
#
-# For a gvnic device, set the IRQ affinities to the per-IRQ affinity hint.
-# The google virtual ethernet driver maps each queue MSI-X interrupt to a
-# unique single CPU, which is stored in the affinity_hint for each MSI-X
-# vector. In older versions of the kernel, irqblanace is expected to copy the
-# affinity_hint to smp_affinity; however, GCE instances disable irqbalance by
+# For a gvnic device, set the IRQ affinities to the per-IRQ affinity hint.
+# The google virtual ethernet driver maps each queue MSI-X interrupt to a
+# unique single CPU, which is stored in the affinity_hint for each MSI-X
+# vector. In older versions of the kernel, irqbalance is expected to copy the
+# affinity_hint to smp_affinity; however, GCE instances disable irqbalance by
# default. This script copies over the affinity_hint to smp_affinity on boot to
# replicate the behavior of irqbalance.
+# Allow changing the metadata server and "root" for testing.
+METADATA_SERVER="169.254.169.254"
+ROOT_DIR="/"
+if [[ ! -z "${GOOGLE_SET_MULTIQUEUE_METADATASERVER+x}" ]]; then
+ METADATA_SERVER="$GOOGLE_SET_MULTIQUEUE_METADATASERVER"
+fi
+if [[ ! -z "${GOOGLE_SET_MULTIQUEUE_FAKEROOT+x}" ]]; then
+ ROOT_DIR="$GOOGLE_SET_MULTIQUEUE_FAKEROOT"
+fi
+
+function get_metadata() {
+ local path="$1"
+ curl -s -m 1 -H "Metadata-Flavor: Google"
"http://${METADATA_SERVER}/computeMetadata/v1/${path}"
+}
+
+A4X_ALL_CPUS_MASK="0000ffff,ffffffff,ffffffff,ffffffff,ffffffff"
+A4X_RX_RING_LENGTH="2048"
+A4X_TX_RING_LENGTH="1024"
+
function is_decimal_int() {
[ "${1}" -eq "${1}" ] > /dev/null 2>&1
}
@@ -44,28 +63,58 @@
ethtool -L "${1}" combined "${2}" > /dev/null 2>&1
}
-function set_irq_range() {
+function set_irq_range_idpf() {
local -r nic="$1"
local bind_cores_index="$2"
local irq_ranges=("${@:3}")
# The user may not have this $nic configured on their VM, if not, just skip
# it, no need to error out.
- if [ ! -d "/sys/class/net/"$nic"/device" ]; then
+ if [ ! -d "${ROOT_DIR}sys/class/net/"$nic"/device" ]; then
return;
fi
+ # More IRQs are in msi_irqs than actually exist for idpf.
+ local alleged_irqs=($(ls ${ROOT_DIR}sys/class/net/"$nic"/device/msi_irqs))
+ local -a actual_irqs=()
+ for irq in "${alleged_irqs[@]}"; do
+ if [[ -d "${ROOT_DIR}proc/irq/${irq}" ]]; then
+ # Only reaffinitize TxRx irqs, not the Mailbox.
+ if ls ${ROOT_DIR}proc/irq/${irq}/*TxRx* 1> /dev/null 2>&1; then
+ actual_irqs+=("${irq}")
+ fi
+ fi
+ done
+ local num_irqs=${#actual_irqs[@]}
+ for ((i=0; i<num_irqs; i+=1)); do
+ core="${irq_ranges[${bind_cores_index}]}"
+    echo "Setting irq binding for $nic: irq ${actual_irqs[$i]} to core ${core}" >&2
+ echo "${core}" > "${ROOT_DIR}proc/irq/${actual_irqs[$i]}/smp_affinity_list"
+ bind_cores_index=$((bind_cores_index + 1))
+ done
+}
+
+function set_irq_range_gve() {
+ local -r nic="$1"
+ local bind_cores_index="$2"
+ local irq_ranges=("${@:3}")
+
+ # The user may not have this $nic configured on their VM, if not, just skip
+ # it, no need to error out.
+ if [ ! -d "${ROOT_DIR}sys/class/net/"$nic"/device" ]; then
+ return;
+ fi
# We count the number of rx queues and assume number of rx queues == tx
# queues. The number of queues shown in the sysfs stands for the initial
# queues while the number of IRQs stands for the max queues. The number of
# initial queues should be always less than or equal to that of the max
# queues.
- num_irqs=$(( $(ls /sys/class/net/"$nic"/device/msi_irqs | wc -l) / 2 ))
- num_q=$(ls -1 /sys/class/net/"$nic"/queues/ | grep rx | wc -l)
+ num_irqs=$(( $(ls ${ROOT_DIR}sys/class/net/"$nic"/device/msi_irqs | wc -l) /
2 ))
+ num_q=$(ls -1 ${ROOT_DIR}sys/class/net/"$nic"/queues/ | grep rx | wc -l)
echo "Setting irq binding for "$nic" to core
["${irq_ranges[${bind_cores_index}]}" - "${irq_ranges[$((bind_cores_index +
num_q - 1))]}] ... >&2
- irqs=($(ls /sys/class/net/"$nic"/device/msi_irqs | sort -g))
+ irqs=($(ls ${ROOT_DIR}sys/class/net/"$nic"/device/msi_irqs | sort -g))
for ((irq = 0; irq < "$num_irqs"; irq++)); do
tx_irq=${irqs[$irq]}
rx_irq=${irqs[$((irq + num_irqs))]}
@@ -76,18 +125,17 @@
((bind_cores_index++))
# this is GVE's TX irq. See gve_tx_idx_to_ntfy().
- echo "$core" > "/proc/irq/${tx_irq}/smp_affinity_list"
- echo "tx_irq: ${tx_irq}, assigned irq core: $(cat
"/proc/irq/${tx_irq}/smp_affinity_list")" >&2
+ echo "$core" > "${ROOT_DIR}proc/irq/${tx_irq}/smp_affinity_list"
+ echo "tx_irq: ${tx_irq}, assigned irq core: $(cat
"${ROOT_DIR}proc/irq/${tx_irq}/smp_affinity_list")" >&2
# this is GVE's RX irq. See gve_rx_idx_to_ntfy().
- echo "$core" > "/proc/irq/${rx_irq}/smp_affinity_list"
- echo "rx_irq: ${rx_irq}, assigned irq core: $(cat
"/proc/irq/${rx_irq}/smp_affinity_list")" >&2
-
+ echo "$core" > "${ROOT_DIR}proc/irq/${rx_irq}/smp_affinity_list"
+ echo "rx_irq: ${rx_irq}, assigned irq core: $(cat
"${ROOT_DIR}proc/irq/${rx_irq}/smp_affinity_list")" >&2
# Check if the queue exists at present because the number of IRQs equals
# the max number of queues allocated and could be greater than the current
# number of queues.
- tx_queue=/sys/class/net/"$nic"/queues/tx-"$irq"
+ tx_queue=${ROOT_DIR}sys/class/net/"$nic"/queues/tx-"$irq"
if ls $tx_queue 1> /dev/null 2>&1; then
echo -en "$nic:q-$irq: \ttx: irq $tx_irq bind to $core \trx: irq $rx_irq
bind to $core" >&2
echo -e " \txps_cpus bind to $(cat $tx_queue/xps_cpus)" >&2
@@ -102,17 +150,17 @@
# returns 0 (success) if all the interfaces contain pnic_id on the metadata server.
function contains_pnic_ids() {
- while IFS= read -r interface; do
+ local interfaces=$(get_metadata "instance/network-interfaces/")
+ for interface in $interfaces; do
echo "Interface: $interface"
- network_interfaces_mds_attributes=$(curl -m 1 -H "Metadata-Flavor: Google"
\
-
"http://169.254.169.254/computeMetadata/v1/instance/network-interfaces/$interface/")
+ network_interfaces_mds_attributes=$(get_metadata
"instance/network-interfaces/$interface/")
if ! echo "$network_interfaces_mds_attributes" | grep -q
"physical-nic-id"; then
echo "physical-nic-id NOT found in interface $interface"
return 1
fi
- done < <(curl -m 1 -H "Metadata-Flavor: Google"
http://169.254.169.254/computeMetadata/v1/instance/network-interfaces/)
+ done
return 0
}
@@ -122,9 +170,6 @@
contains_pnic_ids
CONTAINS_PNIC_IDS=$?
- machine_type=$(curl -m 1 -H "Metadata-Flavor: Google" \
- http://169.254.169.254/computeMetadata/v1/instance/machine-type)
-
[[ $CONTAINS_PNIC_IDS -eq 0 \
|| "$machine_type" == *"a3-highgpu-8g"* \
|| "$machine_type" == *"a3-ultragpu-8g"* \
@@ -132,67 +177,49 @@
|| "$machine_type" == *"a3-edgegpu-8g"* \
|| "$machine_type" == *"a3-ultragpu-"* \
|| "$machine_type" == *"a4-highgpu-"* \
- || "$machine_type" == *"a4x-highgpu-"* ]] || return 1
+ || "$machine_type" == *"a4x-highgpu-"* \
+ || "$machine_type" == *"a4x-maxgpu-"* \
+ || "$machine_type" == *"c4x-"* ]] || return 1
return 0
}
-
# returns 0 (success) if the supplied nic is a Gvnic device.
function is_gvnic() {
local -r nic_name="$1"
- driver_type=$(ethtool -i $nic_name | grep driver)
+ local -r driver_type=$(ethtool -i $nic_name | grep driver)
- [[ "$driver_type" == *"gve"*
+ [[ "$driver_type" == *"gve"*
|| "$driver_type" == *"gvnic"* ]] || return 1
return 0
}
-# Returns the vCPU ranges on each of the numa nodes. The vCPU ranges will
-# be in the form of array of
-# [numa0_irq_start_1, numa0_irq_end_1, numa0_irq_start_2, numa0_irq_end_2,
-# numa1_irq_start_1, numa1_irq_end_1, numa1_irq_start_2, numa1_irq_end_2]
-# this will only return the vCPU ranges on NUMA0 and NUMA1 since accelerator
-# platforms of GEN3 and after only have 2 NUMA nodes.
-# The expected vCPU ranges on eahc platforms are:
-# A3/A3-mega:
-# numa0: [0, 51] [104, 155]
-# numa1: [52, 103] [156, 207]
-# A3-ultra:
-# numa0: [0, 55] [113, 168]
-# numa1: [56, 112] [169, 224]
-function get_vcpu_ranges_on_accelerator_platform {
- declare -n arr_ref=$1
-
- # Get vCPU ranges for NUMA 0
- numa0_irq_range=($(cat /sys/devices/system/node/node0/cpulist))
- numa0_irq_range0="${numa0_irq_range[0]%,*}"
- numa0_irq_range1="${numa0_irq_range[0]#*,}"
-
- numa0_irq_range0_start=$(echo "$numa0_irq_range0" | cut -d '-' -f 1)
- numa0_irq_range0_end=$(echo "$numa0_irq_range0" | cut -d '-' -f 2)
- numa0_irq_range1_start=$(echo "$numa0_irq_range1" | cut -d '-' -f 1)
- numa0_irq_range1_end=$(echo "$numa0_irq_range1" | cut -d '-' -f 2)
-
- # Get vCPU ranges for NUMA 1
- numa1_irq_range=($(cat /sys/devices/system/node/node1/cpulist))
- numa1_irq_range0="${numa1_irq_range[0]%,*}"
- numa1_irq_range1="${numa1_irq_range[0]#*,}"
-
- numa1_irq_range0_start=$(echo "$numa1_irq_range0" | cut -d '-' -f 1)
- numa1_irq_range0_end=$(echo "$numa1_irq_range0" | cut -d '-' -f 2)
- numa1_irq_range1_start=$(echo "$numa1_irq_range1" | cut -d '-' -f 1)
- numa1_irq_range1_end=$(echo "$numa1_irq_range1" | cut -d '-' -f 2)
-
- arr_ref=(
- "$numa0_irq_range0_start"
- "$numa0_irq_range0_end"
- "$numa0_irq_range1_start"
- "$numa0_irq_range1_end"
- "$numa1_irq_range0_start"
- "$numa1_irq_range0_end"
- "$numa1_irq_range1_start"
- "$numa1_irq_range1_end")
+# returns 0 (success) if the supplied nic is an IDPF device.
+function is_idpf() {
+ local -r nic_name="$1"
+ local -r driver_type=$(ethtool -i $nic_name | grep driver)
+
+ [[ "$driver_type" == *"idpf"* ]] || return 1
+
+ return 0
+}
+
+# Returns the CPU ranges for a given NUMA node.
+# The CPU ranges will be returned as a space-separated list of start/end
integers.
+function get_vcpu_ranges {
+ local numa_node="$1"
+ local
cpulist_file="${ROOT_DIR}sys/devices/system/node/node${numa_node}/cpulist"
+ if [ -f "$cpulist_file" ]; then
+ local cpulist=$(cat "$cpulist_file" | tr ',' ' ')
+ local result=""
+ for r in $cpulist; do
+ local start=$(echo "$r" | cut -d '-' -f 1)
+ local end=$(echo "$r" | cut -d '-' -f 2)
+ [[ -z "$end" ]] && end=$start
+ result+="$start $end "
+ done
+ echo "$result"
+ fi
}
function unpack_cpu_ranges() {
@@ -267,7 +294,7 @@
get_network_interfaces() {
local network_interfaces=()
- for nic_dir in /sys/class/net/*; do
+ for nic_dir in ${ROOT_DIR}sys/class/net/*; do
local nic_name=$(basename "${nic_dir}")
if [[ "${nic_name}" == "lo" || ! -e "${nic_dir}/device" ]]; then
@@ -290,52 +317,75 @@
local cpus=("${@:2}")
total_vcpus=${#cpus[@]}
- num_numa_nodes=2
nics_string=$(get_network_interfaces)
IFS=' ' read -r -a nics <<< "$nics_string"
for nic in "${nics[@]}"; do
- tx_queue_count=$(ls -1 /sys/class/net/"$nic"/queues/ | grep tx | wc -l)
+ tx_queue_count=$(ls -1 ${ROOT_DIR}sys/class/net/"$nic"/queues/ | grep tx |
wc -l)
+
+ if [[ "$machine_type" == *"a4x-"* ]]; then
+ # All queues on a4x get the full mask.
+ for (( queue=0; queue<tx_queue_count; queue++ )); do
+ echo "${A4X_ALL_CPUS_MASK}" >
"${ROOT_DIR}sys/class/net/$nic/queues/tx-$queue/xps_cpus"
+ done
+ continue
+ fi
- # the number of queues to assign CPUs for this NUMA node.
- queues_per_numa=$(( tx_queue_count / num_numa_nodes ))
+ if [[ $num_numa_nodes -le $tx_queue_count ]]; then
+ # the number of queues to assign CPUs for this NUMA node.
+ queues_per_numa=$(( tx_queue_count / num_numa_nodes ))
+
+ # the number of CPUs to assign per queue
+ cpus_per_queue=$(( total_vcpus / queues_per_numa))
+
+ echo "nic=$nic tx_queue_count=$tx_queue_count
queues_per_numa=$queues_per_numa cpus_per_queue=$cpus_per_queue"
+
+ cpu_index=0
+ queue_offset=$(( queues_per_numa*numa ))
+ for (( queue=queue_offset; queue<queue_offset+queues_per_numa; queue+=1
)); do
+ xps_path=${ROOT_DIR}sys/class/net/$nic/queues/tx-$queue/xps_cpus
+ xps_cpus=""
+
+ # Assign all the remaining CPUs to the last queue
+ if [[ queue -eq $(( queue_offset + queues_per_numa - 1 )) ]]; then
+ cpus_per_queue=$(( total_vcpus - cpu_index ))
+ fi
- # the number of CPUs to assign per queue
- cpus_per_queue=$(( total_vcpus / queues_per_numa))
+ for (( i=0; i<cpus_per_queue; i+=1 )); do
+ xps_cpus+="${cpus[cpu_index]},"
+ cpu_index=$(( cpu_index + 1 ))
+ done
- echo "nic=$nic tx_queue_count=$tx_queue_count
queues_per_numa=$queues_per_numa cpus_per_queue=$cpus_per_queue"
-
- cpu_index=0
- queue_offset=$(( queues_per_numa*numa ))
- for (( queue=queue_offset; queue<queue_offset+queues_per_numa; queue+=1
)); do
- xps_path=/sys/class/net/$nic/queues/tx-$queue/xps_cpus
- xps_cpus=""
-
- # Assign all the remaining CPUs to the last queue
- if [[ queue -eq $(( queue_offset + queues_per_numa - 1 )) ]]; then
- cpus_per_queue=$(( total_vcpus - cpu_index ))
- fi
-
- for (( i=0; i<cpus_per_queue; i+=1 )); do
- xps_cpus+="${cpus[cpu_index]},"
- cpu_index=$(( cpu_index + 1 ))
+ # remove the last ","
+ xps_cpus="${xps_cpus%,}"
+ cpu_mask=$(rangelist_to_bitmap "$xps_cpus" "$(nproc)")
+ echo ${cpu_mask} > $xps_path
done
+ else
+ # num_numa_nodes > tx_queue_count.
+ # multiple NUMA nodes share a queue. We append to the mask.
+ queue=$(( numa % tx_queue_count ))
+ xps_path=${ROOT_DIR}sys/class/net/$nic/queues/tx-$queue/xps_cpus
+
+ current_mask=$(cat "$xps_path" 2>/dev/null || echo "0")
+ current_rangelist=$(bitmap_to_rangelist "$current_mask" 2>/dev/null ||
echo "")
+
+ # Flatten cpus array to comma-separated list
+ new_cpus_list=$(IFS=,; echo "${cpus[*]}")
+ ranges="${current_rangelist},${new_cpus_list}"
+ ranges="${ranges#,}"
- # remove the last ","
- xps_cpus="${xps_cpus%,}"
- cpu_mask=$(rangelist_to_bitmap $xps_cpus $(nproc))
+ cpu_mask=$(rangelist_to_bitmap "$ranges" "$(nproc)")
echo ${cpu_mask} > $xps_path
- printf "Queue %d XPS_PATH=%s assigned CPUs=%s cpu_mask=%s\n" \
- "$queue" \
- "$xps_path" \
- "$(bitmap_to_rangelist "$cpu_mask")" \
- "$cpu_mask"
- done
+ fi
done
}
echo "Running $(basename $0)."
-VIRTIO_NET_DEVS=/sys/bus/virtio/drivers/virtio_net/virtio*
+machine_type=$(get_metadata "instance/machine-type")
+echo "Machine type: $machine_type"
+
+VIRTIO_NET_DEVS=${ROOT_DIR}sys/bus/virtio/drivers/virtio_net/virtio*
is_multinic_accelerator_platform
IS_MULTINIC_ACCELERATOR_PLATFORM=$?
@@ -369,7 +419,7 @@
for dev in $VIRTIO_NET_DEVS
do
dev=$(basename "$dev")
- irq_dir=/proc/irq/*
+ irq_dir=${ROOT_DIR}proc/irq/*
for irq in $irq_dir
do
smp_affinity="${irq}/smp_affinity_list"
@@ -410,7 +460,7 @@
# Set smp_affinity properly for gvnic queues. '-ntfy-block.' is unique to gve
# and will not affect virtio queues.
-for i in /proc/irq/*; do
+for i in ${ROOT_DIR}proc/irq/*; do
if ls ${i}/*-ntfy-block.* 1> /dev/null 2>&1; then
if [ -f ${i}/affinity_hint ]; then
echo Setting smp_affinity on ${i} to $(cat ${i}/affinity_hint)
@@ -419,99 +469,55 @@
fi
done
-vcpu_ranges=()
-get_vcpu_ranges_on_accelerator_platform vcpu_ranges
+num_numa_nodes=$(ls -d ${ROOT_DIR}sys/devices/system/node/node* | wc -l)
+echo "Found ${num_numa_nodes} NUMA nodes."
-packed_numa0_vcpu_ranges=(
- "${vcpu_ranges[0]} ${vcpu_ranges[1]} ${vcpu_ranges[2]} ${vcpu_ranges[3]}"
-)
-packed_numa1_vcpu_ranges=(
- "${vcpu_ranges[4]} ${vcpu_ranges[5]} ${vcpu_ranges[6]} ${vcpu_ranges[7]}"
-)
-declare -a numa0_vcpu_ranges
-unpack_cpu_ranges "${packed_numa0_vcpu_ranges[0]}" numa0_vcpu_ranges
-declare -a numa1_vcpu_ranges
-unpack_cpu_ranges "${packed_numa1_vcpu_ranges[0]}" numa1_vcpu_ranges
-
-echo -e "\nConfiguring XPS affinity for devices on NUMA 0"
-echo -e "vCPUs on NUMA0 [${vcpu_ranges[0]}-${vcpu_ranges[1]}],
[${vcpu_ranges[2]}-${vcpu_ranges[3]}]"
-set_xps_affinity 0 "${numa0_vcpu_ranges[@]}"
-
-echo -e "\nConfiguring XPS affinity for devices on NUMA 1"
-echo -e "vCPUs on NUMA1 [${vcpu_ranges[4]}-${vcpu_ranges[5]}],
[${vcpu_ranges[6]}-${vcpu_ranges[7]}]"
-set_xps_affinity 1 "${numa1_vcpu_ranges[@]}"
+for ((node=0; node<num_numa_nodes; node++)); do
+ ranges=$(get_vcpu_ranges "$node")
+ dec_ranges=()
+ unpack_cpu_ranges "${ranges}" dec_ranges
+
+ echo -e "\nConfiguring XPS affinity for devices on NUMA ${node}"
+ echo -e "vCPUs on NUMA${node} [${dec_ranges[@]}]"
+ set_xps_affinity "$node" "${dec_ranges[@]}"
+done
if [[ ! $IS_MULTINIC_ACCELERATOR_PLATFORM == 0 ]]; then
exit
fi
-
# Assign IRQ binding for network interfaces based on pci bus ordering.
-#
-# Below logics explains how we rank interfaces by pci bus order.
-# > find /sys/class/net -type l | xargs -L 1 realpath | sort
-# /sys/devices/pci0000:00/0000:00:0b.0/net/enp0s11
-#
/sys/devices/pci0000:01/0000:01:00.0/0000:02:00.0/0000:03:02.0/0000:06:00.0/net/enp6s0
-#
/sys/devices/pci0000:07/0000:07:00.0/0000:08:00.0/0000:09:02.0/0000:0c:00.0/net/enp12s0
-#
/sys/devices/pci0000:81/0000:81:00.0/0000:82:00.0/0000:83:02.0/0000:86:00.0/net/enp134s0
-#
/sys/devices/pci0000:87/0000:87:00.0/0000:88:00.0/0000:89:02.0/0000:8c:00.0/net/enp140s0
-# /sys/devices/virtual/net/lo
-#
-# > find /sys/class/net -type l | xargs -L 1 realpath | sort | xargs -L 1
basename | grep -v lo
-# enp0s11
-# enp6s0
-# enp12s0
-# enp134s0
-# enp140s0
-
# Avoid setting binding IRQ on vCPU 0 as it is a busy vCPU being heavily
# used by the system.
-packed_numa0_irq_ranges=(
- "$((vcpu_ranges[0] + 1)) ${vcpu_ranges[1]} ${vcpu_ranges[2]}
${vcpu_ranges[3]}"
-)
-packed_numa1_irq_ranges=(
- "${vcpu_ranges[4]} ${vcpu_ranges[5]} ${vcpu_ranges[6]} ${vcpu_ranges[7]}"
-)
-declare -a numa0_irq_ranges
-unpack_cpu_ranges "${packed_numa0_irq_ranges[0]}" numa0_irq_ranges
-declare -a numa1_irq_ranges
-unpack_cpu_ranges "${packed_numa1_irq_ranges[0]}" numa1_irq_ranges
-
-echo -e "\nSetting IRQ affinity with vCPUs on NUMA0 [${numa0_irq_ranges[@]}]"
-bind_cores_index=0
-find /sys/class/net -type l | xargs -L 1 realpath | grep '/sys/devices/pci' |
sort | xargs -L 1 basename | while read nic_name; do
- # For non-gvnic devices (e.g. mlx5), the IRQ bindings will be handled by the
device's driver.
- if ! is_gvnic "$nic_name"; then
- echo "$nic_name is not gvnic device, skipping set irq on this device"
- continue
- fi
-
- echo "$nic_name is Gvnic device, continuing set IRQ on $nic_name."
+for ((node=0; node<num_numa_nodes; node++)); do
+ ranges=$(get_vcpu_ranges "$node")
- nic_numa_node=$(cat /sys/class/net/"$nic_name"/device/numa_node)
- if [[ $nic_numa_node -ne 0 ]]; then
- continue
- fi
-
- bind_cores_index=$(set_irq_range "$nic_name" "$bind_cores_index"
"${numa0_irq_ranges[@]}")
-done
+ declare -a node_irq_ranges=()
+ unpack_cpu_ranges "${ranges}" node_irq_ranges
-echo -e "\nSetting IRQ affinity with vCPUs on NUMA1 [${numa1_irq_ranges[@]}]"
-bind_cores_index=0
-find /sys/class/net -type l | xargs -L 1 realpath | grep '/sys/devices/pci' |
sort | xargs -L 1 basename | while read nic_name; do
- # For non-gvnic devices (e.g. mlx5), the IRQ bindings will be handled by the
device's driver.
- if ! is_gvnic "$nic_name"; then
- echo "$nic_name is not gvnic device, skipping set irq on this device"
- continue
+ if [[ $node -eq 0 ]]; then
+ # Skip vCPU 0
+ node_irq_ranges=("${node_irq_ranges[@]:1}")
fi
- echo "$nic_name is Gvnic device, continuing set IRQ on $nic_name."
-
- nic_numa_node=$(cat /sys/class/net/"$nic_name"/device/numa_node)
- if [[ $nic_numa_node -ne 1 ]]; then
- continue
- fi
+ echo -e "\nSetting IRQ affinity with vCPUs on NUMA${node}
[${node_irq_ranges[@]}]"
+ bind_cores_index=0
+ find ${ROOT_DIR}sys/class/net -type l | xargs -L 1 realpath | grep
'/sys/devices/pci' | sort | xargs -L 1 basename | while read nic_name; do
+ nic_numa_node=$(cat ${ROOT_DIR}sys/class/net/"$nic_name"/device/numa_node)
+ if [[ $nic_numa_node -ne $node ]]; then
+ continue
+ fi
- bind_cores_index=$(set_irq_range "$nic_name" "$bind_cores_index"
"${numa1_irq_ranges[@]}")
+ # For non-gvnic/idpf devices (e.g. mlx5), the IRQ bindings will be handled
by the device's driver.
+ if is_gvnic "$nic_name"; then
+ bind_cores_index=$(set_irq_range_gve "$nic_name" "$bind_cores_index"
"${node_irq_ranges[@]}")
+ elif is_idpf "$nic_name"; then
+ bind_cores_index=$(set_irq_range_idpf "$nic_name" "$bind_cores_index"
"${node_irq_ranges[@]}")
+ if [[ $machine_type == *"a4x-maxgpu-"* ]]; then
+ ethtool -G "$nic_name" rx "$A4X_RX_RING_LENGTH" tx
"$A4X_TX_RING_LENGTH"
+ fi
+ else
+ echo "$nic_name is not a gvnic/idpf device, not setting irq affinity on
this device"
+ fi
+ done
done
-
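
The rewritten google_set_multiqueue above no longer hard-codes two NUMA nodes: it counts node directories under ${ROOT_DIR}sys/devices/system/node and parses each node's cpulist, and the root directory and metadata server can be overridden via GOOGLE_SET_MULTIQUEUE_FAKEROOT and GOOGLE_SET_MULTIQUEUE_METADATASERVER for testing. A minimal sketch of the per-node cpulist parsing under the same conventions (ROOT_DIR ends with a slash; the fakeroot fallback here is only illustrative):

  # Hedged sketch: enumerate NUMA nodes and expand each cpulist entry such as
  # "0-51,104-155" into start/end pairs; single CPUs have no dash.
  ROOT_DIR="${GOOGLE_SET_MULTIQUEUE_FAKEROOT:-/}"
  for node_dir in "${ROOT_DIR}"sys/devices/system/node/node*; do
    node=$(basename "${node_dir}")
    for r in $(tr ',' ' ' < "${node_dir}/cpulist"); do
      start=${r%-*}
      end=${r#*-}   # equals start when the entry is a single CPU
      echo "${node}: vCPUs ${start}-${end}"
    done
  done
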