Package: release.debian.org
Severity: normal
Tags: bookworm
User: release.debian....@packages.debian.org
Usertags: pu

[ Reason ]
In order to upgrade src:nvidia-graphics-drivers to the 535 LTS series
(the 525 series currently in stable is already EoL), we need to update
some additional packages (some driver components can be built from
source and reside in contrib).

[ Impact ]
If this update is not accepted, driver components of different major
versions would be mixed in stable. Such combinations are untested, may
not work well together, and would at least confuse users.

[ Tests ]
Testing would require NVIDIA hardware and actual use of the driver.

[ Risks ]
Low. Upgrading the nvidia driver stack to new upstream releases in
stable has been done in the past.

[ Checklist ]
  [*] *all* changes are documented in the d/changelog
  [*] I reviewed all changes and I approve them
  [*] attach debdiff against the package in (old)stable
  [*] the issue is verified as fixed in unstable

[ Changes ]
+nvidia-persistenced (535.171.04-1~deb12u1) bookworm; urgency=medium
+
+  * Rebuild for bookworm.
+
+ -- Andreas Beckmann <a...@debian.org>  Tue, 26 Mar 2024 01:13:10 +0100
+
+nvidia-persistenced (535.171.04-1) unstable; urgency=medium
+
+  * New upstream release.
+
+ -- Andreas Beckmann <a...@debian.org>  Mon, 25 Mar 2024 10:51:19 +0100
+
+nvidia-persistenced (530.41.03-1) unstable; urgency=medium
+
+  * New upstream release.
+  * Switch B-D from pkg-config to pkgconf.
+
+ -- Andreas Beckmann <a...@debian.org>  Tue, 19 Mar 2024 17:59:21 +0100
+
+nvidia-persistenced (525.147.05-1) unstable; urgency=medium
+
+  * New upstream release.
+  * Update the list of supported drivers.
+
+ -- Andreas Beckmann <a...@debian.org>  Fri, 26 Jan 2024 23:34:41 +0100

- pkg-config was already a transitional package in bookworm, so
  switching the build dependency to pkgconf is safe there.
- The transitional -tesla driver packages have been removed from the
  dependency alternatives.

[ Other info ]
This is a rebuild of the package from sid with no further changes.

Andreas
diff --git a/debian/changelog b/debian/changelog
index 4a6ead7..4cd4301 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,29 @@
+nvidia-persistenced (535.171.04-1~deb12u1) bookworm; urgency=medium
+
+  * Rebuild for bookworm.
+
+ -- Andreas Beckmann <a...@debian.org>  Tue, 26 Mar 2024 01:13:10 +0100
+
+nvidia-persistenced (535.171.04-1) unstable; urgency=medium
+
+  * New upstream release.
+
+ -- Andreas Beckmann <a...@debian.org>  Mon, 25 Mar 2024 10:51:19 +0100
+
+nvidia-persistenced (530.41.03-1) unstable; urgency=medium
+
+  * New upstream release.
+  * Switch B-D from pkg-config to pkgconf.
+
+ -- Andreas Beckmann <a...@debian.org>  Tue, 19 Mar 2024 17:59:21 +0100
+
+nvidia-persistenced (525.147.05-1) unstable; urgency=medium
+
+  * New upstream release.
+  * Update the list of supported drivers.
+
+ -- Andreas Beckmann <a...@debian.org>  Fri, 26 Jan 2024 23:34:41 +0100
+
 nvidia-persistenced (525.85.05-1) unstable; urgency=medium
 
   * New upstream release.
diff --git a/debian/control b/debian/control
index 488080e..a55bf29 100644
--- a/debian/control
+++ b/debian/control
@@ -6,7 +6,7 @@ Uploaders:
  Andreas Beckmann <a...@debian.org>,
 Build-Depends:
  debhelper-compat (= 13),
- pkg-config,
+ pkgconf,
  libtirpc-dev,
  m4,
 Rules-Requires-Root: no
@@ -21,8 +21,7 @@ Multi-Arch: foreign
 Pre-Depends:
  ${misc:Pre-Depends}
 Depends:
- libnvidia-cfg1 [!i386 !armhf !ppc64el]
- | libnvidia-tesla-cfg1 [amd64 arm64 ppc64el]
+ libnvidia-cfg1 [!i386 !armhf]
  | libnvidia-tesla-470-cfg1 [amd64 arm64 ppc64el]
  | libnvidia-cfg.so.1
  | libnvidia-cfg1-any,
diff --git a/debian/copyright b/debian/copyright
index 929b9c2..61fef5c 100644
--- a/debian/copyright
+++ b/debian/copyright
@@ -9,12 +9,12 @@ Disclaimer:
  NVIDIA drivers in non-free.
 
 Files: *
-Copyright: Copyright (C) 2004-2022 NVIDIA Corporation
+Copyright: Copyright (C) 2004-2023 NVIDIA Corporation
 License: Expat
 
 Files: debian/*
 Copyright:
- © 2014-2023 Andreas Beckmann <a...@debian.org>
+ © 2014-2024 Andreas Beckmann <a...@debian.org>
 License: Expat
 
 License: Expat
diff --git a/debian/salsa-ci.yml b/debian/salsa-ci.yml
index 14fa000..c3d1fdf 100644
--- a/debian/salsa-ci.yml
+++ b/debian/salsa-ci.yml
@@ -1,7 +1,6 @@
 ---
 include:
-  - https://salsa.debian.org/salsa-ci-team/pipeline/raw/master/salsa-ci.yml
-  - https://salsa.debian.org/salsa-ci-team/pipeline/raw/master/pipeline-jobs.yml
+  - https://salsa.debian.org/salsa-ci-team/pipeline/raw/master/recipes/debian.yml
 
 variables:
   SALSA_CI_COMPONENTS: 'main contrib non-free'
diff --git a/nv-ioctl-numa.h b/nv-ioctl-numa.h
index 3fad820..1d456ec 100644
--- a/nv-ioctl-numa.h
+++ b/nv-ioctl-numa.h
@@ -62,6 +62,7 @@ typedef struct nv_ioctl_numa_info
     uint64_t memblock_size __aligned(8);
     uint64_t numa_mem_addr __aligned(8);
     uint64_t numa_mem_size __aligned(8);
+    uint8_t  use_auto_online;
     nv_offline_addresses_t offline_addresses __aligned(8);
 } nv_ioctl_numa_info_t;
 
diff --git a/nvidia-numa.c b/nvidia-numa.c
index afc8fe4..0fbd287 100644
--- a/nvidia-numa.c
+++ b/nvidia-numa.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018, NVIDIA CORPORATION.
+ * Copyright (c) 2018-2023, NVIDIA CORPORATION.
  *
  * Permission is hereby granted, free of charge, to any person
  * obtaining a copy of this software and associated documentation
@@ -51,6 +51,7 @@
 #define MEMORY_PATH_FMT              "/sys/devices/system/memory"
 #define MEMORY_HARD_OFFLINE_PATH_FMT MEMORY_PATH_FMT "/hard_offline_page"
 #define MEMORY_PROBE_PATH_FMT        MEMORY_PATH_FMT "/probe"
+#define AUTO_ONLINE_PATH             MEMORY_PATH_FMT "/auto_online_blocks"
 #define MEMBLK_FILE_FMT              "memory%d"
 #define MEMBLK_DIR_PATH_FMT          MEMORY_PATH_FMT "/" MEMBLK_FILE_FMT
 #define MEMBLK_STATE_PATH_FMT        MEMBLK_DIR_PATH_FMT "/state"
@@ -59,6 +60,9 @@
 #define STATE_ONLINE                 "online"
 #define VALID_MOVABLE_STATE          "Movable"
 
+#define SYSFS_NVIDIA_DIR             "/sys/bus/pci/drivers/nvidia/"
+#define SYSFS_ID_PATH                SYSFS_NVIDIA_DIR "%s/%s"
+
 #ifndef NV_IS_ALIGNED
 #define NV_IS_ALIGNED(v, gran)       (0 == ((v) & ((gran) - 1)))
 #endif
@@ -708,6 +712,10 @@ NvPdStatus nvNumaOnlineMemory(NvNumaDevice *numa_info)
         goto driver_fail;
     }
 
+    /* handle case where auto online/offline should be used for NUMA memory */
+    if (numa_info_params.use_auto_online)
+        goto done;
+
     /* Check if numa status from RM is valid */
     switch (numa_info_params.status)
     {
@@ -834,6 +842,7 @@ set_driver_status:
     syslog(LOG_NOTICE, "NUMA: Memory onlining completed!\n");
 done:
     numa_info->fd = fd;
+    numa_info->use_auto_online = numa_info_params.use_auto_online;
     return NVPD_SUCCESS;
 
 online_failed:
@@ -864,6 +873,10 @@ NvPdStatus nvNumaOfflineMemory(NvNumaDevice *numa_info)
         return NVPD_ERR_NUMA_FAILURE;
     }
 
+    /* handle case where auto online/offline should be used for NUMA memory */
+    if (numa_info->use_auto_online)
+        goto done;
+
     status = offline_memory(fd);
     if (status < 0) {
         syslog_device(device_pci_info,
@@ -873,7 +886,70 @@ NvPdStatus nvNumaOfflineMemory(NvNumaDevice *numa_info)
         return NVPD_ERR_NUMA_FAILURE;
     }
 
+done:
     close(fd);
     numa_info->fd = -1;
+    numa_info->use_auto_online = 0;
+    return NVPD_SUCCESS;
+}
+
+static int
+read_int_from_file(char *devicename, char *id_file)
+{
+    FILE *fp;
+    char filename[PATH_MAX];
+    unsigned int id;
+
+    sprintf(filename, SYSFS_ID_PATH, devicename, id_file);
+
+    fp = fopen(filename, "r");
+    if (fp == NULL)
+        return -1;
+    if (fscanf(fp, "%x", &id) < 0)
+        return -1;
+    fclose(fp);
+
+    return id;
+}
+/*
+ * Handle setup for systems with GPUs that require Auto-online of NUMA memory
+ */
+NvPdStatus setup_numa_auto_online(void)
+{
+    DIR *nvidia;
+    struct dirent *device;
+    int vendor_id, device_id;
+    int status;
+
+    nvidia = opendir(SYSFS_NVIDIA_DIR);
+    if (nvidia == NULL) {
+        printf("Failed to open %s\n", SYSFS_NVIDIA_DIR);
+        syslog(LOG_ERR, "NUMA: Failed to open %s\n", SYSFS_NVIDIA_DIR);
+        return NVPD_ERR_DEVICE_NOT_FOUND;
+    }
+
+    // Scans devices owned by the NVIDIA driver...
+    while ((device = readdir(nvidia)) != NULL) {
+        if (device->d_type != DT_LNK)
+            continue;
+
+        vendor_id = read_int_from_file(device->d_name, "vendor");
+        if (vendor_id != 0x10de)
+            continue;
+
+        device_id = read_int_from_file(device->d_name, "device");
+
+        // Check for GH180, which requires auto-online
+        if (device_id >= 0x2340 && device_id <= 0x237f) {
+            syslog(LOG_INFO, "NUMA: Enabling NUMA memory Auto-Online due to GPU requirement\n");
+            status = write_string_to_file(AUTO_ONLINE_PATH, BRING_ONLINE_CMD, strlen(BRING_ONLINE_CMD));
+            if (status < 0) {
+                syslog(LOG_ERR, "NUMA: Failed to enable NUMA memory Auto-Online\n");
+                return NVPD_ERR_NUMA_FAILURE;
+            }
+            return NVPD_SUCCESS;
+        }
+    }
+
     return NVPD_SUCCESS;
 }
diff --git a/nvidia-numa.h b/nvidia-numa.h
index b9e9a32..53d58aa 100644
--- a/nvidia-numa.h
+++ b/nvidia-numa.h
@@ -31,10 +31,13 @@ typedef struct
 {
     int fd;
     NvCfgPciDevice *pci_info;
+    uint8_t use_auto_online;
 } NvNumaDevice;
 
 NvPdStatus nvNumaOnlineMemory(NvNumaDevice *numa_info);
 
 NvPdStatus nvNumaOfflineMemory(NvNumaDevice *numa_info);
 
+NvPdStatus setup_numa_auto_online(void);
+
 #endif
diff --git a/nvidia-persistenced.c b/nvidia-persistenced.c
index 9fccec8..f674aa9 100644
--- a/nvidia-persistenced.c
+++ b/nvidia-persistenced.c
@@ -518,6 +518,8 @@ static NvPdStatus setup_nvidia_cfg_api(const char *nvidia_cfg_path)
 {
     char *lib_path;
     int status = 0;
+    NvCfgBool success;
+    NvCfgPciDevice *nv_cfg_devices;
 
     if (nvidia_cfg_path != NULL) {
         lib_path = nvstrcat(nvidia_cfg_path, "/", NVIDIA_CFG_LIB, NULL);
@@ -550,6 +552,16 @@ static NvPdStatus setup_nvidia_cfg_api(const char *nvidia_cfg_path)
         return NVPD_ERR_DRIVER;
     }
 
+    /* Make a call to get_pci_devices for the side-effect of creating the device files */
+    success = nv_cfg_api.get_pci_devices(&num_devices, &nv_cfg_devices);
+    if (!success) {
+        syslog(LOG_ERR, "Failed to query NVIDIA devices. Please ensure that "
+                        "the NVIDIA device files (/dev/nvidia*) exist, and "
+                        "that user %u has read and write permissions for "
+                        "those files.", getuid());
+        return NVPD_ERR_DRIVER;
+    }
+
     return NVPD_SUCCESS;
 }
 
@@ -903,6 +915,11 @@ int main(int argc, char* argv[])
         goto shutdown;
     }
 
+    status = setup_numa_auto_online();
+    if (status != NVPD_SUCCESS) {
+        goto shutdown;
+    }
+
     status = setup_devices(options.persistence_mode);
     if (status != NVPD_SUCCESS) {
         goto shutdown;
diff --git a/version.mk b/version.mk
index 36f5738..89404cd 100644
--- a/version.mk
+++ b/version.mk
@@ -1,4 +1,4 @@
-NVIDIA_VERSION = 525.85.05
+NVIDIA_VERSION = 535.171.04
 
 # This file.
 VERSION_MK_FILE := $(lastword $(MAKEFILE_LIST))

Reply via email to