The following pull request was submitted through GitHub. It can be accessed and reviewed at: https://github.com/lxc/lxd/pull/3650
This e-mail was sent by the LXC bot; direct replies will not reach the author unless they happen to be subscribed to this list. === Description (from pull-request) === The previous code assumes that the nvidia card index and the dri card index are identical, i.e. for a given pair {/dev/card<card-idx>, /dev/nvidia<nvidia-idx>} it was assumed that <card-idx> == <nvidia-idx> but it is definitely possible that <card-idx> != <nvidia-idx>. Also, let's report an error when we don't find the gpu device that the user requested. Closes #3642. Signed-off-by: Christian Brauner <christian.brau...@ubuntu.com>
From f6595d97c95e0eaeaf0e390d40da378ce9f1f539 Mon Sep 17 00:00:00 2001 From: Christian Brauner <christian.brau...@ubuntu.com> Date: Thu, 10 Aug 2017 20:17:16 +0200 Subject: [PATCH] gpu: fix gpu attach The previous code assumes that the nvidia card index and the dri card index are identical, i.e. for a given pair {/dev/card<card-idx>, /dev/nvidia<nvidia-idx>} it was assumed that <card-idx> == <nvidia-idx> but it is definitely possible that <card-idx> != <nvidia-idx>. Also, let's report an error when we don't find the gpu device that the user requested. Closes #3642. Signed-off-by: Christian Brauner <christian.brau...@ubuntu.com> --- lxd/container_lxc.go | 18 ++++++++++++++++++ lxd/devices.go | 34 +++++++++++++++++++++++++++++++--- 2 files changed, 49 insertions(+), 3 deletions(-) diff --git a/lxd/container_lxc.go b/lxd/container_lxc.go index 62356543d..8d26217f6 100644 --- a/lxd/container_lxc.go +++ b/lxd/container_lxc.go @@ -1812,6 +1812,7 @@ func (c *containerLXC) startCommon() (string, error) { } sawNvidia := false + found := false for _, gpu := range gpus { if (m["vendorid"] != "" && gpu.vendorid != m["vendorid"]) || (m["pci"] != "" && gpu.pci != m["pci"]) || @@ -1820,6 +1821,8 @@ func (c *containerLXC) startCommon() (string, error) { continue } + found = true + err := c.setupUnixDevice(k, m, gpu.major, gpu.minor, gpu.path, true) if err != nil { return "", err @@ -1845,6 +1848,12 @@ func (c *containerLXC) startCommon() (string, error) { } } } + + if !found { + msg := "Failed to detect requested GPU device" + logger.Error(msg) + return "", fmt.Errorf(msg) + } } else if m["type"] == "disk" { if m["path"] != "/" { diskDevices[k] = m @@ -3859,6 +3868,7 @@ func (c *containerLXC) Update(args containerArgs, userRequested bool) error { } sawNvidia := false + found := false for _, gpu := range gpus { if (m["vendorid"] != "" && gpu.vendorid != m["vendorid"]) || (m["pci"] != "" && gpu.pci != m["pci"]) || @@ -3867,6 +3877,8 @@ func (c *containerLXC) Update(args 
containerArgs, userRequested bool) error { continue } + found = true + err = c.insertUnixDeviceNum(m, gpu.major, gpu.minor, gpu.path) if err != nil { logger.Error("Failed to insert GPU device.", log.Ctx{"err": err, "gpu": gpu, "container": c.Name()}) @@ -3898,6 +3910,12 @@ func (c *containerLXC) Update(args containerArgs, userRequested bool) error { } } } + + if !found { + msg := "Failed to detect requested GPU device" + logger.Error(msg) + return fmt.Errorf(msg) + } } } diff --git a/lxd/devices.go b/lxd/devices.go index 771094b5e..2fa749a1f 100644 --- a/lxd/devices.go +++ b/lxd/devices.go @@ -205,11 +205,39 @@ func deviceLoadGpu() ([]gpuDevice, []nvidiaGpuDevices, error) { if !isNvidia { isNvidia = true } - nvidiaPath := "/dev/nvidia" + strconv.Itoa(tmpGpu.minor) + + nvidiaPath := fmt.Sprintf("/proc/driver/nvidia/gpus/%s/information", tmpGpu.pci) + buf, err := ioutil.ReadFile(nvidiaPath) + if err != nil { + return nil, nil, err + } + strBuf := strings.TrimSpace(string(buf)) + idx := strings.Index(strBuf, "Device Minor:") + idx += len("Device Minor:") + strBuf = strBuf[idx:] + strBuf = strings.TrimSpace(strBuf) + idx = strings.Index(strBuf, " ") + if idx == -1 { + idx = strings.Index(strBuf, "\t") + } + if idx >= 1 { + strBuf = strBuf[:idx] + } + + if strBuf == "" { + return nil, nil, fmt.Errorf("No device minor index detected") + } + + _, err = strconv.Atoi(strBuf) + if err != nil { + return nil, nil, err + } + + nvidiaPath = "/dev/nvidia" + strBuf stat := syscall.Stat_t{} - err := syscall.Stat(nvidiaPath, &stat) + err = syscall.Stat(nvidiaPath, &stat) if err != nil { - continue + return nil, nil, err } tmpGpu.nvidia.path = nvidiaPath tmpGpu.nvidia.major = int(stat.Rdev / 256)
_______________________________________________ lxc-devel mailing list lxc-devel@lists.linuxcontainers.org http://lists.linuxcontainers.org/listinfo/lxc-devel