Re: [RFC for-5.1 1/4] spapr: Refactor locating NVLink2 devices for device tree creation

2020-03-26 Thread David Gibson
On Thu, Mar 26, 2020 at 12:57:38PM +0100, Greg Kurz wrote:
> On Thu, 26 Mar 2020 16:40:06 +1100
> David Gibson  wrote:
> 
> > Currently spapr_phb_nvgpu_populate_pcidev_dt() works a little cryptically.
> > It steps through all the NVLink2 GPUs and NPUs and if they match the device
> > we're called for, we generate the relevant device tree information.
> > 
> > Make this a little more obvious by introducing helpers to determine it a
> 
> ... to determine if a

Fixed, thanks.

> 
> > given PCI device is an NVLink2 GPU or NPU, returning the NVLink2 slot and
> > link number information as well.
> > 
> > Signed-off-by: David Gibson 
> > ---
> 
> LGTM
> 
> Reviewed-by: Greg Kurz 
> 
> >  hw/ppc/spapr_pci_nvlink2.c | 115 +
> >  1 file changed, 79 insertions(+), 36 deletions(-)
> > 
> > diff --git a/hw/ppc/spapr_pci_nvlink2.c b/hw/ppc/spapr_pci_nvlink2.c
> > index 8332d5694e..7d3a685421 100644
> > --- a/hw/ppc/spapr_pci_nvlink2.c
> > +++ b/hw/ppc/spapr_pci_nvlink2.c
> > @@ -390,13 +390,12 @@ void spapr_phb_nvgpu_ram_populate_dt(SpaprPhbState *sphb, void *fdt)
> >  
> >  }
> >  
> > -void spapr_phb_nvgpu_populate_pcidev_dt(PCIDevice *dev, void *fdt, int offset,
> > -SpaprPhbState *sphb)
> > +static bool is_nvgpu(PCIDevice *dev, SpaprPhbState *sphb, int *slot)
> >  {
> > -int i, j;
> > +int i;
> >  
> >  if (!sphb->nvgpus) {
> > -return;
> > +return false;
> >  }
> >  
> >  for (i = 0; i < sphb->nvgpus->num; ++i) {
> > @@ -406,47 +405,91 @@ void spapr_phb_nvgpu_populate_pcidev_dt(PCIDevice *dev, void *fdt, int offset,
> >  if (!nvslot->gpdev) {
> >  continue;
> >  }
> > +
> >  if (dev == nvslot->gpdev) {
> > -uint32_t npus[nvslot->linknum];
> > +if (slot) {
> > +*slot = i;
> > +}
> > +return true;
> > +}
> > +}
> >  
> > -for (j = 0; j < nvslot->linknum; ++j) {
> > -PCIDevice *npdev = nvslot->links[j].npdev;
> > +return false;
> > +}
> >  
> > -npus[j] = cpu_to_be32(PHANDLE_PCIDEV(sphb, npdev));
> > -}
> > -_FDT(fdt_setprop(fdt, offset, "ibm,npu", npus,
> > - j * sizeof(npus[0])));
> > -_FDT((fdt_setprop_cell(fdt, offset, "phandle",
> > -   PHANDLE_PCIDEV(sphb, dev))));
> > +static bool is_nvnpu(PCIDevice *dev, SpaprPhbState *sphb, int *slot, int *link)
> > +{
> > +int i, j;
> > +
> > +if (!sphb->nvgpus) {
> > +return false;
> > +}
> > +
> > +for (i = 0; i < sphb->nvgpus->num; ++i) {
> > +SpaprPhbPciNvGpuSlot *nvslot = &sphb->nvgpus->slots[i];
> > +
> > +/* Skip "slot" without attached GPU */
> > +if (!nvslot->gpdev) {
> >  continue;
> >  }
> >  
> >  for (j = 0; j < nvslot->linknum; ++j) {
> > -if (dev != nvslot->links[j].npdev) {
> > -continue;
> > +if (dev == nvslot->links[j].npdev) {
> > +if (slot) {
> > +*slot = i;
> > +}
> > +if (link) {
> > +*link = j;
> > +}
> > +return true;
> >  }
> > +}
> > +}
> >  
> > -_FDT((fdt_setprop_cell(fdt, offset, "phandle",
> > -   PHANDLE_PCIDEV(sphb, dev))));
> > -_FDT(fdt_setprop_cell(fdt, offset, "ibm,gpu",
> > -  PHANDLE_PCIDEV(sphb, nvslot->gpdev)));
> > -_FDT((fdt_setprop_cell(fdt, offset, "ibm,nvlink",
> > -   PHANDLE_NVLINK(sphb, i, j))));
> > -/*
> > - * If we ever want to emulate GPU RAM at the same location as on
> > - * the host - here is the encoding GPA->TGT:
> > - *
> > - * gta  = ((sphb->nv2_gpa >> 42) & 0x1) << 42;
> > - * gta |= ((sphb->nv2_gpa >> 45) & 0x3) << 43;
> > - * gta |= ((sphb->nv2_gpa >> 49) & 0x3) << 45;
> > - * gta |= sphb->nv2_gpa & ((1UL << 43) - 1);
> > - */
> > -_FDT(fdt_setprop_cell(fdt, offset, "memory-region",
> > -  PHANDLE_GPURAM(sphb, i)));
> > -_FDT(fdt_setprop_u64(fdt, offset, "ibm,device-tgt-addr",
> > - nvslot->tgt));
> > -_FDT(fdt_setprop_cell(fdt, offset, "ibm,nvlink-speed",
> > -  nvslot->links[j].link_speed));
> > +return false;
> > +}
> > +
> > +void spapr_phb_nvgpu_populate_pcidev_dt(PCIDevice *dev, void *fdt, int offset,
> > +SpaprPhbState *sphb)
> > +{
> > +int slot, link;
> > +
> > +if (is_nvgpu(dev, sphb, &slot)) {
> > +SpaprPhbPciNvGpuSlot *nvslot = &sphb->nvgpus->slots[slot];

Re: [RFC for-5.1 1/4] spapr: Refactor locating NVLink2 devices for device tree creation

2020-03-26 Thread Greg Kurz
On Thu, 26 Mar 2020 16:40:06 +1100
David Gibson  wrote:

> Currently spapr_phb_nvgpu_populate_pcidev_dt() works a little cryptically.
> It steps through all the NVLink2 GPUs and NPUs and if they match the device
> we're called for, we generate the relevant device tree information.
> 
> Make this a little more obvious by introducing helpers to determine it a

... to determine if a

> given PCI device is an NVLink2 GPU or NPU, returning the NVLink2 slot and
> link number information as well.
> 
> Signed-off-by: David Gibson 
> ---

LGTM

Reviewed-by: Greg Kurz 

>  hw/ppc/spapr_pci_nvlink2.c | 115 +
>  1 file changed, 79 insertions(+), 36 deletions(-)
> 
> diff --git a/hw/ppc/spapr_pci_nvlink2.c b/hw/ppc/spapr_pci_nvlink2.c
> index 8332d5694e..7d3a685421 100644
> --- a/hw/ppc/spapr_pci_nvlink2.c
> +++ b/hw/ppc/spapr_pci_nvlink2.c
> @@ -390,13 +390,12 @@ void spapr_phb_nvgpu_ram_populate_dt(SpaprPhbState *sphb, void *fdt)
>  
>  }
>  
> -void spapr_phb_nvgpu_populate_pcidev_dt(PCIDevice *dev, void *fdt, int offset,
> -SpaprPhbState *sphb)
> +static bool is_nvgpu(PCIDevice *dev, SpaprPhbState *sphb, int *slot)
>  {
> -int i, j;
> +int i;
>  
>  if (!sphb->nvgpus) {
> -return;
> +return false;
>  }
>  
>  for (i = 0; i < sphb->nvgpus->num; ++i) {
> @@ -406,47 +405,91 @@ void spapr_phb_nvgpu_populate_pcidev_dt(PCIDevice *dev, void *fdt, int offset,
>  if (!nvslot->gpdev) {
>  continue;
>  }
> +
>  if (dev == nvslot->gpdev) {
> -uint32_t npus[nvslot->linknum];
> +if (slot) {
> +*slot = i;
> +}
> +return true;
> +}
> +}
>  
> -for (j = 0; j < nvslot->linknum; ++j) {
> -PCIDevice *npdev = nvslot->links[j].npdev;
> +return false;
> +}
>  
> -npus[j] = cpu_to_be32(PHANDLE_PCIDEV(sphb, npdev));
> -}
> -_FDT(fdt_setprop(fdt, offset, "ibm,npu", npus,
> - j * sizeof(npus[0])));
> -_FDT((fdt_setprop_cell(fdt, offset, "phandle",
> -   PHANDLE_PCIDEV(sphb, dev))));
> +static bool is_nvnpu(PCIDevice *dev, SpaprPhbState *sphb, int *slot, int *link)
> +{
> +int i, j;
> +
> +if (!sphb->nvgpus) {
> +return false;
> +}
> +
> +for (i = 0; i < sphb->nvgpus->num; ++i) {
> +SpaprPhbPciNvGpuSlot *nvslot = &sphb->nvgpus->slots[i];
> +
> +/* Skip "slot" without attached GPU */
> +if (!nvslot->gpdev) {
>  continue;
>  }
>  
>  for (j = 0; j < nvslot->linknum; ++j) {
> -if (dev != nvslot->links[j].npdev) {
> -continue;
> +if (dev == nvslot->links[j].npdev) {
> +if (slot) {
> +*slot = i;
> +}
> +if (link) {
> +*link = j;
> +}
> +return true;
>  }
> +}
> +}
>  
> -_FDT((fdt_setprop_cell(fdt, offset, "phandle",
> -   PHANDLE_PCIDEV(sphb, dev))));
> -_FDT(fdt_setprop_cell(fdt, offset, "ibm,gpu",
> -  PHANDLE_PCIDEV(sphb, nvslot->gpdev)));
> -_FDT((fdt_setprop_cell(fdt, offset, "ibm,nvlink",
> -   PHANDLE_NVLINK(sphb, i, j))));
> -/*
> - * If we ever want to emulate GPU RAM at the same location as on
> - * the host - here is the encoding GPA->TGT:
> - *
> - * gta  = ((sphb->nv2_gpa >> 42) & 0x1) << 42;
> - * gta |= ((sphb->nv2_gpa >> 45) & 0x3) << 43;
> - * gta |= ((sphb->nv2_gpa >> 49) & 0x3) << 45;
> - * gta |= sphb->nv2_gpa & ((1UL << 43) - 1);
> - */
> -_FDT(fdt_setprop_cell(fdt, offset, "memory-region",
> -  PHANDLE_GPURAM(sphb, i)));
> -_FDT(fdt_setprop_u64(fdt, offset, "ibm,device-tgt-addr",
> - nvslot->tgt));
> -_FDT(fdt_setprop_cell(fdt, offset, "ibm,nvlink-speed",
> -  nvslot->links[j].link_speed));
> +return false;
> +}
> +
> +void spapr_phb_nvgpu_populate_pcidev_dt(PCIDevice *dev, void *fdt, int 
> offset,
> +SpaprPhbState *sphb)
> +{
> +int slot, link;
> +
> +if (is_nvgpu(dev, sphb, &slot)) {
> +SpaprPhbPciNvGpuSlot *nvslot = &sphb->nvgpus->slots[slot];
> +uint32_t npus[nvslot->linknum];
> +
> +for (link = 0; link < nvslot->linknum; ++link) {
> +PCIDevice *npdev = nvslot->links[link].npdev;
> +
> +npus[link] = cpu_to_be32(PHANDLE_PCIDEV(sphb, npdev));
>  }
> +_FDT(fdt_setprop(fdt, offset, "ibm,npu", npus,
> + link * sizeof(npus[0])));

[RFC for-5.1 1/4] spapr: Refactor locating NVLink2 devices for device tree creation

2020-03-25 Thread David Gibson
Currently spapr_phb_nvgpu_populate_pcidev_dt() works a little cryptically.
It steps through all the NVLink2 GPUs and NPUs and if they match the device
we're called for, we generate the relevant device tree information.

Make this a little more obvious by introducing helpers to determine it a
given PCI device is an NVLink2 GPU or NPU, returning the NVLink2 slot and
link number information as well.

Signed-off-by: David Gibson 
---
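
[Editor's aside, not part of the patch.] Both new helpers take nullable
out-parameters, so a caller that only needs a yes/no answer can pass NULL
instead of &slot / &link. A minimal standalone sketch of that convention,
with a stub lookup standing in for the real NVLink2 search:

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

/* Stub lookup illustrating the is_nvgpu()/is_nvnpu() calling
 * convention: report whether the device matched, and fill the
 * out-parameter only if the caller asked for it. */
static bool find_slot(int dev, int *slot)
{
    if (dev == 42) {
        if (slot) {
            *slot = 3;
        }
        return true;
    }
    return false;
}

int main(void)
{
    int slot;

    if (find_slot(42, &slot)) {     /* caller wants the slot index */
        printf("found in slot %d\n", slot);
    }
    if (find_slot(42, NULL)) {      /* caller only wants yes/no */
        printf("present\n");
    }
    return 0;
}
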
 hw/ppc/spapr_pci_nvlink2.c | 115 +
 1 file changed, 79 insertions(+), 36 deletions(-)

diff --git a/hw/ppc/spapr_pci_nvlink2.c b/hw/ppc/spapr_pci_nvlink2.c
index 8332d5694e..7d3a685421 100644
--- a/hw/ppc/spapr_pci_nvlink2.c
+++ b/hw/ppc/spapr_pci_nvlink2.c
@@ -390,13 +390,12 @@ void spapr_phb_nvgpu_ram_populate_dt(SpaprPhbState *sphb, void *fdt)
 
 }
 
-void spapr_phb_nvgpu_populate_pcidev_dt(PCIDevice *dev, void *fdt, int offset,
-SpaprPhbState *sphb)
+static bool is_nvgpu(PCIDevice *dev, SpaprPhbState *sphb, int *slot)
 {
-int i, j;
+int i;
 
 if (!sphb->nvgpus) {
-return;
+return false;
 }
 
 for (i = 0; i < sphb->nvgpus->num; ++i) {
@@ -406,47 +405,91 @@ void spapr_phb_nvgpu_populate_pcidev_dt(PCIDevice *dev, void *fdt, int offset,
 if (!nvslot->gpdev) {
 continue;
 }
+
 if (dev == nvslot->gpdev) {
-uint32_t npus[nvslot->linknum];
+if (slot) {
+*slot = i;
+}
+return true;
+}
+}
 
-for (j = 0; j < nvslot->linknum; ++j) {
-PCIDevice *npdev = nvslot->links[j].npdev;
+return false;
+}
 
-npus[j] = cpu_to_be32(PHANDLE_PCIDEV(sphb, npdev));
-}
-_FDT(fdt_setprop(fdt, offset, "ibm,npu", npus,
- j * sizeof(npus[0])));
-_FDT((fdt_setprop_cell(fdt, offset, "phandle",
-   PHANDLE_PCIDEV(sphb, dev))));
+static bool is_nvnpu(PCIDevice *dev, SpaprPhbState *sphb, int *slot, int *link)
+{
+int i, j;
+
+if (!sphb->nvgpus) {
+return false;
+}
+
+for (i = 0; i < sphb->nvgpus->num; ++i) {
+SpaprPhbPciNvGpuSlot *nvslot = &sphb->nvgpus->slots[i];
+
+/* Skip "slot" without attached GPU */
+if (!nvslot->gpdev) {
 continue;
 }
 
 for (j = 0; j < nvslot->linknum; ++j) {
-if (dev != nvslot->links[j].npdev) {
-continue;
+if (dev == nvslot->links[j].npdev) {
+if (slot) {
+*slot = i;
+}
+if (link) {
+*link = j;
+}
+return true;
 }
+}
+}
 
-_FDT((fdt_setprop_cell(fdt, offset, "phandle",
-   PHANDLE_PCIDEV(sphb, dev))));
-_FDT(fdt_setprop_cell(fdt, offset, "ibm,gpu",
-  PHANDLE_PCIDEV(sphb, nvslot->gpdev)));
-_FDT((fdt_setprop_cell(fdt, offset, "ibm,nvlink",
-   PHANDLE_NVLINK(sphb, i, j))));
-/*
- * If we ever want to emulate GPU RAM at the same location as on
- * the host - here is the encoding GPA->TGT:
- *
- * gta  = ((sphb->nv2_gpa >> 42) & 0x1) << 42;
- * gta |= ((sphb->nv2_gpa >> 45) & 0x3) << 43;
- * gta |= ((sphb->nv2_gpa >> 49) & 0x3) << 45;
- * gta |= sphb->nv2_gpa & ((1UL << 43) - 1);
- */
-_FDT(fdt_setprop_cell(fdt, offset, "memory-region",
-  PHANDLE_GPURAM(sphb, i)));
-_FDT(fdt_setprop_u64(fdt, offset, "ibm,device-tgt-addr",
- nvslot->tgt));
-_FDT(fdt_setprop_cell(fdt, offset, "ibm,nvlink-speed",
-  nvslot->links[j].link_speed));
+return false;
+}
+
+void spapr_phb_nvgpu_populate_pcidev_dt(PCIDevice *dev, void *fdt, int offset,
+SpaprPhbState *sphb)
+{
+int slot, link;
+
+if (is_nvgpu(dev, sphb, &slot)) {
+SpaprPhbPciNvGpuSlot *nvslot = &sphb->nvgpus->slots[slot];
+uint32_t npus[nvslot->linknum];
+
+for (link = 0; link < nvslot->linknum; ++link) {
+PCIDevice *npdev = nvslot->links[link].npdev;
+
+npus[link] = cpu_to_be32(PHANDLE_PCIDEV(sphb, npdev));
 }
+_FDT(fdt_setprop(fdt, offset, "ibm,npu", npus,
+ link * sizeof(npus[0])));
+_FDT((fdt_setprop_cell(fdt, offset, "phandle",
+   PHANDLE_PCIDEV(sphb, dev))));
+} else if (is_nvnpu(dev, sphb, &slot, &link)) {
+SpaprPhbPciNvGpuSlot *nvslot = &sphb->nvgpus->slots[slot];
+
+_FDT((fdt_setprop_cell(fdt, offset, "phandle",
+   PHANDLE_PCIDEV(sphb, dev))));
+
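
[Editor's aside, not part of the patch.] The GPA->TGT encoding quoted in
the comment above can be exercised in isolation. A self-contained sketch,
using a hypothetical guest physical address chosen so that every relocated
bit field is non-zero:

#include <inttypes.h>
#include <stdio.h>

/* Bit rearrangement from the comment in
 * spapr_phb_nvgpu_populate_pcidev_dt(): bits 46:45 of the GPA move
 * down to 44:43 of the TGT, bits 50:49 move down to 46:45, and the
 * low 43 bits (including bit 42) pass through unchanged. */
static uint64_t gpa_to_tgt(uint64_t gpa)
{
    uint64_t tgt;

    tgt  = ((gpa >> 42) & 0x1) << 42;
    tgt |= ((gpa >> 45) & 0x3) << 43;
    tgt |= ((gpa >> 49) & 0x3) << 45;
    tgt |= gpa & ((1ULL << 43) - 1);
    return tgt;
}

int main(void)
{
    /* Hypothetical address: bits 49, 45 and 42 set, plus a low offset. */
    uint64_t gpa = (1ULL << 49) | (1ULL << 45) | (1ULL << 42) | 0x1000;

    printf("gpa 0x%016" PRIx64 " -> tgt 0x%016" PRIx64 "\n",
           gpa, gpa_to_tgt(gpa));
    return 0;
}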