Le 03/08/2020 à 09:35, Oliver O'Halloran a écrit :
On Thu, Apr 30, 2020 at 11:15 PM Max Gurtovoy <m...@mellanox.com> wrote:
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
b/arch/powerpc/platforms/powernv/pci-ioda.c
index 57d3a6a..9ecc576 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -3706,18 +3706,208 @@ static void pnv_pci_ioda_dma_bus_setup(struct pci_bus 
*bus)
         }
  }

+#ifdef CONFIG_PCI_P2PDMA
+static DEFINE_MUTEX(p2p_mutex);
+
+static bool pnv_pci_controller_owns_addr(struct pci_controller *hose,
+                                        phys_addr_t addr, size_t size)
+{
+       int i;
+
+       /*
+        * It seems safe to assume the full range is under the same PHB, so we
+        * can ignore the size.
+        */
+       for (i = 0; i < ARRAY_SIZE(hose->mem_resources); i++) {
+               struct resource *res = &hose->mem_resources[i];
+
+               if (res->flags && addr >= res->start && addr < res->end)
+                       return true;
+       }
+       return false;
+}
+
+/*
+ * find the phb owning a mmio address if not owned locally
+ */
+static struct pnv_phb *pnv_pci_find_owning_phb(struct pci_dev *pdev,
+                                              phys_addr_t addr, size_t size)
+{
+       struct pci_controller *hose;
+
+       /* fast path */
+       if (pnv_pci_controller_owns_addr(pdev->bus->sysdata, addr, size))
+               return NULL;

Do we actually need this fast path? It's going to be slow either way.
Also if a device is doing p2p to another device under the same PHB
then it should not be happening via the root complex. Is this a case
you've tested?


The "fast path" comment is misleading and we should rephrase. The point is to catch if we're mapping a resource under the same PHB, in which case we don't modify the PHB configuration. So we need to catch it early, but it's not a fast path. If the 2 devices are under the same PHB, the code above shouldn't do anything. So I guess behavior depends on the underlying bridge? We'll need another platform than witherspoon to test it. Probably worth checking.


+       list_for_each_entry(hose, &hose_list, list_node) {
+               struct pnv_phb *phb = hose->private_data;
+
+               if (phb->type != PNV_PHB_NPU_NVLINK &&
+                   phb->type != PNV_PHB_NPU_OCAPI) {
+                       if (pnv_pci_controller_owns_addr(hose, addr, size))
+                               return phb;
+               }
+       }
+       return NULL;
+}
+
+static u64 pnv_pci_dma_dir_to_opal_p2p(enum dma_data_direction dir)
+{
+       if (dir == DMA_TO_DEVICE)
+               return OPAL_PCI_P2P_STORE;
+       else if (dir == DMA_FROM_DEVICE)
+               return OPAL_PCI_P2P_LOAD;
+       else if (dir == DMA_BIDIRECTIONAL)
+               return OPAL_PCI_P2P_LOAD | OPAL_PCI_P2P_STORE;
+       else
+               return 0;
+}
+
+static int pnv_pci_ioda_enable_p2p(struct pci_dev *initiator,
+                                  struct pnv_phb *phb_target,
+                                  enum dma_data_direction dir)
+{
+       struct pci_controller *hose;
+       struct pnv_phb *phb_init;
+       struct pnv_ioda_pe *pe_init;
+       u64 desc;
+       int rc;
+
+       if (!opal_check_token(OPAL_PCI_SET_P2P))
+               return -ENXIO;
+

+       hose = pci_bus_to_host(initiator->bus);
+       phb_init = hose->private_data;

You can use the pci_bus_to_pnvhb() helper

+
+       pe_init = pnv_ioda_get_pe(initiator);
+       if (!pe_init)
+               return -ENODEV;
+
+       if (!pe_init->tce_bypass_enabled)
+               return -EINVAL;
+
+       /*
+        * Configuring the initiator's PHB requires to adjust its TVE#1
+        * setting. Since the same device can be an initiator several times for
+        * different target devices, we need to keep a reference count to know
+        * when we can restore the default bypass setting on its TVE#1 when
+        * disabling. Opal is not tracking PE states, so we add a reference
+        * count on the PE in linux.
+        *
+        * For the target, the configuration is per PHB, so we keep a
+        * target reference count on the PHB.
+        */

This irks me a bit because configuring the DMA address limits for the
TVE is the kernel's job. What we really should be doing is using
opal_pci_map_pe_dma_window_real() to set the bypass-mode address limit
for the TVE to something large enough to hit the MMIO ranges rather
than having set_p2p do it as a side effect. Unfortunately, for some
reason skiboot doesn't implement support for enabling 56bit addressing
using opal_pci_map_pe_dma_window_real() and we do need to support
older kernel's which used this stuff so I guess we're stuck with it
for now. It'd be nice if we could fix this in the longer term
though...


OK. We'd need more than a 56-bit opal_pci_map_pe_dma_window_real() though, there's also a queue setting change on the target PHB.

  Fred


+       mutex_lock(&p2p_mutex);
+
+       desc = OPAL_PCI_P2P_ENABLE | pnv_pci_dma_dir_to_opal_p2p(dir);
+       /* always go to opal to validate the configuration */
+       rc = opal_pci_set_p2p(phb_init->opal_id, phb_target->opal_id, desc,
+                             pe_init->pe_number);
+       if (rc != OPAL_SUCCESS) {
+               rc = -EIO;
+               goto out;
+       }
+
+       pe_init->p2p_initiator_count++;
+       phb_target->p2p_target_count++;
+
+       rc = 0;
+out:
+       mutex_unlock(&p2p_mutex);
+       return rc;
+}
+
+static int pnv_pci_dma_map_resource(struct pci_dev *pdev,
+                                   phys_addr_t phys_addr, size_t size,
+                                   enum dma_data_direction dir)
+{
+       struct pnv_phb *target_phb;
+
+       target_phb = pnv_pci_find_owning_phb(pdev, phys_addr, size);
+       if (!target_phb)
+               return 0;
+
+       return pnv_pci_ioda_enable_p2p(pdev, target_phb, dir);
+}
+
+static int pnv_pci_ioda_disable_p2p(struct pci_dev *initiator,
+               struct pnv_phb *phb_target)
+{
+       struct pci_controller *hose;
+       struct pnv_phb *phb_init;
+       struct pnv_ioda_pe *pe_init;
+       int rc;
+
+       if (!opal_check_token(OPAL_PCI_SET_P2P))
+               return -ENXIO;

This should probably have a WARN_ON() since we can't hit this path
unless the initial map succeeds.

+       hose = pci_bus_to_host(initiator->bus);
+       phb_init = hose->private_data;

pci_bus_to_pnvhb()

+       pe_init = pnv_ioda_get_pe(initiator);
+       if (!pe_init)
+               return -ENODEV;
+
+       mutex_lock(&p2p_mutex);
+
+       if (!pe_init->p2p_initiator_count || !phb_target->p2p_target_count) {
+               rc = -EINVAL;
+               goto out;
+       }
+
+       if (--pe_init->p2p_initiator_count == 0)
+               pnv_pci_ioda2_set_bypass(pe_init, true);
+
+       if (--phb_target->p2p_target_count == 0) {
+               rc = opal_pci_set_p2p(phb_init->opal_id, phb_target->opal_id,
+                                     0, pe_init->pe_number);
+               if (rc != OPAL_SUCCESS) {
+                       rc = -EIO;
+                       goto out;
+               }
+       }
+
+       rc = 0;
+out:
+       mutex_unlock(&p2p_mutex);
+       return rc;
+}
+
+static void pnv_pci_dma_unmap_resource(struct pci_dev *pdev,
+                                      dma_addr_t addr, size_t size,
+                                      enum dma_data_direction dir)
+{
+       struct pnv_phb *target_phb;
+       int rc;
+
+       target_phb = pnv_pci_find_owning_phb(pdev, addr, size);
+       if (!target_phb)
+               return;
+
+       rc = pnv_pci_ioda_disable_p2p(pdev, target_phb);
+       if (rc)
+               dev_err(&pdev->dev, "Failed to undo PCI peer-to-peer setup for 
address %llx: %d\n",
+                       addr, rc);

Use pci_err() or pe_err().

Reply via email to