On Fri, Oct 30, 2015 at 07:05:05PM +1100, Alexey Kardashevskiy wrote:
>On 10/30/2015 06:18 PM, Wei Yang wrote:
>>On Fri, Oct 30, 2015 at 03:11:20PM +1100, Alexey Kardashevskiy wrote:
>>>On 10/26/2015 02:15 PM, Wei Yang wrote:
>>>>PEs for VFs don't have primary bus. So they have to have their own reset
>>>>backend, which is used during EEH recovery. The patch implements the reset
>>>>backend for VF's PE by issuing FLR or AF FLR to the VFs, which are contained
>>>>in the PE.
>>>>
>>>>[gwshan: changelog and code refactoring]
>>>>Signed-off-by: Wei Yang <weiy...@linux.vnet.ibm.com>
>>>>Acked-by: Gavin Shan <gws...@linux.vnet.ibm.com>
>>>>---
>>>>  arch/powerpc/include/asm/eeh.h               |   1 +
>>>>  arch/powerpc/platforms/powernv/eeh-powernv.c | 134 
>>>> ++++++++++++++++++++++++++-
>>>>  2 files changed, 134 insertions(+), 1 deletion(-)
>>>>
>>>>diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
>>>>index ec21f8f..331c856 100644
>>>>--- a/arch/powerpc/include/asm/eeh.h
>>>>+++ b/arch/powerpc/include/asm/eeh.h
>>>>@@ -136,6 +136,7 @@ struct eeh_dev {
>>>>    int pcix_cap;                   /* Saved PCIx capability        */
>>>>    int pcie_cap;                   /* Saved PCIe capability        */
>>>>    int aer_cap;                    /* Saved AER capability         */
>>>>+   int af_cap;                     /* Saved AF capability          */
>>>>    struct eeh_pe *pe;              /* Associated PE                */
>>>>    struct list_head list;          /* Form link list in the PE     */
>>>>    struct pci_controller *phb;     /* Associated PHB               */
>>>>diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c 
>>>>b/arch/powerpc/platforms/powernv/eeh-powernv.c
>>>>index cfd55dd..017cd72 100644
>>>>--- a/arch/powerpc/platforms/powernv/eeh-powernv.c
>>>>+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
>>>>@@ -404,6 +404,7 @@ static void *pnv_eeh_probe(struct pci_dn *pdn, void 
>>>>*data)
>>>>    edev->pcix_cap = pnv_eeh_find_cap(pdn, PCI_CAP_ID_PCIX);
>>>>    edev->pcie_cap = pnv_eeh_find_cap(pdn, PCI_CAP_ID_EXP);
>>>>    edev->aer_cap  = pnv_eeh_find_ecap(pdn, PCI_EXT_CAP_ID_ERR);
>>>>+   edev->af_cap   = pnv_eeh_find_cap(pdn, PCI_CAP_ID_AF);
>>>>    if ((edev->class_code >> 8) == PCI_CLASS_BRIDGE_PCI) {
>>>>            edev->mode |= EEH_DEV_BRIDGE;
>>>>            if (edev->pcie_cap) {
>>>>@@ -893,6 +894,127 @@ static int pnv_eeh_bridge_reset(struct pci_dev *dev, 
>>>>int option)
>>>>    return 0;
>>>>  }
>>>>
>>>>+static void pnv_eeh_wait_for_pending(struct pci_dn *pdn, int pos,
>>>>+                                u16 mask, bool af_flr_rst)
>
>Missed this - @af_flr_rst is only used for warnings so better do:
>s/bool af_flr_rst/const char *reset_type/
>to make it explicit.
>

Looks good, will change in next version.

>
>>>>+{
>>>>+   struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
>>>>+   int status, i;
>>>>+
>>>>+   /* Wait for Transaction Pending bit to be cleared */
>>>>+   for (i = 0; i < 4; i++) {
>>>>+           eeh_ops->read_config(pdn, pos, 2, &status);
>>>
>>>
>>>gcc should have complained on using uninitialized @status here.
>>>
>>
>>I remove the obj file and re-compile the file, not the warning.
>
>Hm. Does not warn me either.
>
>>And took a look at other places where read_config() is called. The laster
>>parameter is not initialized before called.
>
>So? It does not make it right.
>
>>You see the error during build?
>
>Why does it matter? We have an undefined behavior here which we should not.
>You could test the return values from read_config() but you do not so at
>least initialize local variables.
>

I believe your concern is reasonable.

I suggest to have a separate patch to fix the read_config() by initialize the
last parameter.

>
>>
>>>
>>>>+           if (!(status & mask))
>>>>+                   return;
>>>>+
>>>>+           msleep((1 << i) * 100);
>>>>+   }
>>>>+
>>>>+   pr_warn("%s: Pending transaction while issuing %s FLR to "
>>>>+           "%04x:%02x:%02x.%01x\n",
>>>
>>>Do not wrap user-visible strings.
>>>
>>
>>Will change this.
>>
>>>
>>>>+           __func__, af_flr_rst ? "AF" : "",
>>>>+           edev->phb->global_number, pdn->busno,
>>>>+           PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));
>>>>+}
>>>>+
>>>>+static int pnv_eeh_do_flr(struct pci_dn *pdn, int option)
>>>>+{
>>>>+   struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
>>>>+   u32 reg;
>>>>+
>>>>+   if (!edev->pcie_cap)
>>>>+           return -ENOTTY;
>>>
>>>
>>>Can pnv_eeh_do_flr() be really called on a non PCIe device, can we get that
>>>far? WARN_ON_ONCE() may be?
>>>
>>
>>So you suggest to add a WARN_ON_ONCE() in this condition, right?
>
>I am asking a question here whether it makes sense or not to add a
>WARN_ON_ONCE or replace "if" with WARN_ON_ONCE or not having pcie_cap
>initialized is possible in this code - which one is it?
>

I think the check here is reasonable. In the body of this function, pcie_cap
is used to access the config space. If we remove this, it would be a chance to
access a not correct area.

>
>>
>>>
>>>>+
>>>>+   eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCAP, 4, &reg);
>>>
>>>
>>>... and here about uninitialized @reg.
>>>
>>>
>>>>+   if (!(reg & PCI_EXP_DEVCAP_FLR))
>>>>+           return -ENOTTY;
>>>>+
>>>>+   switch (option) {
>>>>+   case EEH_RESET_HOT:
>>>>+   case EEH_RESET_FUNDAMENTAL:
>>>>+           pnv_eeh_wait_for_pending(pdn, edev->pcie_cap + PCI_EXP_DEVSTA,
>>>>+                                    PCI_EXP_DEVSTA_TRPND, false);
>>>>+           eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
>>>>+                                4, &reg);
>>>>+           reg |= PCI_EXP_DEVCTL_BCR_FLR;
>>>>+           eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
>>>>+                                 4, reg);
>>>>+           msleep(EEH_PE_RST_HOLD_TIME);
>>>>+           break;
>>>>+   case EEH_RESET_DEACTIVATE:
>>>>+           eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
>>>>+                                4, &reg);
>>>>+           reg &= ~PCI_EXP_DEVCTL_BCR_FLR;
>>>>+           eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
>>>>+                                 4, reg);
>>>>+           msleep(EEH_PE_RST_SETTLE_TIME);
>>>>+           break;
>>>>+   }
>>>>+
>>>>+   return 0;
>>>>+}
>>>>+
>>>>+static int pnv_eeh_do_af_flr(struct pci_dn *pdn, int option)
>>>>+{
>>>>+   struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
>>>>+   u32 cap;
>>>>+
>>>>+   if (!edev->af_cap)
>>>>+           return -ENOTTY;
>>>>+
>>>>+   eeh_ops->read_config(pdn, edev->af_cap + PCI_AF_CAP, 1, &cap);
>>>
>>>
>>>... and here about @cap.
>>>
>>>>+   if (!(cap & PCI_AF_CAP_TP) || !(cap & PCI_AF_CAP_FLR))
>>>>+           return -ENOTTY;
>>>>+
>>>>+   switch (option) {
>>>>+   case EEH_RESET_HOT:
>>>>+   case EEH_RESET_FUNDAMENTAL:
>>>>+           /*
>>>>+            * Wait for Transaction Pending bit to clear. A word-aligned
>>>>+            * test is used, so we use the conrol offset rather than status
>>>>+            * and shift the test bit to match.
>>>
>>>
>>>Why word-aligned (not byte or double word)?
>>>
>>
>>I copied this words from pci_af_flr(). Actually, I don't tried to understand
>>this reason.
>
>Ok. I looked at pci_af_flr().
>
>In this patch, the comment before pnv_eeh_wait_for_pending() is missing,
>something like "pnv_eeh_wait_for_pending() uses a word-size accessor so @pos
>must be work-aligned".
>
>
>>
>>>>+            */
>>>>+           pnv_eeh_wait_for_pending(pdn, edev->af_cap + PCI_AF_CTRL,
>>>>+                                    PCI_AF_STATUS_TP << 8, true);
>>>>+           eeh_ops->write_config(pdn, edev->af_cap + PCI_AF_CTRL,
>>>>+                                 1, PCI_AF_CTRL_FLR);
>>>>+           msleep(EEH_PE_RST_HOLD_TIME);
>>>>+           break;
>>>>+   case EEH_RESET_DEACTIVATE:
>>>>+           eeh_ops->write_config(pdn, edev->af_cap + PCI_AF_CTRL, 1, 0);
>>>>+           msleep(EEH_PE_RST_SETTLE_TIME);
>>>
>>>
>>>btw there is an unrelated issue with EEH_PE_RST_SETTLE_TIME which is defined
>>>as 1800 which is A LOT (+250ms from EEH_PE_RST_HOLD_TIME and for some reason
>>>this is actually doubled so there is another reset somewhere).
>>>
>>
>>I don't know the reason for this value. This code keeps aligned with other
>>reset functions, like pnv_eeh_bridge_reset().
>
>
>Are they all in POWERNV/EEH or generic PCI uses same values on, for example,
>x86?
>
>
>
>>>Booting a guest with 63 VFs takes 6 minutes or so, is there a good reason for
>>>such a huge timeout?
>>>
>>>
>>>>+           break;
>>>>+   }
>>>>+
>>>>+   return 0;
>>>>+}
>>>>+
>>>>+static int pnv_eeh_reset_vf(struct pci_dn *pdn, int option)
>>>>+{
>>>>+   int ret;
>>>>+
>>>>+   ret = pnv_eeh_do_flr(pdn, option);
>>>>+   if (ret != -ENOTTY)
>>>>+           return ret;
>>>>+
>>>>+   return pnv_eeh_do_af_flr(pdn, option);
>>>>+}
>>>>+
>>>>+static int pnv_eeh_vf_pe_reset(struct eeh_pe *pe, int option)
>>>>+{
>>>>+   struct eeh_dev *edev, *tmp;
>>>>+   struct pci_dn *pdn;
>>>>+   int ret;
>>>>+
>>>>+   eeh_pe_for_each_dev(pe, edev, tmp) {
>>>>+           pdn = eeh_dev_to_pdn(edev);
>>>>+           ret = pnv_eeh_reset_vf(pdn, option);
>>>>+           if (ret)
>>>>+                   return ret;
>>>>+   }
>>>>+
>>>>+   return 0;
>>>>+}
>>>>+
>>>>  void pnv_pci_reset_secondary_bus(struct pci_dev *dev)
>>>>  {
>>>>    struct pci_controller *hose;
>>>>@@ -968,7 +1090,9 @@ static int pnv_eeh_reset(struct eeh_pe *pe, int option)
>>>>            }
>>>>
>>>>            bus = eeh_pe_bus_get(pe);
>>>>-           if (pci_is_root_bus(bus) ||
>>>>+           if (pe->type & EEH_PE_VF)
>>>>+                   ret = pnv_eeh_vf_pe_reset(pe, option);
>>>>+           else if (pci_is_root_bus(bus) ||
>>>>                    pci_is_root_bus(bus->parent))
>>>>                    ret = pnv_eeh_root_reset(hose, option);
>>>>            else
>>>>@@ -1108,6 +1232,14 @@ static inline bool pnv_eeh_cfg_blocked(struct pci_dn 
>>>>*pdn)
>>>>    if (!edev || !edev->pe)
>>>>            return false;
>>>>
>>>>+   /*
>>>>+    * We will issue FLR or AF FLR to all VFs, which are contained
>>>>+    * in VF PE. It relies on the EEH PCI config accessors. So we
>>>>+    * can't block them during the window.
>>>>+    */
>>>>+   if ((edev->physfn) && (edev->pe->state & EEH_PE_RESET))
>>>
>>>
>>>Extra braces around edev->physfn.
>>>
>>
>>Will remove it.
>>
>>>
>>>
>>>>+           return false;
>>>>+
>>>>    if (edev->pe->state & EEH_PE_CFG_BLOCKED)
>>>>            return true;
>>>>
>>>>
>
>
>-- 
>Alexey

-- 
Richard Yang
Help you, Help me

_______________________________________________
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Reply via email to