On Sat, Feb 22, 2014 at 07:01:27AM +1100, Benjamin Herrenschmidt wrote:
>On Fri, 2014-02-21 at 19:53 +0800, Gavin Shan wrote:
>> The EEH core tries to recover from a fenced PHB or a frozen PE.
>> Unfortunately, the collected log isn't consistent with the error site,
>> because enabling the IO path for the frozen PE before collecting the
>> log destroys the state at the site. The patch solves the problem by
>> caching the PHB diag-data in advance, with the help of an additional
>> flag, PNV_PHB_FLAG_DIAG, in pnv_phb::flags.
>
>Ok, so correct me if I'm wrong, but you are
>
>  - Collecting the diag data in get_state, as a sort of side effect
>    (what happens if get_state is called multiple times?)
>
>  - Dumping it later on
>

Yeah, the patch has some problems when get_state() is called multiple
times: the cached log could contain much more than we expect. For
example, if frozen PE#1 has been detected and is still being handled
(so its log hasn't been dumped yet) when frozen PE#2 is detected, the
log would include the information for both PE#1 and PE#2, which isn't
what we want.

Another case is get_state() being called multiple times on frozen PE#1;
there we can check EEH_PE_ISOLATED to avoid overwriting the diag-data.
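
Something like the following is what I have in mind (just a sketch;
the exact placement inside ioda_eeh_get_state() would need care):

	/*
	 * Only cache the diag-data on the first detection of the
	 * frozen PE. Once the PE has been marked as isolated, a
	 * repeated get_state() must not overwrite the log that was
	 * collected at the original error site.
	 */
	if (!(pe->state & EEH_PE_ISOLATED))
		ioda_eeh_phb_diag(hose);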

I'm thinking of a new mechanism (please refer to the reply below).

>Any reason why we can't instead dump it immediately? Also, do we have a
>clean trigger for when we detect an error condition? It can either be
>the result of an interrupt or a driver calling get_state after reading
>ffffffff. Are both paths eventually going into the same function to
>handle a "new" error condition? That's where I would both collect and
>dump the EEH state...
>

The reason I don't want to dump it immediately is that I'd like to keep
struct pnv_eeh_ops::get_log() as the place that dumps the diag-data to
the guest in the future.

The problem is that we have only one PHB diag-data instance, in
struct pnv_phb::diag.blob[]. I'm thinking of having each PE carry its
own diag-data, and things would look like the following. Ben, please
comment :-)

- Extend "struct eeh_pe" to have a platform pointer (void *data).
  And we still collect diag-data in get_state() or next_error(),
  which will be dumped in pnv_eeh_ops->get_log(). The disadvantage
  could be lots of memory (extra 8KB usually) consumed by each PE
  instance.
- For the PCI config accessors and informative errors (also dead PHB,
  dead P7IOC), we still use pnv_phb::diag.blob[].
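
Roughly like this, for discussion (the names pnv_eeh_pe_data and
ioda_eeh_pe_diag are made up here; only the void *data member is what
the proposal actually adds to "struct eeh_pe", and the buffer reuses
PNV_PCI_DIAG_BUF_SIZE):

	struct eeh_pe {
		/* ... existing members ... */
		void *data;		/* platform-private data */
	};

	/* Per-PE diag-data, hung off eeh_pe::data */
	struct pnv_eeh_pe_data {
		bool diag_valid;	/* cached diag-data present? */
		unsigned char diag_blob[PNV_PCI_DIAG_BUF_SIZE];
	};

	/*
	 * Collect the diag-data into the PE's own buffer at detection
	 * time (get_state() or next_error()); get_log() then dumps
	 * pe_data->diag_blob instead of the shared pnv_phb::diag.blob[].
	 */
	static void ioda_eeh_pe_diag(struct eeh_pe *pe)
	{
		struct pnv_phb *phb = pe->phb->private_data;
		struct pnv_eeh_pe_data *pe_data = pe->data;
		long rc;

		rc = opal_pci_get_phb_diag_data2(phb->opal_id,
						 pe_data->diag_blob,
						 PNV_PCI_DIAG_BUF_SIZE);
		pe_data->diag_valid = (rc == OPAL_SUCCESS);
	}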

Thanks,
Gavin

>> Signed-off-by: Gavin Shan <sha...@linux.vnet.ibm.com>
>> ---
>>  arch/powerpc/platforms/powernv/eeh-ioda.c |   65 ++++++++++++++++++-----------
>>  arch/powerpc/platforms/powernv/pci.c      |   21 ++++++----
>>  arch/powerpc/platforms/powernv/pci.h      |    1 +
>>  3 files changed, 55 insertions(+), 32 deletions(-)
>> 
>> diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c
>> index 04b4710..3ed8d22 100644
>> --- a/arch/powerpc/platforms/powernv/eeh-ioda.c
>> +++ b/arch/powerpc/platforms/powernv/eeh-ioda.c
>> @@ -114,6 +114,27 @@ DEFINE_SIMPLE_ATTRIBUTE(ioda_eeh_inbB_dbgfs_ops, ioda_eeh_inbB_dbgfs_get,
>>                      ioda_eeh_inbB_dbgfs_set, "0x%llx\n");
>>  #endif /* CONFIG_DEBUG_FS */
>>  
>> +static void ioda_eeh_phb_diag(struct pci_controller *hose)
>> +{
>> +    struct pnv_phb *phb = hose->private_data;
>> +    unsigned long flags;
>> +    long rc;
>> +
>> +    spin_lock_irqsave(&phb->lock, flags);
>> +
>> +    rc = opal_pci_get_phb_diag_data2(phb->opal_id, phb->diag.blob,
>> +                                     PNV_PCI_DIAG_BUF_SIZE);
>> +    if (rc == OPAL_SUCCESS) {
>> +            phb->flags |= PNV_PHB_FLAG_DIAG;
>> +    } else {
>> +            pr_warn("%s: Can't get diag-data for PHB#%x (%ld)\n",
>> +                    __func__, hose->global_number, rc);
>> +            phb->flags &= ~PNV_PHB_FLAG_DIAG;
>> +    }
>> +
>> +    spin_unlock_irqrestore(&phb->lock, flags);
>> +}
>> +
>>  /**
>>   * ioda_eeh_post_init - Chip dependent post initialization
>>   * @hose: PCI controller
>> @@ -272,6 +293,8 @@ static int ioda_eeh_get_state(struct eeh_pe *pe)
>>                      result |= EEH_STATE_DMA_ACTIVE;
>>                      result |= EEH_STATE_MMIO_ENABLED;
>>                      result |= EEH_STATE_DMA_ENABLED;
>> +            } else {
>> +                    ioda_eeh_phb_diag(hose);
>>              }
>>  
>>              return result;
>> @@ -541,24 +564,13 @@ static int ioda_eeh_reset(struct eeh_pe *pe, int option)
>>  static int ioda_eeh_get_log(struct eeh_pe *pe, int severity,
>>                          char *drv_log, unsigned long len)
>>  {
>> -    s64 ret;
>> +    struct pnv_phb *phb = pe->phb->private_data;
>>      unsigned long flags;
>> -    struct pci_controller *hose = pe->phb;
>> -    struct pnv_phb *phb = hose->private_data;
>>  
>>      spin_lock_irqsave(&phb->lock, flags);
>>  
>> -    ret = opal_pci_get_phb_diag_data2(phb->opal_id,
>> -                    phb->diag.blob, PNV_PCI_DIAG_BUF_SIZE);
>> -    if (ret) {
>> -            spin_unlock_irqrestore(&phb->lock, flags);
>> -            pr_warning("%s: Can't get log for PHB#%x-PE#%x (%lld)\n",
>> -                       __func__, hose->global_number, pe->addr, ret);
>> -            return -EIO;
>> -    }
>> -
>> -    /* The PHB diag-data is always indicative */
>> -    pnv_pci_dump_phb_diag_data(hose, phb->diag.blob);
>> +    pnv_pci_dump_phb_diag_data(pe->phb, phb->diag.blob);
>> +    phb->flags &= ~PNV_PHB_FLAG_DIAG;
>>  
>>      spin_unlock_irqrestore(&phb->lock, flags);
>>  
>> @@ -646,19 +658,11 @@ static void ioda_eeh_hub_diag(struct pci_controller *hose)
>>      }
>>  }
>>  
>> -static void ioda_eeh_phb_diag(struct pci_controller *hose)
>> +static void ioda_eeh_phb_diag_dump(struct pci_controller *hose)
>>  {
>>      struct pnv_phb *phb = hose->private_data;
>> -    long rc;
>> -
>> -    rc = opal_pci_get_phb_diag_data2(phb->opal_id, phb->diag.blob,
>> -                                     PNV_PCI_DIAG_BUF_SIZE);
>> -    if (rc != OPAL_SUCCESS) {
>> -            pr_warning("%s: Failed to get diag-data for PHB#%x (%ld)\n",
>> -                        __func__, hose->global_number, rc);
>> -            return;
>> -    }
>>  
>> +    ioda_eeh_phb_diag(hose);
>>      pnv_pci_dump_phb_diag_data(hose, phb->diag.blob);
>>  }
>>  
>> @@ -778,7 +782,7 @@ static int ioda_eeh_next_error(struct eeh_pe **pe)
>>                              pr_info("EEH: PHB#%x informative error "
>>                                      "detected\n",
>>                                      hose->global_number);
>> -                            ioda_eeh_phb_diag(hose);
>> +                            ioda_eeh_phb_diag_dump(hose);
>>                              ret = EEH_NEXT_ERR_NONE;
>>                      }
>>  
>> @@ -809,6 +813,17 @@ static int ioda_eeh_next_error(struct eeh_pe **pe)
>>              }
>>  
>>              /*
>> +             * The EEH core will try to recover from a fenced
>> +             * PHB or frozen PE. For a frozen PE, the EEH core
>> +             * enables the IO path before collecting logs, which
>> +             * destroys the state at the error site. So we have
>> +             * to cache the log in advance here.
>> +             */
>> +            if (ret == EEH_NEXT_ERR_FROZEN_PE ||
>> +                ret == EEH_NEXT_ERR_FENCED_PHB)
>> +                    ioda_eeh_phb_diag(hose);
>> +
>> +            /*
>>               * If we have no errors on the specific PHB or only
>>               * informative error there, we continue poking it.
>>               * Otherwise, we need actions to be taken by upper
>> diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
>> index 437c37d..67b2254 100644
>> --- a/arch/powerpc/platforms/powernv/pci.c
>> +++ b/arch/powerpc/platforms/powernv/pci.c
>> @@ -259,11 +259,15 @@ static void pnv_pci_dump_phb3_diag_data(struct pci_controller *hose,
>>  void pnv_pci_dump_phb_diag_data(struct pci_controller *hose,
>>                              unsigned char *log_buff)
>>  {
>> +    struct pnv_phb *phb = hose->private_data;
>>      struct OpalIoPhbErrorCommon *common;
>>  
>>      if (!hose || !log_buff)
>>              return;
>>  
>> +    if (!(phb->flags & PNV_PHB_FLAG_DIAG))
>> +            return;
>> +
>>      common = (struct OpalIoPhbErrorCommon *)log_buff;
>>      switch (common->ioType) {
>>      case OPAL_PHB_ERROR_DATA_TYPE_P7IOC:
>> @@ -281,13 +285,19 @@ void pnv_pci_dump_phb_diag_data(struct pci_controller *hose,
>>  static void pnv_pci_handle_eeh_config(struct pnv_phb *phb, u32 pe_no)
>>  {
>>      unsigned long flags, rc;
>> -    int has_diag;
>> +    bool has_diag = false;
>>  
>>      spin_lock_irqsave(&phb->lock, flags);
>>  
>> -    rc = opal_pci_get_phb_diag_data2(phb->opal_id, phb->diag.blob,
>> -                                     PNV_PCI_DIAG_BUF_SIZE);
>> -    has_diag = (rc == OPAL_SUCCESS);
>> +    if (!(phb->flags & PNV_PHB_FLAG_DIAG)) {
>> +            rc = opal_pci_get_phb_diag_data2(phb->opal_id,
>> +                                             phb->diag.blob,
>> +                                             PNV_PCI_DIAG_BUF_SIZE);
>> +            if (rc == OPAL_SUCCESS) {
>> +                    phb->flags |= PNV_PHB_FLAG_DIAG;
>> +                    has_diag = true;
>> +            }
>> +    }
>>  
>>      rc = opal_pci_eeh_freeze_clear(phb->opal_id, pe_no,
>>                                     OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
>> @@ -303,9 +313,6 @@ static void pnv_pci_handle_eeh_config(struct pnv_phb *phb, u32 pe_no)
>>               */
>>              if (has_diag)
>>                      pnv_pci_dump_phb_diag_data(phb->hose, phb->diag.blob);
>> -            else
>> -                    pr_warning("PCI %d: No diag data available\n",
>> -                               phb->hose->global_number);
>>      }
>>  
>>      spin_unlock_irqrestore(&phb->lock, flags);
>> diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
>> index adeb3c4..153af9a 100644
>> --- a/arch/powerpc/platforms/powernv/pci.h
>> +++ b/arch/powerpc/platforms/powernv/pci.h
>> @@ -82,6 +82,7 @@ struct pnv_eeh_ops {
>>  #endif /* CONFIG_EEH */
>>  
>>  #define PNV_PHB_FLAG_EEH    (1 << 0)
>> +#define PNV_PHB_FLAG_DIAG   (1 << 1)
>>  
>>  struct pnv_phb {
>>      struct pci_controller   *hose;
