Re: [PATCH 2/2] PCI/AER: Log correctable errors as warning, not error

2020-07-09 Thread Bjorn Helgaas
On Tue, Jul 07, 2020 at 07:14:01PM -0500, Bjorn Helgaas wrote:
> From: Matt Jolly 
> 
> PCIe correctable errors are recovered by hardware with no need for software
> intervention (PCIe r5.0, sec 6.2.2.1).
> 
> Reduce the log level of correctable errors from KERN_ERR to KERN_WARNING.
> 
> The bug reports below are for correctable error logging.  This doesn't fix
> the cause of those reports, but it may make the messages less alarming.
> 
> [bhelgaas: commit log, use pci_printk() to avoid code duplication]
> Link: https://bugzilla.kernel.org/show_bug.cgi?id=201517
> Link: https://bugzilla.kernel.org/show_bug.cgi?id=196183
> Link: https://lore.kernel.org/r/20200618155511.16009-1-Kangie@footclan.ninja
> Signed-off-by: Matt Jolly 
> Signed-off-by: Bjorn Helgaas 

I applied both of these to pci/error for v5.9.

> ---
>  drivers/pci/pcie/aer.c | 25 +++++++++++++++----------
>  1 file changed, 15 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
> index 9176c8a968b9..ca886bf91fd9 100644
> --- a/drivers/pci/pcie/aer.c
> +++ b/drivers/pci/pcie/aer.c
> @@ -673,20 +673,23 @@ static void __aer_print_error(struct pci_dev *dev,
>  {
>   const char **strings;
>   unsigned long status = info->status & ~info->mask;
> - const char *errmsg;
> + const char *level, *errmsg;
>   int i;
>  
> - if (info->severity == AER_CORRECTABLE)
> + if (info->severity == AER_CORRECTABLE) {
>   strings = aer_correctable_error_string;
> - else
> + level = KERN_WARNING;
> + } else {
>   strings = aer_uncorrectable_error_string;
> + level = KERN_ERR;
> + }
>  
>   for_each_set_bit(i, &status, 32) {
>   errmsg = strings[i];
>   if (!errmsg)
>   errmsg = "Unknown Error Bit";
>  
> - pci_err(dev, "   [%2d] %-22s%s\n", i, errmsg,
> + pci_printk(level, dev, "   [%2d] %-22s%s\n", i, errmsg,
>   info->first_error == i ? " (First)" : "");
>   }
>   pci_dev_aer_stats_incr(dev, info);
> @@ -696,6 +699,7 @@ void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
>  {
>   int layer, agent;
>   int id = ((dev->bus->number << 8) | dev->devfn);
> + const char *level;
>  
>   if (!info->status) {
>   pci_err(dev, "PCIe Bus Error: severity=%s, type=Inaccessible, (Unregistered Agent ID)\n",
> @@ -706,13 +710,14 @@ void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
>   layer = AER_GET_LAYER_ERROR(info->severity, info->status);
>   agent = AER_GET_AGENT(info->severity, info->status);
>  
> - pci_err(dev, "PCIe Bus Error: severity=%s, type=%s, (%s)\n",
> - aer_error_severity_string[info->severity],
> - aer_error_layer[layer], aer_agent_string[agent]);
> + level = (info->severity == AER_CORRECTABLE) ? KERN_WARNING : KERN_ERR;
> +
> + pci_printk(level, dev, "PCIe Bus Error: severity=%s, type=%s, (%s)\n",
> +aer_error_severity_string[info->severity],
> +aer_error_layer[layer], aer_agent_string[agent]);
>  
> - pci_err(dev, "  device [%04x:%04x] error status/mask=%08x/%08x\n",
> - dev->vendor, dev->device,
> - info->status, info->mask);
> + pci_printk(level, dev, "  device [%04x:%04x] error status/mask=%08x/%08x\n",
> +dev->vendor, dev->device, info->status, info->mask);
>  
>   __aer_print_error(dev, info);
>  
> -- 
> 2.25.1
> 


[PATCH 2/2] PCI/AER: Log correctable errors as warning, not error

2020-07-07 Thread Bjorn Helgaas
From: Matt Jolly 

PCIe correctable errors are recovered by hardware with no need for software
intervention (PCIe r5.0, sec 6.2.2.1).

Reduce the log level of correctable errors from KERN_ERR to KERN_WARNING.

The bug reports below are for correctable error logging.  This doesn't fix
the cause of those reports, but it may make the messages less alarming.

[bhelgaas: commit log, use pci_printk() to avoid code duplication]
Link: https://bugzilla.kernel.org/show_bug.cgi?id=201517
Link: https://bugzilla.kernel.org/show_bug.cgi?id=196183
Link: https://lore.kernel.org/r/20200618155511.16009-1-Kangie@footclan.ninja
Signed-off-by: Matt Jolly 
Signed-off-by: Bjorn Helgaas 
---
 drivers/pci/pcie/aer.c | 25 +++++++++++++++----------
 1 file changed, 15 insertions(+), 10 deletions(-)

diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
index 9176c8a968b9..ca886bf91fd9 100644
--- a/drivers/pci/pcie/aer.c
+++ b/drivers/pci/pcie/aer.c
@@ -673,20 +673,23 @@ static void __aer_print_error(struct pci_dev *dev,
 {
const char **strings;
unsigned long status = info->status & ~info->mask;
-   const char *errmsg;
+   const char *level, *errmsg;
int i;
 
-   if (info->severity == AER_CORRECTABLE)
+   if (info->severity == AER_CORRECTABLE) {
strings = aer_correctable_error_string;
-   else
+   level = KERN_WARNING;
+   } else {
strings = aer_uncorrectable_error_string;
+   level = KERN_ERR;
+   }
 
for_each_set_bit(i, &status, 32) {
errmsg = strings[i];
if (!errmsg)
errmsg = "Unknown Error Bit";
 
-   pci_err(dev, "   [%2d] %-22s%s\n", i, errmsg,
+   pci_printk(level, dev, "   [%2d] %-22s%s\n", i, errmsg,
info->first_error == i ? " (First)" : "");
}
pci_dev_aer_stats_incr(dev, info);
@@ -696,6 +699,7 @@ void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
 {
int layer, agent;
int id = ((dev->bus->number << 8) | dev->devfn);
+   const char *level;
 
if (!info->status) {
pci_err(dev, "PCIe Bus Error: severity=%s, type=Inaccessible, (Unregistered Agent ID)\n",
@@ -706,13 +710,14 @@ void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
layer = AER_GET_LAYER_ERROR(info->severity, info->status);
agent = AER_GET_AGENT(info->severity, info->status);
 
-   pci_err(dev, "PCIe Bus Error: severity=%s, type=%s, (%s)\n",
-   aer_error_severity_string[info->severity],
-   aer_error_layer[layer], aer_agent_string[agent]);
+   level = (info->severity == AER_CORRECTABLE) ? KERN_WARNING : KERN_ERR;
+
+   pci_printk(level, dev, "PCIe Bus Error: severity=%s, type=%s, (%s)\n",
+  aer_error_severity_string[info->severity],
+  aer_error_layer[layer], aer_agent_string[agent]);
 
-   pci_err(dev, "  device [%04x:%04x] error status/mask=%08x/%08x\n",
-   dev->vendor, dev->device,
-   info->status, info->mask);
+   pci_printk(level, dev, "  device [%04x:%04x] error status/mask=%08x/%08x\n",
+  dev->vendor, dev->device, info->status, info->mask);
 
__aer_print_error(dev, info);
 
-- 
2.25.1