Re: [RFC PATCH 4/4] watchdog: hpwdt: Make panic behaviour configurable

2019-02-01 Thread Ivan Mironov
On Tue, 2019-01-15 at 19:30 -0700, Jerry Hoemann wrote:
> On Mon, Jan 14, 2019 at 07:36:17AM +0500, Ivan Mironov wrote:
> > This adds an option to not panic on NMI even if it was caused by iLO.
> > 
> > Signed-off-by: Ivan Mironov 
> > ---
> >  drivers/watchdog/hpwdt.c | 19 ---
> >  1 file changed, 16 insertions(+), 3 deletions(-)
> > 
> > diff --git a/drivers/watchdog/hpwdt.c b/drivers/watchdog/hpwdt.c
> > index 95d002b5b120..b12858491189 100644
> > --- a/drivers/watchdog/hpwdt.c
> > +++ b/drivers/watchdog/hpwdt.c
> > @@ -37,6 +37,10 @@ static unsigned int soft_margin = DEFAULT_MARGIN;
> > /* in seconds */
> >  static bool nowayout = WATCHDOG_NOWAYOUT;
> >  static bool pretimeout = IS_ENABLED(CONFIG_HPWDT_NMI_DECODING);
> >  
> > +#ifdef CONFIG_HPWDT_NMI_DECODING
> > +static bool panic_on_nmi = true;
> > +#endif /* CONFIG_HPWDT_NMI_DECODING */
> > +
> >  static void __iomem *pci_mem_addr; /* the PCI-memory address */
> >  static unsigned long __iomem *hpwdt_nmistat;
> >  static unsigned long __iomem *hpwdt_timer_reg;
> > @@ -146,7 +150,10 @@ static int hpwdt_set_pretimeout(struct watchdog_device 
> > *wdd, unsigned int req)
> >  
> >  static int hpwdt_my_nmi(void)
> >  {
> > -   return ioread8(hpwdt_nmistat) & 0x6;
> > +   int nmistat = ioread8(hpwdt_nmistat);
> > +
> > +   iowrite8(nmistat & ~0x6, hpwdt_nmistat);
> > +   return nmistat & 0x6;
> 
> This is a read only register.
> 

Oops... At least on my system this code has the desired effect:
subsequent function call returns zero.

Probably it would be better to use additional variable to determine
whether NMI from iLO already happened or not.

> 
> >  }
> >  
> >  /*
> > @@ -168,7 +175,10 @@ static int hpwdt_pretimeout(unsigned int ulReason, 
> > struct pt_regs *regs)
> >  "4. iLO Event Log\n",
> >  mynmi, ulReason, smp_processor_id());
> >  
> > -   nmi_panic(regs, "hpwdt: NMI: Not continuing");
> > +   if (panic_on_nmi)
> > +   nmi_panic(regs, "hpwdt: NMI: Not continuing");
> > +
> > +   pr_emerg("Dazed and confused, but trying to continue\n");
> >  
> 
> The watchdog core provides a way to enable/disable the NMI pretimeout 
> dynamically
> via ioctl.  The pretimeout NMI can also be disabled via module parameter to 
> hpwdt.
> This adds complexity without really adding functionality.
> 

It looks like disabling pretimout will disable panics only for iLO 5
(mine system has iLO 2). Or am I missing something?

> 
> BTW, don't reuse error messages.  Makes it difficult to tell where the error
> originated from.
> 

Would it be better to just return NMI_DONE and thus reuse normal NMI
handling from kernel, with logging and panic/don't panic logic?

> 
> > return NMI_HANDLED;
> >  }
> > @@ -376,6 +386,9 @@ MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped 
> > once started (default="
> >  #ifdef CONFIG_HPWDT_NMI_DECODING
> >  module_param(pretimeout, bool, 0);
> >  MODULE_PARM_DESC(pretimeout, "Watchdog pretimeout enabled");
> > -#endif
> > +
> > +module_param(panic_on_nmi, bool, 0);
> > +MODULE_PARM_DESC(panic_on_nmi, "Cause panic on NMI induced by iLO 
> > (default=1)");
> > +#endif /* CONFIG_HPWDT_NMI_DECODING */
> >  
> >  module_pci_driver(hpwdt_driver);
> > -- 
> > 2.20.1



Re: [RFC PATCH 4/4] watchdog: hpwdt: Make panic behaviour configurable

2019-01-15 Thread Jerry Hoemann
On Mon, Jan 14, 2019 at 07:36:17AM +0500, Ivan Mironov wrote:
> This adds an option to not panic on NMI even if it was caused by iLO.
> 
> Signed-off-by: Ivan Mironov 
> ---
>  drivers/watchdog/hpwdt.c | 19 ---
>  1 file changed, 16 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/watchdog/hpwdt.c b/drivers/watchdog/hpwdt.c
> index 95d002b5b120..b12858491189 100644
> --- a/drivers/watchdog/hpwdt.c
> +++ b/drivers/watchdog/hpwdt.c
> @@ -37,6 +37,10 @@ static unsigned int soft_margin = DEFAULT_MARGIN;  /* in 
> seconds */
>  static bool nowayout = WATCHDOG_NOWAYOUT;
>  static bool pretimeout = IS_ENABLED(CONFIG_HPWDT_NMI_DECODING);
>  
> +#ifdef CONFIG_HPWDT_NMI_DECODING
> +static bool panic_on_nmi = true;
> +#endif /* CONFIG_HPWDT_NMI_DECODING */
> +
>  static void __iomem *pci_mem_addr;   /* the PCI-memory address */
>  static unsigned long __iomem *hpwdt_nmistat;
>  static unsigned long __iomem *hpwdt_timer_reg;
> @@ -146,7 +150,10 @@ static int hpwdt_set_pretimeout(struct watchdog_device 
> *wdd, unsigned int req)
>  
>  static int hpwdt_my_nmi(void)
>  {
> - return ioread8(hpwdt_nmistat) & 0x6;
> + int nmistat = ioread8(hpwdt_nmistat);
> +
> + iowrite8(nmistat & ~0x6, hpwdt_nmistat);
> + return nmistat & 0x6;


This is a read only register.


>  }
>  
>  /*
> @@ -168,7 +175,10 @@ static int hpwdt_pretimeout(unsigned int ulReason, 
> struct pt_regs *regs)
>"4. iLO Event Log\n",
>mynmi, ulReason, smp_processor_id());
>  
> - nmi_panic(regs, "hpwdt: NMI: Not continuing");
> + if (panic_on_nmi)
> + nmi_panic(regs, "hpwdt: NMI: Not continuing");
> +
> + pr_emerg("Dazed and confused, but trying to continue\n");
>  


The watchdog core provides a way to enable/disable the NMI pretimeout 
dynamically
via ioctl.  The pretimeout NMI can also be disabled via module parameter to 
hpwdt.
This adds complexity without really adding functionality.


BTW, don't reuse error messages.  Makes it difficult to tell where the error
originated from.


>   return NMI_HANDLED;
>  }
> @@ -376,6 +386,9 @@ MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped 
> once started (default="
>  #ifdef CONFIG_HPWDT_NMI_DECODING
>  module_param(pretimeout, bool, 0);
>  MODULE_PARM_DESC(pretimeout, "Watchdog pretimeout enabled");
> -#endif
> +
> +module_param(panic_on_nmi, bool, 0);
> +MODULE_PARM_DESC(panic_on_nmi, "Cause panic on NMI induced by iLO 
> (default=1)");
> +#endif /* CONFIG_HPWDT_NMI_DECODING */
>  
>  module_pci_driver(hpwdt_driver);
> -- 
> 2.20.1

-- 

-
Jerry Hoemann  Software Engineer   Hewlett Packard Enterprise
-


[RFC PATCH 4/4] watchdog: hpwdt: Make panic behaviour configurable

2019-01-13 Thread Ivan Mironov
This adds an option to not panic on NMI even if it was caused by iLO.

Signed-off-by: Ivan Mironov 
---
 drivers/watchdog/hpwdt.c | 19 ---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/drivers/watchdog/hpwdt.c b/drivers/watchdog/hpwdt.c
index 95d002b5b120..b12858491189 100644
--- a/drivers/watchdog/hpwdt.c
+++ b/drivers/watchdog/hpwdt.c
@@ -37,6 +37,10 @@ static unsigned int soft_margin = DEFAULT_MARGIN;/* in 
seconds */
 static bool nowayout = WATCHDOG_NOWAYOUT;
 static bool pretimeout = IS_ENABLED(CONFIG_HPWDT_NMI_DECODING);
 
+#ifdef CONFIG_HPWDT_NMI_DECODING
+static bool panic_on_nmi = true;
+#endif /* CONFIG_HPWDT_NMI_DECODING */
+
 static void __iomem *pci_mem_addr; /* the PCI-memory address */
 static unsigned long __iomem *hpwdt_nmistat;
 static unsigned long __iomem *hpwdt_timer_reg;
@@ -146,7 +150,10 @@ static int hpwdt_set_pretimeout(struct watchdog_device 
*wdd, unsigned int req)
 
 static int hpwdt_my_nmi(void)
 {
-   return ioread8(hpwdt_nmistat) & 0x6;
+   int nmistat = ioread8(hpwdt_nmistat);
+
+   iowrite8(nmistat & ~0x6, hpwdt_nmistat);
+   return nmistat & 0x6;
 }
 
 /*
@@ -168,7 +175,10 @@ static int hpwdt_pretimeout(unsigned int ulReason, struct 
pt_regs *regs)
 "4. iLO Event Log\n",
 mynmi, ulReason, smp_processor_id());
 
-   nmi_panic(regs, "hpwdt: NMI: Not continuing");
+   if (panic_on_nmi)
+   nmi_panic(regs, "hpwdt: NMI: Not continuing");
+
+   pr_emerg("Dazed and confused, but trying to continue\n");
 
return NMI_HANDLED;
 }
@@ -376,6 +386,9 @@ MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once 
started (default="
 #ifdef CONFIG_HPWDT_NMI_DECODING
 module_param(pretimeout, bool, 0);
 MODULE_PARM_DESC(pretimeout, "Watchdog pretimeout enabled");
-#endif
+
+module_param(panic_on_nmi, bool, 0);
+MODULE_PARM_DESC(panic_on_nmi, "Cause panic on NMI induced by iLO 
(default=1)");
+#endif /* CONFIG_HPWDT_NMI_DECODING */
 
 module_pci_driver(hpwdt_driver);
-- 
2.20.1