On 09/04/2015 04:56 AM, Gouji, Masayuki wrote:
> Hi, Corey
>
> This is new one which includes the missed braces and function for modifying
> waiting period (default 0).
>
> I think it complies to your comment.
Well, not exactly. Thinking through this, if you are going to delay and
spin, you might as well spin completely in the KCS code, spinning on the
IBF value. That would be a lot simpler and it would return as soon as
IBF was set. However, spinning is just not a good idea, especially in
hardirq or softirq mode (where some of this would be happening).
I've spent some time thinking about this and looking at the code, and I
realized that the timer should be running whenever a message is being
sent. Which is why no one has seen this before, and it means we are
dealing with a different issue. If you look at the sender() function in
ipmi_si_intf.c, it calls check_start_timer_thread(), which will start
the timer and start the message send. The timer code checks at the end
if it is idle, and if it is idle it doesn't restart the timer.
So somehow the timer is not getting started or is getting canceled.
That's the problem we need to fix.
Looking at the code:
static void check_start_timer_thread(struct smi_info *smi_info)
{
if (smi_info->si_state == SI_NORMAL && smi_info->curr_msg == NULL) {
smi_mod_timer(smi_info, jiffies + SI_TIMEOUT_JIFFIES);
if (smi_info->thread)
wake_up_process(smi_info->thread);
start_next_msg(smi_info);
smi_event_handler(smi_info, 0);
}
}
perhaps start_next_msg() should come before smi_mod_timer(). Maybe
there's a race where the timer goes off before start_next_msg()? Well,
that shouldn't be possible because this code is called under the lock
and the first the time timeout does is claim the lock. But something is
going on here.
-corey
> >From de7620fd5243fd8f762a8d8869fa90f0f80bfb19 Mon Sep 17 00:00:00 2001
> From: root <[email protected]>
> Date: Sat, 5 Sep 2015 01:40:07 +0900
> Subject: [PATCH] ipmi:driver not to stop when the hardware not respond in irq
> mode. revised
>
> This is the patch to work around the temporary hang up on IRQ mode
> The idea is that for waiting a bit and retry under one special condition.
>
> Signed-off-by: Masayuki Gouji <[email protected]>
> ---
> drivers/char/ipmi/ipmi_kcs_sm.c | 9 +++++++--
> drivers/char/ipmi/ipmi_si_intf.c | 42
> +++++++++++++++++++++++++++++++++++++---
> drivers/char/ipmi/ipmi_si_sm.h | 3 ++-
> 3 files changed, 48 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/char/ipmi/ipmi_kcs_sm.c b/drivers/char/ipmi/ipmi_kcs_sm.c
> index 8c25f59..24ddab9 100644
> --- a/drivers/char/ipmi/ipmi_kcs_sm.c
> +++ b/drivers/char/ipmi/ipmi_kcs_sm.c
> @@ -354,8 +354,13 @@ static enum si_sm_result kcs_event(struct si_sm_data
> *kcs, long time)
> printk(KERN_DEBUG "KCS: State = %d, %x\n", kcs->state, status);
>
> /* All states wait for ibf, so just do it here. */
> - if (!check_ibf(kcs, status, time))
> - return SI_SM_CALL_WITH_DELAY;
> + if (!check_ibf(kcs, status, time)) {
> + if ((kcs->state == KCS_WAIT_WRITE_START) &&
> + (kcs->write_count == 1))
> + return SI_SM_CALL_WITH_SMALL_DELAY;
> + else
> + return SI_SM_CALL_WITH_DELAY;
> + }
>
> /* Just about everything looks at the KCS state, so grab that, too. */
> state = GET_STATUS_STATE(status);
> diff --git a/drivers/char/ipmi/ipmi_si_intf.c
> b/drivers/char/ipmi/ipmi_si_intf.c
> index 8a45e92..cfe115a 100644
> --- a/drivers/char/ipmi/ipmi_si_intf.c
> +++ b/drivers/char/ipmi/ipmi_si_intf.c
> @@ -295,6 +295,12 @@ struct smi_info {
>
> struct list_head link;
> union ipmi_smi_info_union addr_info;
> +
> + /*
> + * It waits some micro-second
> + * when getting IBF set under special condition.
> + */
> + int delay_usec;
> };
>
> #define smi_inc_stat(smi, stat) \
> @@ -818,11 +824,25 @@ static enum si_sm_result smi_event_handler(struct
> smi_info *smi_info,
> while (si_sm_result == SI_SM_CALL_WITHOUT_DELAY)
> si_sm_result = smi_info->handlers->event(smi_info->si_sm, 0);
>
> + if ((si_sm_result == SI_SM_CALL_WITH_SMALL_DELAY) &&
> + (smi_info->irq && !smi_info->interrupt_disabled)) {
> +
> + udelay(smi_info->delay_usec);
> + si_sm_result = smi_info->handlers->event(smi_info->si_sm, time);
> +
> + if (si_sm_result == SI_SM_CALL_WITH_SMALL_DELAY)
> + si_sm_result = SI_SM_CALL_WITH_DELAY;
> + }
> +
> if (si_sm_result == SI_SM_TRANSACTION_COMPLETE) {
> smi_inc_stat(smi_info, complete_transactions);
>
> handle_transaction_done(smi_info);
> si_sm_result = smi_info->handlers->event(smi_info->si_sm, 0);
> +
> + if (si_sm_result == SI_SM_CALL_WITH_SMALL_DELAY)
> + si_sm_result = SI_SM_CALL_WITH_DELAY;
> +
> } else if (si_sm_result == SI_SM_HOSED) {
> smi_inc_stat(smi_info, hosed_count);
>
> @@ -840,6 +860,10 @@ static enum si_sm_result smi_event_handler(struct
> smi_info *smi_info,
> return_hosed_msg(smi_info, IPMI_ERR_UNSPECIFIED);
> }
> si_sm_result = smi_info->handlers->event(smi_info->si_sm, 0);
> +
> + if (si_sm_result == SI_SM_CALL_WITH_SMALL_DELAY)
> + si_sm_result = SI_SM_CALL_WITH_DELAY;
> +
> }
>
> /*
> @@ -1689,6 +1713,7 @@ static int mem_setup(struct smi_info *info)
> * rsh=<regshift>
> * irq=<irq>
> * ipmb=<ipmb addr>
> + * delay=<delay usec>
> */
> enum hotmod_op { HM_ADD, HM_REMOVE };
> struct hotmod_vals {
> @@ -1783,6 +1808,7 @@ static int hotmod_handler(const char *val, struct
> kernel_param *kp)
> int regshift;
> int irq;
> int ipmb;
> + int delay;
> int ival;
> int len;
> struct smi_info *info;
> @@ -1803,7 +1829,8 @@ static int hotmod_handler(const char *val, struct
> kernel_param *kp)
> regsize = 1;
> regshift = 0;
> irq = 0;
> - ipmb = 0; /* Choose the default if not specified */
> + ipmb = 0;
> + delay = 0; /* Choose the default if not specified */
>
> next = strchr(curr, ':');
> if (next) {
> @@ -1874,6 +1901,11 @@ static int hotmod_handler(const char *val, struct
> kernel_param *kp)
> goto out;
> else if (rv)
> continue;
> + rv = check_hotmod_int_op(curr, o, "delay", &delay);
> + if (rv < 0)
> + goto out;
> + else if (rv)
> + continue;
>
> rv = -EINVAL;
> printk(KERN_WARNING PFX
> @@ -1910,6 +1942,7 @@ static int hotmod_handler(const char *val, struct
> kernel_param *kp)
> if (info->irq)
> info->irq_setup = std_irq_setup;
> info->slave_addr = ipmb;
> + info->delay_usec = delay;
>
> rv = add_smi(info);
> if (rv) {
> @@ -2850,6 +2883,8 @@ static int wait_for_msg_done(struct smi_info *smi_info)
>
> smi_result = smi_info->handlers->event(smi_info->si_sm, 0);
> for (;;) {
> + if (smi_result == SI_SM_CALL_WITH_SMALL_DELAY)
> + smi_result = SI_SM_CALL_WITH_DELAY;
> if (smi_result == SI_SM_CALL_WITH_DELAY ||
> smi_result == SI_SM_CALL_WITH_TICK_DELAY) {
> schedule_timeout_uninterruptible(1);
> @@ -3145,7 +3180,7 @@ static int smi_params_proc_show(struct seq_file *m,
> void *v)
> struct smi_info *smi = m->private;
>
> seq_printf(m,
> - "%s,%s,0x%lx,rsp=%d,rsi=%d,rsh=%d,irq=%d,ipmb=%d\n",
> + "%s,%s,0x%lx,rsp=%d,rsi=%d,rsh=%d,irq=%d,ipmb=%d,delay=%d\n",
> si_to_str[smi->si_type],
> addr_space_to_str[smi->io.addr_type],
> smi->io.addr_data,
> @@ -3153,7 +3188,8 @@ static int smi_params_proc_show(struct seq_file *m,
> void *v)
> smi->io.regsize,
> smi->io.regshift,
> smi->irq,
> - smi->slave_addr);
> + smi->slave_addr,
> + smi->delay_usec);
>
> return 0;
> }
> diff --git a/drivers/char/ipmi/ipmi_si_sm.h b/drivers/char/ipmi/ipmi_si_sm.h
> index df89f73..30c67c4 100644
> --- a/drivers/char/ipmi/ipmi_si_sm.h
> +++ b/drivers/char/ipmi/ipmi_si_sm.h
> @@ -76,7 +76,8 @@ enum si_sm_result {
> * The hardware is asserting attn and the state machine is
> * idle.
> */
> - SI_SM_ATTN
> + SI_SM_ATTN,
> + SI_SM_CALL_WITH_SMALL_DELAY /* Delay some before calling again. */
> };
>
> /* Handlers for the SMI state machine. */
------------------------------------------------------------------------------
_______________________________________________
Openipmi-developer mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/openipmi-developer