[patch 2.6.19-rc5 9/12] crash_stop: ia64 specific code

Keith Owens Wed, 08 Nov 2006 20:05:17 -0800

Add the ia64 specific crash_stop code.  This contains routines that are
called from the common crash_stop code and from the ia64 notify_die
chain.


Signed-off-by: Keith Owens <[EMAIL PROTECTED]>
---
 arch/ia64/kernel/Makefile     |    1 
 arch/ia64/kernel/crash_stop.c |  237 ++++++++++++++++++++++++++++++++++++++++++
 include/asm-ia64/crash_stop.h |   11 +
 3 files changed, 249 insertions(+)

Index: linux/arch/ia64/kernel/Makefile
===================================================================
--- linux.orig/arch/ia64/kernel/Makefile
+++ linux/arch/ia64/kernel/Makefile
@@ -31,6 +31,7 @@ obj-$(CONFIG_KPROBES)         += kprobes.o jpro
 obj-$(CONFIG_IA64_UNCACHED_ALLOCATOR)  += uncached.o
 obj-$(CONFIG_AUDIT)            += audit.o
 obj-$(CONFIG_PCI_MSI)          += msi_ia64.o
+obj-$(CONFIG_CRASH_STOP_SUPPORTED)     += crash_stop.o
 mca_recovery-y                 += mca_drv.o mca_drv_asm.o
 
 obj-$(CONFIG_IA64_ESI)         += esi.o
Index: linux/arch/ia64/kernel/crash_stop.c
===================================================================
--- /dev/null
+++ linux/arch/ia64/kernel/crash_stop.c
@@ -0,0 +1,237 @@
+/*
+ * linux/arch/ia64/kernel/crash_stop.c
+ *
+ * Copyright (C) 2006 Keith Owens <[EMAIL PROTECTED]>
+ *
+ * Most of the IA64 specific bits of the crash_stop code.  There is a little
+ * bit of crash_stop code in arch/ia64/kernel/smp.c to handle IPI_CRASH_STOP,
+ * everything else is in this file.
+ *
+ * IA64 is more complicated than the other architectures (isn't it always?).
+ * An MCA will force the entire machine into an MCA rendezvous state, using
+ * normal interrupts and/or selective INIT.  The NMI command/button will send
+ * INIT to all cpus at the "same" time.  For both these cases, one cpu is the
+ * monarch and all others are slaves[1].  An INIT can also be generated by
+ * crash_stop, to interrupt any cpus that are spinning disabled.  In that last
+ * case, mca.c does not know about the monarch that called crash_stop().
+ *
+ * The code in arch/ia64/kernel/mca.c only handles the first two cases, i.e.
+ * the ones that are defined by the SAL specification.  To handle the Linux
+ * specific crash_stop case, we have to fool mca.c into thinking that the
+ * monarch cpu has already been defined, then clear the monarch cpu to allow
+ * the INIT slaves to resume.  The notify_die callbacks are passed a data
+ * pointer; for MCA/INIT events, data->err is a pointer to a struct
+ * ia64_mca_notify_die.  The data in that structure lets crash_stop decide
+ * which cpu is the monarch and which is the slave, as well as override the
+ * 'wait for slaves' logic in mca.c.
+ *
+ * [1] Ignoring broken proms which assign the wrong values to the monarch flag.
+ *
+ * Another IA64 complication is that struct pt_regs only contains part of the
+ * system state.  IA64 also needs a struct switch_stack in order to give the
+ * unwinder all the state information.  Tasks that have been scheduled off a
+ * cpu already have a switch_stack, but the running tasks do not.  Create a
+ * switch_stack for each running task and store the address of that structure
+ * in the arch specific area of crash_stop_running_process.
+ */
+
+#include <linux/crash_stop.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/ptrace.h>
+#include <asm/kdebug.h>
+#include <asm/mca.h>
+
+int cs_arch_monarch_cpu;       /* copied to *nd->monarch_cpu for INIT */
+
+/* cs_arch_cpu() -> unw_init_running() -> cs_ca_switch_stack().  Save
+ * the address of the switch_stack created by unw_init_running() in the arch
+ * specific area of crash_stop_running_process, then call cs_common_cpu() to
+ * do the rest of the per cpu setup for a crash stop.
+ */
+
+struct cs_ca_data {            /* cs_arch_cpu() -> cs_ca_switch_stack() */
+       int monarch;            /* handed to cs_common_cpu() */
+       struct crash_stop_running_process *r;   /* gets arch.sw filled in */
+};
+
+static void
+cs_ca_switch_stack(struct unw_frame_info *info, void *vdata)
+{
+       struct cs_ca_data *args = vdata;
+       /* Record where unw_init_running() built the switch_stack: right
+        * after the unwind frame info, rounded up to 16 bytes.
+        */
+       unsigned long addr = (unsigned long)(info + 1);
+
+       addr = (addr + 15) & ~15;
+       args->r->arch.sw = (struct switch_stack *)addr;
+       cs_common_cpu(args->monarch);
+}
+
+void
+cs_arch_cpu(int monarch, struct crash_stop_running_process *r)
+{
+       struct cs_ca_data args;
+
+       args.monarch = monarch;         /* passed on to cs_common_cpu() */
+       args.r = r;                     /* receives the switch_stack address */
+       unw_init_running(cs_ca_switch_stack, &args);
+}
+
+/* Invoked at the start of a notify_die chain; always returns NOTIFY_OK. */
+static int
+cs_arch_notify_start(struct notifier_block *self,
+                    unsigned long val, void *data)
+{
+       struct die_args *args = data;
+
+       /* Only these three events open a crash_stop notify chain. */
+       if (val == DIE_OOPS ||
+           val == DIE_MCA_MONARCH_ENTER ||
+           val == DIE_INIT_MONARCH_ENTER)
+               cs_notify_chain_start(args->regs);
+
+       return NOTIFY_OK;
+}
+
+/* Invoked at the end of a notify_die chain; always returns NOTIFY_OK. */
+static int
+cs_arch_notify_end(struct notifier_block *self,
+                  unsigned long val, void *data)
+{
+       struct die_args *args = data;
+
+       /* Every other event is ignored. */
+       if (val != DIE_OOPS &&
+           val != DIE_INIT_MONARCH_LEAVE &&
+           val != DIE_MCA_MONARCH_LEAVE)
+               return NOTIFY_OK;
+
+       cs_notify_chain_end();
+
+       /* mca.c passes the recover flag as signr */
+       if (val == DIE_MCA_MONARCH_LEAVE && args->signr)
+               crash_stop_recovered();
+       return NOTIFY_OK;
+}
+
+/* notify_die callback for MCA/INIT events; always returns NOTIFY_OK. */
+static int
+cs_arch_notify_nmi(struct notifier_block *self,
+                  unsigned long val, void *data)
+{
+       struct ia64_mca_notify_die *nd; /* dereferenced only for INIT */
+       struct pt_regs *old_regs;
+       struct die_args *args = data;
+       static cpumask_t cs_INIT;       /* cpus marked in DIE_INIT_ENTER */
+       int cpu = smp_processor_id();
+       nd = (struct ia64_mca_notify_die *)(args->err);
+
+       switch(val) {
+       /* FIXME: if the MCA rendezvous timeout is increased (case
+        * DIE_MCA_NEW_TIMEOUT), does crash_stop care about the new limit?  It
+        * might affect wait_secs in crash_stop() - KAO.
+        */
+       case DIE_MCA_RENDZVOUS_PROCESS:
+               /* The MCA monarch event has woken up the slaves that were
+                * suspended via the MCA rendezvous interrupt.  Tell
+                * crash_stop() that this slave cpu is ready and waiting to be
+                * debugged, with args->regs installed as the irq regs.
+                */
+               old_regs = set_irq_regs(args->regs);
+               crash_stop_slave();
+               set_irq_regs(old_regs);
+               break;
+       case DIE_INIT_ENTER:
+               /* INIT that is sent to all cpus is correctly handled by mca.c.
+                * If cs_arch_send_nmi() was invoked on IA64 because a cpu was
+                * spinning disabled then we get a lone INIT event with no
+                * monarch, or at least not a monarch that mca.c knows about.
+                * Tell mca.c that we already have a monarch.  Also clear the
+                * sos->monarch flag, some broken proms incorrectly mark
+                * individual INIT events as a monarch event.
+                */
+               oops_in_progress = 1;   /* every INIT; not reset in this file */
+               if (crash_stop_sent_nmi()) {
+                       cpu_set(cpu, cs_INIT);
+                       *(nd->monarch_cpu) = cs_arch_monarch_cpu;
+                       nd->sos->monarch = 0;
+               }
+               break;
+       case DIE_INIT_SLAVE_ENTER:
+               /* This slave INIT event could have come from crash_stop(), it
+                * could also have come from a global INIT event.  In either
+                * case, drop into the crash_stop() slave processing.
+                */
+               old_regs = set_irq_regs(args->regs);
+               crash_stop_slave();
+               set_irq_regs(old_regs);
+               break;
+       case DIE_INIT_SLAVE_PROCESS:
+               /* Reverse the processing for DIE_INIT_ENTER.  Normal mca.c
+                * processing waits for the MCA (monarch) to release any INIT
+                * slaves, but we may not have an MCA monarch.  Pretend that
+                * each slave that was hit with INIT by crash_stop() is a
+                * monarch, to avoid complicating mca.c any more than it
+                * already is.
+                */
+               if (cpu_isset(cpu, cs_INIT)) {
+                       cpu_clear(cpu, cs_INIT);
+                       *(nd->monarch_cpu) = -1;
+                       nd->sos->monarch = 1;
+               }
+               break;
+       }
+       return NOTIFY_OK;
+}
+
+static struct notifier_block cs_arch_nb_start = {
+       .priority = INT_MAX,    /* highest of the three blocks in this file */
+       .notifier_call = cs_arch_notify_start,
+};
+
+static struct notifier_block cs_arch_nb_end = {
+       .notifier_call = cs_arch_notify_end,
+       .priority = 1,          /* lowest of the three blocks in this file */
+};
+
+static struct notifier_block cs_arch_nb_nmi = {
+       .notifier_call = cs_arch_notify_nmi,
+       .priority = 10,         /* between cs_arch_nb_start and cs_arch_nb_end */
+};
+
+/* Register the die notifiers; a failure unwinds only the earlier ones. */
+static int __init
+cs_arch_init(void)
+{
+       int err;
+       const char *nb_name = "cs_arch_nb_start";
+       if ((err = register_die_notifier(&cs_arch_nb_start)))
+               goto error;
+       nb_name = "cs_arch_nb_end";
+       if ((err = register_die_notifier(&cs_arch_nb_end)))
+               goto undo_start;
+       nb_name = "cs_arch_nb_nmi";
+       if (!(err = register_die_notifier(&cs_arch_nb_nmi)))
+               return 0;
+       /* nmi failed: start and end are registered, undo them in reverse. */
+       unregister_die_notifier(&cs_arch_nb_end);
+undo_start:
+       unregister_die_notifier(&cs_arch_nb_start);
+error:
+       printk(KERN_ERR "Failed to register %s\n", nb_name);
+       return err;
+}
+
+/* Remove the three die notifiers that cs_arch_init() registers. */
+static void __exit
+cs_arch_exit(void)
+{
+       unregister_die_notifier(&cs_arch_nb_nmi);
+       unregister_die_notifier(&cs_arch_nb_start);
+       unregister_die_notifier(&cs_arch_nb_end);
+}
+
+module_init(cs_arch_init);     /* registers the three die notifiers */
+module_exit(cs_arch_exit);     /* unregisters the three die notifiers */
Index: linux/include/asm-ia64/crash_stop.h
===================================================================
--- /dev/null
+++ linux/include/asm-ia64/crash_stop.h
@@ -0,0 +1,11 @@
+#ifndef _ASM_CRASH_STOP_H
+#define _ASM_CRASH_STOP_H
+
+/* Arch specific area of struct crash_stop_running_process. */
+struct crash_stop_running_process_arch {
+       struct switch_stack *sw;        /* set by cs_ca_switch_stack() */
+};
+
+extern int cs_arch_monarch_cpu;        /* arch/ia64/kernel/crash_stop.c */
+
+#endif /* _ASM_CRASH_STOP_H */
-
To unsubscribe from this list: send the line "unsubscribe linux-arch" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[patch 2.6.19-rc5 9/12] crash_stop: ia64 specific code

Reply via email to