Module Name:    src
Committed By:   cliff
Date:           Mon Sep 20 19:42:31 UTC 2010

Modified Files:
        src/sys/arch/mips/rmi [matt-nb5-mips64]: rmixl_pcie.c rmixl_pcievar.h
            rmixl_pcix.c rmixl_pcixvar.h

Log Message:
- provide lockless interrupt dispatch by eliminating the use of LIST(9)
for interrupt handles.  Handles are now managed in variable-size arrays.
Establishing a new interrupt allocates a new array, the pointer to
which is then switched atomically.  Old arrays are allowed to persist
for some time before being freed, allowing any CPU still working with
that data to safely finish using it.
- interrupt event counters are now managed per-CPU, avoiding the need
for atomic adds.


To generate a diff of this commit:
cvs rdiff -u -r1.1.2.15 -r1.1.2.16 src/sys/arch/mips/rmi/rmixl_pcie.c
cvs rdiff -u -r1.1.2.7 -r1.1.2.8 src/sys/arch/mips/rmi/rmixl_pcievar.h
cvs rdiff -u -r1.1.2.6 -r1.1.2.7 src/sys/arch/mips/rmi/rmixl_pcix.c
cvs rdiff -u -r1.1.2.2 -r1.1.2.3 src/sys/arch/mips/rmi/rmixl_pcixvar.h

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/arch/mips/rmi/rmixl_pcie.c
diff -u src/sys/arch/mips/rmi/rmixl_pcie.c:1.1.2.15 src/sys/arch/mips/rmi/rmixl_pcie.c:1.1.2.16
--- src/sys/arch/mips/rmi/rmixl_pcie.c:1.1.2.15	Thu Aug 26 20:09:33 2010
+++ src/sys/arch/mips/rmi/rmixl_pcie.c	Mon Sep 20 19:42:31 2010
@@ -1,4 +1,4 @@
-/*	$NetBSD: rmixl_pcie.c,v 1.1.2.15 2010/08/26 20:09:33 rmind Exp $	*/
+/*	$NetBSD: rmixl_pcie.c,v 1.1.2.16 2010/09/20 19:42:31 cliff Exp $	*/
 
 /*
  * Copyright (c) 2001 Wasabi Systems, Inc.
@@ -40,7 +40,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: rmixl_pcie.c,v 1.1.2.15 2010/08/26 20:09:33 rmind Exp $");
+__KERNEL_RCSID(0, "$NetBSD: rmixl_pcie.c,v 1.1.2.16 2010/09/20 19:42:31 cliff Exp $");
 
 #include "opt_pci.h"
 #include "pci.h"
@@ -52,6 +52,8 @@
 #include <sys/device.h>
 #include <sys/extent.h>
 #include <sys/malloc.h>
+#include <sys/kernel.h>		/* for 'hz' */
+#include <sys/cpu.h>
 
 #include <uvm/uvm_extern.h>
 
@@ -158,6 +160,9 @@
 		((((uint64_t)RMIXL_PCIE_LINK_STATUS1_ERRORS) << 32) |	\
 		   (uint64_t)RMIXL_PCIE_LINK_STATUS0_ERRORS)
 
+#define RMIXL_PCIE_EVCNT(sc, link, bitno, cpu)	/* -> &evcnt of (link, pin bit, cpu) */ \
+		(&(sc)->sc_evcnts[(link)][(bitno) * (ncpu) + (cpu)])
+
 static int	rmixl_pcie_match(device_t, cfdata_t, void *);
 static void	rmixl_pcie_attach(device_t, device_t, void *);
 static void	rmixl_pcie_init(struct rmixl_pcie_softc *);
@@ -195,6 +200,10 @@
 static void	rmixl_pcie_intr_disestablish(void *, void *);
 static void	*rmixl_pcie_intr_establish(void *, pci_intr_handle_t,
 		    int, int (*)(void *), void *);
+static rmixl_pcie_link_intr_t *
+		rmixl_pcie_lip_add_1(rmixl_pcie_softc_t *, u_int, int, int);
+static void	rmixl_pcie_lip_free_callout(rmixl_pcie_link_intr_t *);
+static void	rmixl_pcie_lip_free(void *);
 static int	rmixl_pcie_intr(void *);
 static void	rmixl_pcie_link_error_intr(u_int, uint32_t, uint32_t);
 #if defined(DEBUG) || defined(DDB)
@@ -203,12 +212,6 @@
 static int	_rmixl_pcie_error_check(void *);
 static int	rmixl_pcie_error_intr(void *);
 
-/*
- * XXX use locks
- */
-#define	PCI_CONF_LOCK(s)	(s) = splhigh()
-#define	PCI_CONF_UNLOCK(s)	splx((s))
-
 
 #define RMIXL_PCIE_CONCAT3(a,b,c) a ## b ## c
 #define RMIXL_PCIE_BAR_INIT(reg, bar, size, align) {			\
@@ -288,6 +291,8 @@
 
 	aprint_normal(" RMI XLS PCIe Interface\n");
 
+	mutex_init(&sc->sc_mutex, MUTEX_DEFAULT, IPL_HIGH);
+
 	rmixl_pcie_lnkcfg(sc);
 
 	rmixl_pcie_intcfg(sc);
@@ -546,8 +551,9 @@
 static void
 rmixl_pcie_intcfg(struct rmixl_pcie_softc *sc)
 {
-	rmixl_pcie_link_intr_t *lip;
 	int link;
+	size_t size;
+	rmixl_pcie_evcnt_t *ev;
 
 	DPRINTF(("%s: disable all link interrupts\n", __func__));
 	for (link=0; link < sc->sc_pcie_lnktab.ncfgs; link++) {
@@ -556,11 +562,25 @@
 		RMIXL_IOREG_WRITE(RMIXL_IO_DEV_PCIE_LE + int_enb_offset[link].r1,
 			RMIXL_PCIE_LINK_STATUS1_ERRORS); 
 		RMIXL_IOREG_WRITE(RMIXL_IO_DEV_PCIE_LE + msi_enb_offset[link], 0); 
-		lip = &sc->sc_link_intr[link];
-		LIST_INIT(&lip->dispatch);
-		lip->ih = NULL;
-		lip->link = link;
-		lip->enabled = false;
+		sc->sc_link_intr[link] = NULL;
+
+		/*
+		 * allocate per-cpu, per-pin interrupt event counters
+		 */
+		size = ncpu * PCI_INTERRUPT_PIN_MAX * sizeof(rmixl_pcie_evcnt_t);
+		ev = malloc(size, M_DEVBUF, M_NOWAIT);
+		if (ev == NULL)
+			panic("%s: cannot malloc evcnts\n", __func__);
+		sc->sc_evcnts[link] = ev;
+		for (int pin=PCI_INTERRUPT_PIN_A; pin <= PCI_INTERRUPT_PIN_MAX; pin++) {
+			for (int cpu=0; cpu < ncpu; cpu++) {
+				ev = RMIXL_PCIE_EVCNT(sc, link, pin - 1, cpu);
+				snprintf(ev->name, sizeof(ev->name),
+					"cpu%d, link %d, pin %d", cpu, link, pin);
+				evcnt_attach_dynamic(&ev->evcnt, EVCNT_TYPE_INTR,
+					NULL, "rmixl_pcie", ev->name);
+			}
+		}
 	}
 }
 
@@ -956,9 +976,8 @@
 	bus_space_tag_t bst;
 	pcireg_t rv;
 	uint64_t cfg0;
-	u_int s;
 
-	PCI_CONF_LOCK(s);
+	mutex_enter(&sc->sc_mutex);
 
 	if (rmixl_pcie_conf_setup(sc, tag, &offset, &bst, &bsh) == 0) {
 		cfg0 = rmixl_cache_err_dis();
@@ -978,7 +997,8 @@
 		rv = -1;
 	}
 
-	PCI_CONF_UNLOCK(s);
+	mutex_exit(&sc->sc_mutex);
+
 	return rv;
 }
 
@@ -989,9 +1009,8 @@
 	static bus_space_handle_t bsh;
 	bus_space_tag_t bst;
 	uint64_t cfg0;
-	u_int s;
 
-	PCI_CONF_LOCK(s);
+	mutex_enter(&sc->sc_mutex);
 
 	if (rmixl_pcie_conf_setup(sc, tag, &offset, &bst, &bsh) == 0) {
 		cfg0 = rmixl_cache_err_dis();
@@ -1008,7 +1027,7 @@
 		rmixl_cache_err_restore(cfg0);
 	}
 
-	PCI_CONF_UNLOCK(s);
+	mutex_exit(&sc->sc_mutex);
 }
 
 int
@@ -1169,49 +1188,72 @@
 {
 	rmixl_pcie_softc_t *sc = v;
 	rmixl_pcie_link_dispatch_t *dip = ih;
-	rmixl_pcie_link_intr_t *lip = &sc->sc_link_intr[dip->link];;
+	rmixl_pcie_link_intr_t *lip = sc->sc_link_intr[dip->link];
 	uint32_t r;
 	uint32_t bit;
 	u_int offset;
 	u_int other;
+	bool busy;
 
 	DPRINTF(("%s: link=%d pin=%d irq=%d\n",
 		__func__, dip->link, dip->bitno + 1, dip->irq));
-	LIST_REMOVE(dip, next);
 
-	rmixl_intr_disestablish(lip->ih);
+	mutex_enter(&sc->sc_mutex);
 
-	if (dip->bitno < 32) {
-		bit = 1 << dip->bitno;
-		offset = int_enb_offset[dip->link].r0;
-		other  = int_enb_offset[dip->link].r1;
-	} else {
-		bit = 1 << (dip->bitno - 32);
-		offset = int_enb_offset[dip->link].r1;
-		other  = int_enb_offset[dip->link].r0;
-	}
-
-	/* disable this interrupt in the PCIe bridge */
-	r = RMIXL_IOREG_READ(RMIXL_IO_DEV_PCIE_LE + offset);
-	r &= ~bit;
-	RMIXL_IOREG_WRITE(RMIXL_IO_DEV_PCIE_LE + offset, r);
+	dip->func = NULL;	/* mark unused, prevent further dispatch */
 
 	/*
-	 * if both STATUS0 and STATUS1 are 0
-	 * mark the link interrupt disabled
+	 * if no other dispatch handle is using this interrupt,
+	 * we can disable it
 	 */
-	if (r == 0) {
-		/* check the other reg */
-		if (RMIXL_IOREG_READ(RMIXL_IO_DEV_PCIE_LE + other) == 0) {
-			lip->enabled = false;
-			DPRINTF(("%s: disabled link %d\n", __func__, lip->link));
+	busy = false;
+	for (int i=0; i < lip->dispatch_count; i++) {
+		rmixl_pcie_link_dispatch_t *d = &lip->dispatch_data[i];
+		if (d == dip)
+			continue;
+		if (d->bitno == dip->bitno) {
+			busy = true;
+			break;
 		}
 	}
+	if (! busy) {
+		if (dip->bitno < 32) {
+			bit = 1 << dip->bitno;
+			offset = int_enb_offset[dip->link].r0;
+			other  = int_enb_offset[dip->link].r1;
+		} else {
+			bit = 1 << (dip->bitno - 32);
+			offset = int_enb_offset[dip->link].r1;
+			other  = int_enb_offset[dip->link].r0;
+		}
+
+		/* disable this interrupt in the PCIe bridge */
+		r = RMIXL_IOREG_READ(RMIXL_IO_DEV_PCIE_LE + offset);
+		r &= ~bit;
+		RMIXL_IOREG_WRITE(RMIXL_IO_DEV_PCIE_LE + offset, r);
+
+		/*
+		 * if both ENABLE0 and ENABLE1 are 0
+		 * disable the link interrupt
+		 */
+		if (r == 0) {
+			/* check the other reg */
+			if (RMIXL_IOREG_READ(RMIXL_IO_DEV_PCIE_LE + other) == 0) {
+				DPRINTF(("%s: disable link %d\n", __func__, lip->link));
+
+				/* tear down interrupt on this link */
+				rmixl_intr_disestablish(lip->ih);
 
-	evcnt_detach(&dip->count);
+				/* commit NULL interrupt set */
+				sc->sc_link_intr[dip->link] = NULL;
 
-	free(dip, M_DEVBUF);
+				/* schedule delayed free of the old link interrupt set */
+				rmixl_pcie_lip_free_callout(lip);
+			}
+		}
+	}
 
+	mutex_exit(&sc->sc_mutex);
 }
 
 static void *
@@ -1225,7 +1267,6 @@
 	rmixl_pcie_link_dispatch_t *dip = NULL;
 	uint32_t bit;
 	u_int offset;
-	int s;
 
 	if (pih == ~0) {
 		DPRINTF(("%s: bad pih=%#lx, implies PCI_INTERRUPT_PIN_NONE\n",
@@ -1234,55 +1275,28 @@
 	}
 
 	rmixl_pcie_decompose_pih(pih, &link, &bitno, &irq);
-	DPRINTF(("%s: link=%d pin=%d irq=%d\n", __func__, link, bitno + 1, irq));
-
-	lip = &sc->sc_link_intr[link];
-
-	s = splhigh();
+	DPRINTF(("%s: link=%d pin=%d irq=%d\n",
+		__func__, link, bitno + 1, irq));
 
-#ifdef DEBUG
-	LIST_FOREACH(dip, &lip->dispatch, next) {
-		if (dip->bitno == bitno)
-			panic("%s: pin %d alread on dispatch list",
-				__func__, bitno + 1);
-	}
-#endif
+	mutex_enter(&sc->sc_mutex);
 
-	/*
-	 * all intrs on a link get same ipl and sc
-	 * first intr established sets the standard
-	 */
-	if (lip->enabled == true) {
-		KASSERT(sc == lip->sc);
-		if (sc != lip->sc) {
-			printf("%s: sc %p mismatch\n", __func__, sc); 
-			goto out;
-		}
-		KASSERT(ipl == lip->ipl);
-		if (ipl != lip->ipl) {
-			printf("%s: ipl %d mismatch\n", __func__, ipl); 
-			goto out;
-		}
-	}
+	lip = rmixl_pcie_lip_add_1(sc, link, irq, ipl);
+	if (lip == NULL) {
+		/* do not leak sc_mutex on the failure path */
+		mutex_exit(&sc->sc_mutex);
+		return NULL;
+	}
 
 	/*
-	 * allocate and initialize a dispatch handle
+	 * initialize our new interrupt, the last element in dispatch_data[]
 	 */
-	dip = malloc(sizeof(*dip), M_DEVBUF, M_NOWAIT);
-	if (dip == NULL) {
-		printf("%s: cannot malloc dispatch handle\n", __func__);
-		goto out;
-	}
-
+	dip = &lip->dispatch_data[lip->dispatch_count - 1];
 	dip->link = link;
 	dip->bitno = bitno;
 	dip->irq = irq;
 	dip->func = func;
 	dip->arg = arg;
-	snprintf(dip->count_name, sizeof(dip->count_name),
-		"link %d, pin %d", link, bitno + 1);
-	evcnt_attach_dynamic(&dip->count, EVCNT_TYPE_INTR, NULL,
-		"rmixl_pcie", dip->count_name);
+	dip->counts = RMIXL_PCIE_EVCNT(sc, link, bitno, 0);
 
 	if (bitno < 32) {
 		offset = int_enb_offset[link].r0;
@@ -1292,28 +1303,123 @@
 		bit = 1 << (bitno - 32);
 	}
 
+	/* commit the new link interrupt set */
+	sc->sc_link_intr[link] = lip;
+
 	/* enable this interrupt in the PCIe bridge */
 	r = RMIXL_IOREG_READ(RMIXL_IO_DEV_PCIE_LE + offset); 
 	r |= bit;
 	RMIXL_IOREG_WRITE(RMIXL_IO_DEV_PCIE_LE + offset, r); 
 
-	if (lip->enabled == false) {
-		lip->ih = rmixl_intr_establish(irq, sc->sc_tmsk,
+	mutex_exit(&sc->sc_mutex);
+	return dip;
+}
+
+/*
+ * Build the next interrupt set for a link: the in-use entries of the
+ * current set (if any) plus one trailing slot for the caller to fill in.
+ * Returns NULL on allocation failure or sc/ipl mismatch.
+ * NOTE(review): only the first set is passed to rmixl_intr_establish();
+ * confirm rmixl_pcie_intr() never uses it after the delayed free.
+ */
+static rmixl_pcie_link_intr_t *
+rmixl_pcie_lip_add_1(rmixl_pcie_softc_t *sc, u_int link, int irq, int ipl)
+{
+	rmixl_pcie_link_intr_t *lip_old = sc->sc_link_intr[link];
+	rmixl_pcie_link_intr_t *lip_new;
+	u_int dispatch_count = 1;	/* the new, trailing slot */
+	size_t size = sizeof(rmixl_pcie_link_intr_t);
+
+	if (lip_old != NULL) {
+		/*
+		 * count only those dispatch elements still in use;
+		 * unused ones are pruned during the copy below
+		 * i.e. we are "lazy": there is no rmixl_pcie_lip_sub_1
+		 */
+		for (u_int i=0; i < lip_old->dispatch_count; i++) {
+			if (lip_old->dispatch_data[i].func != NULL) {
+				dispatch_count++;
+				size += sizeof(rmixl_pcie_link_dispatch_t);
+			}
+		}
+	}
+
+	/* allocate link intr struct with one or more dispatch handles */
+	lip_new = malloc(size, M_DEVBUF, M_NOWAIT);
+	if (lip_new == NULL) {
+#ifdef DIAGNOSTIC
+		printf("%s: cannot malloc\n", __func__);
+#endif
+		return NULL;
+	}
+
+	if (lip_old == NULL) {
+		/* initialize the link interrupt struct */
+		lip_new->sc = sc;
+		lip_new->link = link;
+		lip_new->ipl = ipl;
+		lip_new->ih = rmixl_intr_establish(irq, sc->sc_tmsk,
 			ipl, RMIXL_TRIG_LEVEL, RMIXL_POLR_HIGH,
-			rmixl_pcie_intr, lip, false);
-		if (lip->ih == NULL)
+			rmixl_pcie_intr, lip_new, false);
+		if (lip_new->ih == NULL)
 			panic("%s: cannot establish irq %d", __func__, irq);
+	} else {
+		/*
+		 * all intrs on a link get same ipl and sc
+		 * first intr established sets the standard
+		 */
+		KASSERT(sc == lip_old->sc);
+		if (sc != lip_old->sc) {
+			printf("%s: sc %p mismatch\n", __func__, sc);
+			free(lip_new, M_DEVBUF);
+			return NULL;
+		}
+		KASSERT(ipl == lip_old->ipl);
+		if (ipl != lip_old->ipl) {
+			printf("%s: ipl %d mismatch\n", __func__, ipl);
+			free(lip_new, M_DEVBUF);
+			return NULL;
+		}
+		/*
+		 * copy lip_old to lip_new, skipping unused dispatch elements
+		 */
+		memcpy(lip_new, lip_old, sizeof(rmixl_pcie_link_intr_t));
+		for (u_int j=0, i=0; i < lip_old->dispatch_count; i++) {
+			if (lip_old->dispatch_data[i].func != NULL) {
+				memcpy(&lip_new->dispatch_data[j],
+					&lip_old->dispatch_data[i],
+					sizeof(rmixl_pcie_link_dispatch_t));
+				j++;
+			}
+		}
 
-		lip->sc = sc;
-		lip->ipl = ipl;
-		lip->enabled = true;
-		DPRINTF(("%s: enabled link %d\n", __func__, link));
+		/* schedule delayed free of old link interrupt set */
+		rmixl_pcie_lip_free_callout(lip_old);
 	}
-	LIST_INSERT_HEAD(&lip->dispatch, dip, next);
+	lip_new->dispatch_count = dispatch_count;
 
- out:
-	splx(s);
-	return dip;
+	return lip_new;
+}
+
+/*
+ * Retire a link interrupt set: arm its embedded callout to run
+ * rmixl_pcie_lip_free() on it after 2 * hz ticks, so that anyone still
+ * using the set can finish first.  XXX 2 seconds should be plenty?
+ */
+static void
+rmixl_pcie_lip_free_callout(rmixl_pcie_link_intr_t *lip)
+{
+	callout_init(&lip->callout, 0);
+	callout_reset(&lip->callout, 2 * hz, rmixl_pcie_lip_free, lip);
+}
+
+static void
+rmixl_pcie_lip_free(void *arg)
+{
+	rmixl_pcie_link_intr_t *lip = arg;	/* the retired set */
+	/* callout handler armed by rmixl_pcie_lip_free_callout() */
+	callout_destroy(&lip->callout);
+	free(lip, M_DEVBUF);
 }
 
 static int
@@ -1334,12 +1440,16 @@
 		if (status & RMIXL_PCIE_LINK_STATUS_ERRORS)
 			rmixl_pcie_link_error_intr(link, status0, status1);
 
-		LIST_FOREACH(dip, &lip->dispatch, next) {
-			uint64_t bit = 1 << dip->bitno;
-			if ((status & bit) != 0) {
-				(void)(*dip->func)(dip->arg);
-				dip->count.ev_count++;
-				rv = 1;
+		for (u_int i=0; i < lip->dispatch_count; i++) {
+			dip = &lip->dispatch_data[i];
+			/* snapshot func: disestablish clears it to NULL */
+			int (*func)(void *) = dip->func;
+			/* bitno can be >= 32, so the shift must be 64-bit */
+			uint64_t bit = (uint64_t)1 << dip->bitno;
+			if (func != NULL && (status & bit) != 0) {
+				(void)(*func)(dip->arg);
+				dip->counts[cpu_index(curcpu())].evcnt.ev_count++;
+				rv = 1;
 			}
 		}
 	}

Index: src/sys/arch/mips/rmi/rmixl_pcievar.h
diff -u src/sys/arch/mips/rmi/rmixl_pcievar.h:1.1.2.7 src/sys/arch/mips/rmi/rmixl_pcievar.h:1.1.2.8
--- src/sys/arch/mips/rmi/rmixl_pcievar.h:1.1.2.7	Tue Apr 13 18:15:16 2010
+++ src/sys/arch/mips/rmi/rmixl_pcievar.h	Mon Sep 20 19:42:31 2010
@@ -1,4 +1,4 @@
-/*      $NetBSD: rmixl_pcievar.h,v 1.1.2.7 2010/04/13 18:15:16 cliff Exp $	*/
+/*      $NetBSD: rmixl_pcievar.h,v 1.1.2.8 2010/09/20 19:42:31 cliff Exp $	*/
 /*-
  * Copyright (c) 2010 The NetBSD Foundation, Inc.
  * All rights reserved.
@@ -50,26 +50,31 @@
 	const rmixl_pcie_lnkcfg_t *cfg;
 } rmixl_pcie_lnktab_t;
 
+typedef struct rmixl_pcie_evcnt {
+	struct evcnt evcnt;	/* counter attached in rmixl_pcie_intcfg() */
+	char name[32];		/* "cpuN, link L, pin P" name for evcnt */
+} rmixl_pcie_evcnt_t;
+
 typedef struct rmixl_pcie_link_dispatch {
-	LIST_ENTRY(rmixl_pcie_link_dispatch) next;
 	int (*func)(void *);
 	void *arg;
 	u_int link;
 	u_int bitno;
 	u_int irq;
-	struct evcnt count;
-	char count_name[32];
+	rmixl_pcie_evcnt_t *counts;	/* event counters, index by cpu_index() */
 } rmixl_pcie_link_dispatch_t;
 
 struct rmixl_pcie_softc;
 
 typedef struct rmixl_pcie_link_intr {
 	struct rmixl_pcie_softc *sc;
-	LIST_HEAD(, rmixl_pcie_link_dispatch) dispatch;
 	u_int link;
 	u_int ipl;
-	bool enabled;
 	void *ih;			/* mips interrupt handle */
+	callout_t callout;		/* for delayed free of this struct */
+	u_int dispatch_count;		/* number of dispatch_data[] elements */
+	rmixl_pcie_link_dispatch_t  dispatch_data[1];
+					/* variable length; sizeof() covers 1 */
 } rmixl_pcie_link_intr_t;
 
 #define RMIXL_PCIE_NLINKS_MAX	4
@@ -83,9 +88,11 @@
 	bus_dma_tag_t			sc_32bit_dmat;
 	bus_dma_tag_t			sc_64bit_dmat;
 	rmixl_pcie_lnktab_t		sc_pcie_lnktab;
+	kmutex_t			sc_mutex;
 	int				sc_tmsk;
 	void 			       *sc_fatal_ih;
-	rmixl_pcie_link_intr_t		sc_link_intr[RMIXL_PCIE_NLINKS_MAX];
+	rmixl_pcie_evcnt_t	       *sc_evcnts[RMIXL_PCIE_NLINKS_MAX];
+	rmixl_pcie_link_intr_t	       *sc_link_intr[RMIXL_PCIE_NLINKS_MAX];
 } rmixl_pcie_softc_t;
 
 

Index: src/sys/arch/mips/rmi/rmixl_pcix.c
diff -u src/sys/arch/mips/rmi/rmixl_pcix.c:1.1.2.6 src/sys/arch/mips/rmi/rmixl_pcix.c:1.1.2.7
--- src/sys/arch/mips/rmi/rmixl_pcix.c:1.1.2.6	Thu Aug 26 20:09:33 2010
+++ src/sys/arch/mips/rmi/rmixl_pcix.c	Mon Sep 20 19:42:31 2010
@@ -1,4 +1,4 @@
-/*	$NetBSD: rmixl_pcix.c,v 1.1.2.6 2010/08/26 20:09:33 rmind Exp $	*/
+/*	$NetBSD: rmixl_pcix.c,v 1.1.2.7 2010/09/20 19:42:31 cliff Exp $	*/
 
 /*
  * Copyright (c) 2001 Wasabi Systems, Inc.
@@ -40,7 +40,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: rmixl_pcix.c,v 1.1.2.6 2010/08/26 20:09:33 rmind Exp $");
+__KERNEL_RCSID(0, "$NetBSD: rmixl_pcix.c,v 1.1.2.7 2010/09/20 19:42:31 cliff Exp $");
 
 #include "opt_pci.h"
 #include "pci.h"
@@ -52,6 +52,8 @@
 #include <sys/device.h>
 #include <sys/extent.h>
 #include <sys/malloc.h>
+#include <sys/kernel.h>		/* for 'hz' */
+#include <sys/cpu.h>
 
 #include <uvm/uvm_extern.h>
 
@@ -189,12 +191,6 @@
 #define RMIXL_PCIXREG_READ(o)     (*RMIXL_PCIXREG_VADDR(o))
 #define RMIXL_PCIXREG_WRITE(o,v)  *RMIXL_PCIXREG_VADDR(o) = (v)
 
-/*
- * XXX use locks
- */
-#define	PCI_CONF_LOCK(s)	(s) = splhigh()
-#define	PCI_CONF_UNLOCK(s)	splx((s))
-
 
 #define RMIXL_PCIX_CONCAT3(a,b,c) a ## b ## c
 #define RMIXL_PCIX_BAR_INIT(reg, bar, size, align) {			\
@@ -222,6 +218,11 @@
 	DPRINTF(("%s: %s BAR %#x\n", __func__, __STRING(reg), bar));	\
 }
 
+
+#define RMIXL_PCIX_EVCNT(sc, intrpin, cpu)	/* -> &evcnt of (pin, cpu) */ \
+	(&(sc)->sc_evcnts[(intrpin) * (ncpu) + (cpu)])
+
+
 static int	rmixl_pcix_match(device_t, cfdata_t, void *);
 static void	rmixl_pcix_attach(device_t, device_t, void *);
 static void	rmixl_pcix_init(rmixl_pcix_softc_t *);
@@ -253,6 +254,10 @@
 static void	rmixl_pcix_intr_disestablish(void *, void *);
 static void	*rmixl_pcix_intr_establish(void *, pci_intr_handle_t,
 		    int, int (*)(void *), void *);
+static rmixl_pcix_intr_t *
+                rmixl_pcix_pip_add_1(rmixl_pcix_softc_t *, int, int);
+static void     rmixl_pcix_pip_free_callout(rmixl_pcix_intr_t *);
+static void     rmixl_pcix_pip_free(void *);
 static int	rmixl_pcix_intr(void *);
 static int	rmixl_pcix_error_intr(void *);
 
@@ -263,10 +268,6 @@
 
 static int rmixl_pcix_found;
 
-#ifdef DIAGNOSTIC
-static rmixl_pcix_softc_t *rmixl_pcix_sc;
-#endif
-
 
 static int  
 rmixl_pcix_match(device_t parent, cfdata_t cf, void *aux)
@@ -307,9 +308,6 @@
 	uint32_t bar;
 
 	rmixl_pcix_found = 1;
-#ifdef DIAGNOSTIC
-	rmixl_pcix_sc = sc;
-#endif
 	sc->sc_dev = self;
 	sc->sc_29bit_dmat = obio->obio_29bit_dmat;
 	sc->sc_32bit_dmat = obio->obio_32bit_dmat;
@@ -482,6 +480,9 @@
 static void
 rmixl_pcix_intcfg(rmixl_pcix_softc_t *sc)
 {
+	size_t size;
+	rmixl_pcix_evcnt_t *ev;
+
 	DPRINTF(("%s\n", __func__));
 
 	/* mask all interrupts until they are established */
@@ -495,28 +496,35 @@
 	(void)RMIXL_PCIXREG_READ(RMIXL_PCIX_ECFG_INTR_STATUS); 
 	(void)RMIXL_PCIXREG_READ(RMIXL_PCIX_ECFG_INTR_ERR_STATUS); 
 
-	/* initialize the dispatch handles */
-	for (int i=0; i < RMIXL_PCIX_NINTR; i++) {
-		rmixl_pcix_intr_t *ih = &sc->sc_intr[i];
-		LIST_INIT(&ih->dispatch);
-		ih->ih = NULL;
-		ih->intrpin = i;
-		ih->enabled = false;
-	}
+	/* initialize the (non-error interrupt) dispatch handles */
+	sc->sc_intr = NULL;
 
-	sc->sc_ih = rmixl_intr_establish(16, sc->sc_tmsk,
-		IPL_VM, RMIXL_TRIG_LEVEL, RMIXL_POLR_HIGH,
-		rmixl_pcix_intr, sc, false);
-	if (sc->sc_ih == NULL)
-		panic("%s: cannot establish irq %d", __func__, 16);
+	/*
+	 * allocate per-cpu, per-pin interrupt event counters
+	 */
+	size = ncpu * PCI_INTERRUPT_PIN_MAX * sizeof(rmixl_pcix_evcnt_t);
+	ev = malloc(size, M_DEVBUF, M_NOWAIT);
+	if (ev == NULL)
+		panic("%s: cannot malloc evcnts\n", __func__);
+	sc->sc_evcnts = ev;
+	for (int pin=PCI_INTERRUPT_PIN_A; pin <= PCI_INTERRUPT_PIN_MAX; pin++) {
+		for (int cpu=0; cpu < ncpu; cpu++) {
+			ev = RMIXL_PCIX_EVCNT(sc, pin - 1, cpu);
+			snprintf(ev->name, sizeof(ev->name),
+				"cpu%d, pin %d", cpu, pin);
+			evcnt_attach_dynamic(&ev->evcnt, EVCNT_TYPE_INTR,
+				NULL, "rmixl_pcix", ev->name);
+		}
+	}
 
+	/*
+	 * establish PCIX error interrupt handler
+	 */
 	sc->sc_fatal_ih = rmixl_intr_establish(24, sc->sc_tmsk,
 		IPL_VM, RMIXL_TRIG_LEVEL, RMIXL_POLR_HIGH,
 		rmixl_pcix_error_intr, sc, false);
 	if (sc->sc_fatal_ih == NULL)
 		panic("%s: cannot establish irq %d", __func__, 24);
-
-	sc->sc_intr_init_done = true;
 }
 
 static void
@@ -702,9 +710,8 @@
 	bus_space_tag_t bst;
 	pcireg_t rv;
 	uint64_t cfg0;
-	u_int s;
 
-	PCI_CONF_LOCK(s);
+	mutex_enter(&sc->sc_mutex);
 
 	if (rmixl_pcix_conf_setup(sc, tag, &offset, &bst, &bsh) == 0) {
 		cfg0 = rmixl_cache_err_dis();
@@ -724,7 +731,8 @@
 		rv = -1;
 	}
 
-	PCI_CONF_UNLOCK(s);
+	mutex_exit(&sc->sc_mutex);
+
 	return rv;
 }
 
@@ -735,9 +743,8 @@
 	static bus_space_handle_t bsh;
 	bus_space_tag_t bst;
 	uint64_t cfg0;
-	u_int s;
 
-	PCI_CONF_LOCK(s);
+	mutex_enter(&sc->sc_mutex);
 
 	if (rmixl_pcix_conf_setup(sc, tag, &offset, &bst, &bsh) == 0) {
 		cfg0 = rmixl_cache_err_dis();
@@ -754,7 +761,7 @@
 		rmixl_cache_err_restore(cfg0);
 	}
 
-	PCI_CONF_UNLOCK(s);
+	mutex_exit(&sc->sc_mutex);
 }
 
 int
@@ -826,17 +833,32 @@
 {
 	rmixl_pcix_softc_t *sc = v;
 	rmixl_pcix_dispatch_t *dip = ih;
-	rmixl_pcix_intr_t *pip = &sc->sc_intr[dip->bitno];;
+	rmixl_pcix_intr_t *pip = sc->sc_intr;
+	bool busy;
 
 	DPRINTF(("%s: pin=%d irq=%d\n",
 		__func__, dip->bitno + 1, dip->irq));
 	KASSERT(dip->bitno < RMIXL_PCIX_NINTR);
 
-	LIST_REMOVE(dip, next);
-	evcnt_detach(&dip->count);
-	free(dip, M_DEVBUF);
+	mutex_enter(&sc->sc_mutex);
+
+	dip->func = NULL;	/* prevent further dispatch */
 
-	if (LIST_EMPTY(&pip->dispatch)) {
+	/*
+	 * if no other dispatch handle is using this interrupt,
+	 * we can disable it
+	 */
+	busy = false;
+	for (int i=0; i < pip->dispatch_count; i++) {
+		rmixl_pcix_dispatch_t *d = &pip->dispatch_data[i];
+		if (d == dip)
+			continue;
+		if (d->bitno == dip->bitno) {
+			busy = true;
+			break;
+		}
+	}
+	if (! busy) {
 		uint32_t bit = 1 << (dip->bitno + 2);
 		uint32_t r;
 
@@ -845,8 +867,21 @@
 		RMIXL_PCIXREG_WRITE(RMIXL_PCIX_ECFG_INTR_CONTROL, r);
 		DPRINTF(("%s: disabled pin %d\n", __func__, dip->bitno + 1));
 
-		pip->enabled = false;
+		pip->intenb &= ~(1 << dip->bitno);
+
+		if ((r & PCIX_INTR_CONTROL_MASK_ALL) == 0) {
+			/* tear down interrupt for this pcix */
+			rmixl_intr_disestablish(pip->ih);
+
+			/* commit NULL interrupt set */
+			sc->sc_intr = NULL;
+
+			/* schedule delayed free of the old interrupt set */
+			rmixl_pcix_pip_free_callout(pip);
+		}
 	}
+
+	mutex_exit(&sc->sc_mutex);
 }
 
 static void *
@@ -856,8 +891,7 @@
 	rmixl_pcix_softc_t *sc = v;
 	u_int bitno, irq;
 	rmixl_pcix_intr_t *pip;
-	rmixl_pcix_dispatch_t *dip;
-	int s;
+	rmixl_pcix_dispatch_t *dip = NULL;
 
 	if (pih == ~0) {
 		DPRINTF(("%s: bad pih=%#lx, implies PCI_INTERRUPT_PIN_NONE\n",
@@ -869,35 +903,43 @@
 	DPRINTF(("%s: pin=%d irq=%d\n", __func__, bitno + 1, irq));
 
 	KASSERT(bitno < RMIXL_PCIX_NINTR);
-	pip = &sc->sc_intr[bitno];
-
-	s = splhigh();
 
 	/*
-	 * all PCI-X device intrs get same ipl and sc
+	 * all PCI-X device intrs get same ipl
 	 */
-	KASSERT(sc == rmixl_pcix_sc);
 	KASSERT(ipl == IPL_VM);
 
+	mutex_enter(&sc->sc_mutex);
+
+	pip = rmixl_pcix_pip_add_1(sc, irq, ipl);
+	if (pip == NULL) {
+		/* do not leak sc_mutex on the failure path */
+		mutex_exit(&sc->sc_mutex);
+		return NULL;
+	}
+
 	/*
-	 * allocate and initialize a dispatch handle
+	 * initialize our new interrupt, the last element in dispatch_data[]
 	 */
-	dip = malloc(sizeof(*dip), M_DEVBUF, M_NOWAIT);
-	if (dip == NULL) {
-		printf("%s: cannot malloc dispatch handle\n", __func__);
-		goto out;
-	}
-
+	dip = &pip->dispatch_data[pip->dispatch_count - 1];
 	dip->bitno = bitno;
 	dip->irq = irq;
 	dip->func = func;
 	dip->arg = arg;
+	/* rmixl_pcix_intr() indexes dip->counts by cpu; it must be set */
+	dip->counts = RMIXL_PCIX_EVCNT(sc, bitno, 0);
+#if NEVER
 	snprintf(dip->count_name, sizeof(dip->count_name),
 		"pin %d", bitno + 1);
 	evcnt_attach_dynamic(&dip->count, EVCNT_TYPE_INTR, NULL,
 		"rmixl_pcix", dip->count_name);
+#endif
+
+	/* commit the new interrupt set */
+	sc->sc_intr = pip;
 
-	if (pip->enabled == false) {
+	/* enable this interrupt in the PCIX controller, if necessary */
+	if ((pip->intenb & (1 << bitno)) == 0) {
 		uint32_t bit = 1 << (bitno + 2);
 		uint32_t r;
 
@@ -907,37 +944,138 @@
 
 		pip->sc = sc;
 		pip->ipl = ipl;
-		pip->enabled = true;
+		pip->intenb |= 1 << bitno;
 		DPRINTF(("%s: enabled pin %d\n", __func__, bitno + 1));
 	}
 
-	LIST_INSERT_HEAD(&pip->dispatch, dip, next);
-
- out:
-	splx(s);
+	mutex_exit(&sc->sc_mutex);
 	return dip;
 }
 
+/*
+ * Build the next PCI-X interrupt set: the in-use entries of the current
+ * set (if any) plus one trailing slot for the caller to fill in.
+ * Returns NULL on allocation failure or sc/ipl mismatch.
+ * NOTE(review): only the first set is passed to rmixl_intr_establish()
+ * as the rmixl_pcix_intr() argument; confirm it is never used once freed.
+ */
+static rmixl_pcix_intr_t *
+rmixl_pcix_pip_add_1(rmixl_pcix_softc_t *sc, int irq, int ipl)
+{
+	rmixl_pcix_intr_t *pip_old = sc->sc_intr;
+	rmixl_pcix_intr_t *pip_new;
+	u_int dispatch_count = 1;	/* the new, trailing slot */
+	size_t size = sizeof(rmixl_pcix_intr_t);
+
+	if (pip_old != NULL) {
+		/*
+		 * count only the dispatch elements still in use; unused ones
+		 * are pruned during the copy ("lazy": no rmixl_pcix_pip_sub_1)
+		 */
+		for (u_int i=0; i < pip_old->dispatch_count; i++) {
+			if (pip_old->dispatch_data[i].func != NULL) {
+				dispatch_count++;
+				size += sizeof(rmixl_pcix_dispatch_t);
+			}
+		}
+	}
+
+	/* allocate softc intr struct with one or more dispatch handles */
+	pip_new = malloc(size, M_DEVBUF, M_NOWAIT);
+	if (pip_new == NULL) {
+#ifdef DIAGNOSTIC
+		printf("%s: cannot malloc\n", __func__);
+#endif
+		return NULL;
+	}
+
+	if (pip_old == NULL) {
+		/* initialize the interrupt struct */
+		pip_new->sc = sc;
+		pip_new->ipl = ipl;
+		pip_new->intenb = 0;	/* read by establish; malloc is not zeroed */
+		pip_new->ih = rmixl_intr_establish(irq, sc->sc_tmsk,
+			ipl, RMIXL_TRIG_LEVEL, RMIXL_POLR_HIGH,
+			rmixl_pcix_intr, pip_new, false);
+		if (pip_new->ih == NULL)
+			panic("%s: cannot establish irq %d", __func__, irq);
+	} else {
+		/*
+		 * all intrs on a softc get same ipl and sc
+		 * first intr established sets the standard
+		 */
+		KASSERT(sc == pip_old->sc);
+		if (sc != pip_old->sc) {
+			printf("%s: sc %p mismatch\n", __func__, sc);
+			free(pip_new, M_DEVBUF);
+			return NULL;
+		}
+		KASSERT(ipl == pip_old->ipl);
+		if (ipl != pip_old->ipl) {
+			printf("%s: ipl %d mismatch\n", __func__, ipl);
+			free(pip_new, M_DEVBUF);
+			return NULL;
+		}
+		/*
+		 * copy pip_old to pip_new, skipping unused dispatch elements
+		 */
+		memcpy(pip_new, pip_old, sizeof(rmixl_pcix_intr_t));
+		for (u_int j=0, i=0; i < pip_old->dispatch_count; i++) {
+			if (pip_old->dispatch_data[i].func != NULL) {
+				memcpy(&pip_new->dispatch_data[j],
+					&pip_old->dispatch_data[i],
+					sizeof(rmixl_pcix_dispatch_t));
+				j++;
+			}
+		}
+
+		/* schedule delayed free of old interrupt set */
+		rmixl_pcix_pip_free_callout(pip_old);
+	}
+	pip_new->dispatch_count = dispatch_count;
+
+	return pip_new;
+}
+
+/*
+ * Retire an interrupt set: arm its embedded callout to run
+ * rmixl_pcix_pip_free() on it after 2 * hz ticks, so that anyone still
+ * using the set can finish first.  XXX 2 seconds should be plenty?
+ */
+static void
+rmixl_pcix_pip_free_callout(rmixl_pcix_intr_t *pip)
+{
+	callout_init(&pip->callout, 0);
+	callout_reset(&pip->callout, 2 * hz, rmixl_pcix_pip_free, pip);
+}       
+        
+static void
+rmixl_pcix_pip_free(void *arg)
+{       
+	rmixl_pcix_intr_t *pip = arg;	/* the retired set */
+	/* callout handler armed by rmixl_pcix_pip_free_callout() */
+	callout_destroy(&pip->callout);
+	free(pip, M_DEVBUF);
+}
+
 static int
 rmixl_pcix_intr(void *arg)
 {
-	rmixl_pcix_softc_t *sc = arg;
+	rmixl_pcix_intr_t *pip = arg;
 	int rv = 0;
 
 	uint32_t status = RMIXL_PCIXREG_READ(RMIXL_PCIX_ECFG_INTR_STATUS); 
 	DPRINTF(("%s: %#x\n", __func__, status));
 
 	if (status != 0) {
-		for (int i=0; i < RMIXL_PCIX_NINTR; i++) {
-			uint32_t bit = 1 << i;
-			if ((status & bit) != 0) {
-				rmixl_pcix_intr_t *pip = &sc->sc_intr[i];
-				rmixl_pcix_dispatch_t *dip;
-				LIST_FOREACH(dip, &pip->dispatch, next) {
-					(void)(*dip->func)(dip->arg);
-					dip->count.ev_count++;
-					rv = 1;
-				}
+		for (int i=0; i < pip->dispatch_count; i++) {
+			rmixl_pcix_dispatch_t *dip = &pip->dispatch_data[i];
+			uint32_t bit = 1 << dip->bitno;
+			int (*func)(void *) = dip->func;
+			if ((func != NULL) && (status & bit) != 0) {
+				(void)(*func)(dip->arg);
+				dip->counts[cpu_index(curcpu())].evcnt.ev_count++;
+				rv = 1;
 			}
 		}
 	}

Index: src/sys/arch/mips/rmi/rmixl_pcixvar.h
diff -u src/sys/arch/mips/rmi/rmixl_pcixvar.h:1.1.2.2 src/sys/arch/mips/rmi/rmixl_pcixvar.h:1.1.2.3
--- src/sys/arch/mips/rmi/rmixl_pcixvar.h:1.1.2.2	Tue Apr 13 18:15:16 2010
+++ src/sys/arch/mips/rmi/rmixl_pcixvar.h	Mon Sep 20 19:42:31 2010
@@ -1,4 +1,4 @@
-/*      $NetBSD: rmixl_pcixvar.h,v 1.1.2.2 2010/04/13 18:15:16 cliff Exp $	*/
+/*      $NetBSD: rmixl_pcixvar.h,v 1.1.2.3 2010/09/20 19:42:31 cliff Exp $	*/
 /*-
  * Copyright (c) 2010 The NetBSD Foundation, Inc.
  * All rights reserved.
@@ -33,25 +33,31 @@
 
 #include <dev/pci/pcivar.h>
 
+typedef struct rmixl_pcix_evcnt {
+	struct evcnt evcnt;	/* counter attached in rmixl_pcix_intcfg() */
+	char name[32];		/* "cpuN, pin P" name for evcnt */
+} rmixl_pcix_evcnt_t;
+
 typedef struct rmixl_pcix_dispatch {
-	LIST_ENTRY(rmixl_pcix_dispatch) next;
 	int (*func)(void *);
 	void *arg;
 	u_int bitno;
 	u_int irq;
-	struct evcnt count;
-	char count_name[32];
+	rmixl_pcix_evcnt_t *counts;	/* event counters, index by cpu_index() */
 } rmixl_pcix_dispatch_t;
 
 struct rmixl_pcix_softc;
 
 typedef struct rmixl_pcix_intr {
 	struct rmixl_pcix_softc *sc;
-	LIST_HEAD(, rmixl_pcix_dispatch) dispatch;
 	u_int intrpin;
 	u_int ipl;
-	bool enabled;
 	void *ih;			/* mips interrupt handle */
+	callout_t callout;		/* for delayed free of this struct */
+	u_int intenb;			/* enabled flags for INT[ABCD], bit = bitno */
+	u_int dispatch_count;		/* number of dispatch_data[] elements */
+	rmixl_pcix_dispatch_t dispatch_data[1];
+					/* variable length; sizeof() covers 1 */
 } rmixl_pcix_intr_t;
 
 #define RMIXL_PCIX_NINTR	4	/* PCI INT[A,B,C,D] */
@@ -64,11 +70,11 @@
 	bus_dma_tag_t			sc_29bit_dmat;
 	bus_dma_tag_t			sc_32bit_dmat;
 	bus_dma_tag_t			sc_64bit_dmat;
+	kmutex_t			sc_mutex;
 	int				sc_tmsk;
 	void 			       *sc_fatal_ih;
-	void 			       *sc_ih;
-	rmixl_pcix_intr_t		sc_intr[RMIXL_PCIX_NINTR];
-	bool				sc_intr_init_done;	
+        rmixl_pcix_evcnt_t             *sc_evcnts;
+	rmixl_pcix_intr_t	       *sc_intr;
 } rmixl_pcix_softc_t;
 
 

Reply via email to