Module Name:    src
Committed By:   tls
Date:           Wed Sep 12 06:15:36 UTC 2012

Modified Files:
        src/sys/arch/amd64/include [tls-maxphys]: param.h
        src/sys/arch/i386/pnpbios [tls-maxphys]: fdc_pnpbios.c lpt_pnpbios.c
            pciide_pnpbios.c pnpbios.c
        src/sys/dev/acpi [tls-maxphys]: acpi.c
        src/sys/dev/ic [tls-maxphys]: mpt_netbsd.c mpt_netbsd.h
        src/sys/dev/isa [tls-maxphys]: isa.c
        src/sys/dev/pci [tls-maxphys]: amr.c mlyvar.h mpt_pci.c pci.c pciide.c
        src/sys/dev/scsipi [tls-maxphys]: cd.c sd.c ss.c
        src/sys/kern [tls-maxphys]: kern_physio.c subr_autoconf.c subr_disk.c
            sys_descrip.c vfs_vnops.c vfs_wapbl.c
        src/sys/miscfs/genfs [tls-maxphys]: genfs_io.c
        src/sys/sys [tls-maxphys]: device.h disk.h mount.h
        src/sys/ufs/ffs [tls-maxphys]: ffs_vfsops.c
        src/sys/uvm [tls-maxphys]: uvm_io.c uvm_map.c uvm_readahead.c
            uvm_readahead.h
Added Files:
        src [tls-maxphys]: MAXPHYS-NOTES

Log Message:
Initial snapshot of work to eliminate the 64K MAXPHYS limit.  It basically
works for physio (I/O to raw devices); more work is needed to get it going
with the filesystems, but it shouldn't damage data.

All work's been done on amd64 so far.  Not hard to add support to other
ports.  If others want to pitch in, one very helpful thing would be to
sort out when and how IDE disks can do 128K or larger transfers, and
adjust the various PCI IDE (or at least ahcisata) drivers and wd.c
accordingly -- it would make testing much easier.  Another very helpful
thing would be to implement a smart minphys() for RAIDframe along the
lines detailed in the MAXPHYS-NOTES file.


To generate a diff of this commit:
cvs rdiff -u -r0 -r1.1.2.1 src/MAXPHYS-NOTES
cvs rdiff -u -r1.18 -r1.18.2.1 src/sys/arch/amd64/include/param.h
cvs rdiff -u -r1.17 -r1.17.6.1 src/sys/arch/i386/pnpbios/fdc_pnpbios.c
cvs rdiff -u -r1.12 -r1.12.12.1 src/sys/arch/i386/pnpbios/lpt_pnpbios.c
cvs rdiff -u -r1.30 -r1.30.2.1 src/sys/arch/i386/pnpbios/pciide_pnpbios.c
cvs rdiff -u -r1.71 -r1.71.12.1 src/sys/arch/i386/pnpbios/pnpbios.c
cvs rdiff -u -r1.254 -r1.254.2.1 src/sys/dev/acpi/acpi.c
cvs rdiff -u -r1.18 -r1.18.2.1 src/sys/dev/ic/mpt_netbsd.c
cvs rdiff -u -r1.10 -r1.10.2.1 src/sys/dev/ic/mpt_netbsd.h
cvs rdiff -u -r1.138 -r1.138.18.1 src/sys/dev/isa/isa.c
cvs rdiff -u -r1.55 -r1.55.2.1 src/sys/dev/pci/amr.c
cvs rdiff -u -r1.5 -r1.5.44.1 src/sys/dev/pci/mlyvar.h
cvs rdiff -u -r1.22 -r1.22.2.1 src/sys/dev/pci/mpt_pci.c
cvs rdiff -u -r1.142 -r1.142.12.1 src/sys/dev/pci/pci.c
cvs rdiff -u -r1.219 -r1.219.18.1 src/sys/dev/pci/pciide.c
cvs rdiff -u -r1.309 -r1.309.2.1 src/sys/dev/scsipi/cd.c
cvs rdiff -u -r1.298 -r1.298.2.1 src/sys/dev/scsipi/sd.c
cvs rdiff -u -r1.84 -r1.84.2.1 src/sys/dev/scsipi/ss.c
cvs rdiff -u -r1.92 -r1.92.14.1 src/sys/kern/kern_physio.c
cvs rdiff -u -r1.223 -r1.223.2.1 src/sys/kern/subr_autoconf.c
cvs rdiff -u -r1.100 -r1.100.18.1 src/sys/kern/subr_disk.c
cvs rdiff -u -r1.27 -r1.27.2.1 src/sys/kern/sys_descrip.c
cvs rdiff -u -r1.185 -r1.185.2.1 src/sys/kern/vfs_vnops.c
cvs rdiff -u -r1.52 -r1.52.2.1 src/sys/kern/vfs_wapbl.c
cvs rdiff -u -r1.55 -r1.55.2.1 src/sys/miscfs/genfs/genfs_io.c
cvs rdiff -u -r1.142 -r1.142.2.1 src/sys/sys/device.h
cvs rdiff -u -r1.57 -r1.57.2.1 src/sys/sys/disk.h
cvs rdiff -u -r1.207 -r1.207.6.1 src/sys/sys/mount.h
cvs rdiff -u -r1.278 -r1.278.2.1 src/sys/ufs/ffs/ffs_vfsops.c
cvs rdiff -u -r1.27 -r1.27.6.1 src/sys/uvm/uvm_io.c
cvs rdiff -u -r1.322 -r1.322.2.1 src/sys/uvm/uvm_map.c
cvs rdiff -u -r1.8 -r1.8.12.1 src/sys/uvm/uvm_readahead.c
cvs rdiff -u -r1.4 -r1.4.22.1 src/sys/uvm/uvm_readahead.h

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/arch/amd64/include/param.h
diff -u src/sys/arch/amd64/include/param.h:1.18 src/sys/arch/amd64/include/param.h:1.18.2.1
--- src/sys/arch/amd64/include/param.h:1.18	Fri Apr 20 22:23:24 2012
+++ src/sys/arch/amd64/include/param.h	Wed Sep 12 06:15:31 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: param.h,v 1.18 2012/04/20 22:23:24 rmind Exp $	*/
+/*	$NetBSD: param.h,v 1.18.2.1 2012/09/12 06:15:31 tls Exp $	*/
 
 #ifdef __x86_64__
 
@@ -45,9 +45,11 @@
 #define	DEV_BSIZE	(1 << DEV_BSHIFT)
 #define	BLKDEV_IOSIZE	2048
 #ifndef	MAXPHYS
-#define	MAXPHYS		(64 * 1024)	/* max raw I/O transfer size */
+#define	MAXPHYS		(64 * 1024)	/* default I/O transfer size max */
 #endif
 
+#define	MACHINE_MAXPHYS	(1024 * 1024)	/* absolute I/O transfer size max */
+
 #define	SSIZE		1		/* initial stack size/NBPG */
 #define	SINCR		1		/* increment of stack/NBPG */
 #ifdef DIAGNOSTIC

Index: src/sys/arch/i386/pnpbios/fdc_pnpbios.c
diff -u src/sys/arch/i386/pnpbios/fdc_pnpbios.c:1.17 src/sys/arch/i386/pnpbios/fdc_pnpbios.c:1.17.6.1
--- src/sys/arch/i386/pnpbios/fdc_pnpbios.c:1.17	Thu Feb  2 19:42:59 2012
+++ src/sys/arch/i386/pnpbios/fdc_pnpbios.c	Wed Sep 12 06:15:32 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: fdc_pnpbios.c,v 1.17 2012/02/02 19:42:59 tls Exp $	*/
+/*	$NetBSD: fdc_pnpbios.c,v 1.17.6.1 2012/09/12 06:15:32 tls Exp $	*/
 
 /*-
  * Copyright (c) 2000 The NetBSD Foundation, Inc.
@@ -34,7 +34,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: fdc_pnpbios.c,v 1.17 2012/02/02 19:42:59 tls Exp $");
+__KERNEL_RCSID(0, "$NetBSD: fdc_pnpbios.c,v 1.17.6.1 2012/09/12 06:15:32 tls Exp $");
 
 
 
@@ -93,6 +93,9 @@ fdc_pnpbios_attach(device_t parent, devi
 	fdc->sc_dev = self;
 	fdc->sc_ic = aa->ic;
 
+	/* This is really ISA DMA under the covers: clamp max transfer size */
+	self->dv_maxphys = MIN(parent->dv_maxphys, 64 * 1024);
+
 	if (pnpbios_io_map(aa->pbt, aa->resc, 0, &fdc->sc_iot,
             &pdc->sc_baseioh)) {
 		aprint_error_dev(self, "unable to map I/O space\n");

Index: src/sys/arch/i386/pnpbios/lpt_pnpbios.c
diff -u src/sys/arch/i386/pnpbios/lpt_pnpbios.c:1.12 src/sys/arch/i386/pnpbios/lpt_pnpbios.c:1.12.12.1
--- src/sys/arch/i386/pnpbios/lpt_pnpbios.c:1.12	Fri Jul  1 18:14:15 2011
+++ src/sys/arch/i386/pnpbios/lpt_pnpbios.c	Wed Sep 12 06:15:32 2012
@@ -1,4 +1,4 @@
-/* $NetBSD: lpt_pnpbios.c,v 1.12 2011/07/01 18:14:15 dyoung Exp $ */
+/* $NetBSD: lpt_pnpbios.c,v 1.12.12.1 2012/09/12 06:15:32 tls Exp $ */
 /*
  * Copyright (c) 1999
  * 	Matthias Drochner.  All rights reserved.
@@ -26,7 +26,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: lpt_pnpbios.c,v 1.12 2011/07/01 18:14:15 dyoung Exp $");
+__KERNEL_RCSID(0, "$NetBSD: lpt_pnpbios.c,v 1.12.12.1 2012/09/12 06:15:32 tls Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -77,6 +77,10 @@ lpt_pnpbios_attach(device_t parent, devi
 
 	sc->sc_dev = self;
 
+       /* Lest someone attach a parallel-port SCSI adapter etc:
+	  this is really ISA DMA under the covers: clamp max transfer size */
+        self->dv_maxphys = MIN(parent->dv_maxphys, 64 * 1024);
+
 	if (pnpbios_io_map(aa->pbt, aa->resc, 0, &sc->sc_iot, &sc->sc_ioh)) { 	
 		printf(": can't map i/o space\n");
 		return;

Index: src/sys/arch/i386/pnpbios/pciide_pnpbios.c
diff -u src/sys/arch/i386/pnpbios/pciide_pnpbios.c:1.30 src/sys/arch/i386/pnpbios/pciide_pnpbios.c:1.30.2.1
--- src/sys/arch/i386/pnpbios/pciide_pnpbios.c:1.30	Tue Jul 31 15:50:32 2012
+++ src/sys/arch/i386/pnpbios/pciide_pnpbios.c	Wed Sep 12 06:15:32 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: pciide_pnpbios.c,v 1.30 2012/07/31 15:50:32 bouyer Exp $	*/
+/*	$NetBSD: pciide_pnpbios.c,v 1.30.2.1 2012/09/12 06:15:32 tls Exp $	*/
 
 /*
  * Copyright (c) 1999 Soren S. Jorvang.  All rights reserved.
@@ -30,7 +30,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: pciide_pnpbios.c,v 1.30 2012/07/31 15:50:32 bouyer Exp $");
+__KERNEL_RCSID(0, "$NetBSD: pciide_pnpbios.c,v 1.30.2.1 2012/09/12 06:15:32 tls Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -88,6 +88,9 @@ pciide_pnpbios_attach(device_t parent, d
 	int i, drive, size;
 	uint8_t idedma_ctl;
 
+	/* Clamp max transfer size - XXX how to do 128K on pciide? */
+	self->dv_maxphys = MIN(parent->dv_maxphys, IDEDMA_BYTE_COUNT_MAX);
+
 	sc->sc_wdcdev.sc_atac.atac_dev = self;
 
 	aprint_naive(": disk controller\n");

Index: src/sys/arch/i386/pnpbios/pnpbios.c
diff -u src/sys/arch/i386/pnpbios/pnpbios.c:1.71 src/sys/arch/i386/pnpbios/pnpbios.c:1.71.12.1
--- src/sys/arch/i386/pnpbios/pnpbios.c:1.71	Thu Jun 30 20:09:31 2011
+++ src/sys/arch/i386/pnpbios/pnpbios.c	Wed Sep 12 06:15:32 2012
@@ -1,4 +1,4 @@
-/* $NetBSD: pnpbios.c,v 1.71 2011/06/30 20:09:31 wiz Exp $ */
+/* $NetBSD: pnpbios.c,v 1.71.12.1 2012/09/12 06:15:32 tls Exp $ */
 
 /*
  * Copyright (c) 2000 Jason R. Thorpe.  All rights reserved.
@@ -41,7 +41,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: pnpbios.c,v 1.71 2011/06/30 20:09:31 wiz Exp $");
+__KERNEL_RCSID(0, "$NetBSD: pnpbios.c,v 1.71.12.1 2012/09/12 06:15:32 tls Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -289,6 +289,9 @@ pnpbios_attach(device_t parent, device_t
 	aprint_naive("\n");
 
 	pnpbios_softc = sc;
+
+	/* We *don't* clamp xfer size here as both PCI and ISA devs
+	   may attach beneath us */
 	sc->sc_dev = self;
 	sc->sc_ic = paa->paa_ic;
 

Index: src/sys/dev/acpi/acpi.c
diff -u src/sys/dev/acpi/acpi.c:1.254 src/sys/dev/acpi/acpi.c:1.254.2.1
--- src/sys/dev/acpi/acpi.c:1.254	Tue Aug 14 14:38:02 2012
+++ src/sys/dev/acpi/acpi.c	Wed Sep 12 06:15:32 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: acpi.c,v 1.254 2012/08/14 14:38:02 jruoho Exp $	*/
+/*	$NetBSD: acpi.c,v 1.254.2.1 2012/09/12 06:15:32 tls Exp $	*/
 
 /*-
  * Copyright (c) 2003, 2007 The NetBSD Foundation, Inc.
@@ -100,7 +100,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: acpi.c,v 1.254 2012/08/14 14:38:02 jruoho Exp $");
+__KERNEL_RCSID(0, "$NetBSD: acpi.c,v 1.254.2.1 2012/09/12 06:15:32 tls Exp $");
 
 #include "opt_acpi.h"
 #include "opt_pcifixup.h"
@@ -438,6 +438,9 @@ acpi_attach(device_t parent, device_t se
 
 	acpi_unmap_rsdt(rsdt);
 
+	/* Clamp the max transfer size - assume LPC devs may be beneath us. */
+	self->dv_maxphys = MIN(parent->dv_maxphys, 64 * 1024);
+
 	sc->sc_dev = self;
 	sc->sc_root = NULL;
 

Index: src/sys/dev/ic/mpt_netbsd.c
diff -u src/sys/dev/ic/mpt_netbsd.c:1.18 src/sys/dev/ic/mpt_netbsd.c:1.18.2.1
--- src/sys/dev/ic/mpt_netbsd.c:1.18	Sun Mar 18 21:05:21 2012
+++ src/sys/dev/ic/mpt_netbsd.c	Wed Sep 12 06:15:32 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: mpt_netbsd.c,v 1.18 2012/03/18 21:05:21 martin Exp $	*/
+/*	$NetBSD: mpt_netbsd.c,v 1.18.2.1 2012/09/12 06:15:32 tls Exp $	*/
 
 /*
  * Copyright (c) 2003 Wasabi Systems, Inc.
@@ -77,7 +77,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: mpt_netbsd.c,v 1.18 2012/03/18 21:05:21 martin Exp $");
+__KERNEL_RCSID(0, "$NetBSD: mpt_netbsd.c,v 1.18.2.1 2012/09/12 06:15:32 tls Exp $");
 
 #include <dev/ic/mpt.h>			/* pulls in all headers */
 
@@ -250,8 +250,11 @@ mpt_dma_mem_alloc(mpt_softc_t *mpt)
 		req->sense_pbuf = (pptr - MPT_SENSE_SIZE);
 		req->sense_vbuf = (vptr - MPT_SENSE_SIZE);
 
-		error = bus_dmamap_create(mpt->sc_dmat, MAXPHYS,
-		    MPT_SGL_MAX, MAXPHYS, 0, 0, &req->dmap);
+		error = bus_dmamap_create(mpt->sc_dmat,
+		    MPT_SGL_MAX * PAGE_SIZE ,
+		    MPT_SGL_MAX,
+		    MPT_SGL_MAX * PAGE_SIZE,
+		    0, 0, &req->dmap);
 		if (error) {
 			aprint_error_dev(mpt->sc_dev, "unable to create req %d DMA map, "
 			    "error = %d\n", i, error);
@@ -1365,13 +1368,6 @@ mpt_scsipi_request(struct scsipi_channel
 static void
 mpt_minphys(struct buf *bp)
 {
-
-/*
- * Subtract one from the SGL limit, since we need an extra one to handle
- * an non-page-aligned transfer.
- */
-#define	MPT_MAX_XFER	((MPT_SGL_MAX - 1) * PAGE_SIZE)
-
 	if (bp->b_bcount > MPT_MAX_XFER)
 		bp->b_bcount = MPT_MAX_XFER;
 	minphys(bp);

Index: src/sys/dev/ic/mpt_netbsd.h
diff -u src/sys/dev/ic/mpt_netbsd.h:1.10 src/sys/dev/ic/mpt_netbsd.h:1.10.2.1
--- src/sys/dev/ic/mpt_netbsd.h:1.10	Sun Mar 18 21:05:21 2012
+++ src/sys/dev/ic/mpt_netbsd.h	Wed Sep 12 06:15:32 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: mpt_netbsd.h,v 1.10 2012/03/18 21:05:21 martin Exp $	*/
+/*	$NetBSD: mpt_netbsd.h,v 1.10.2.1 2012/09/12 06:15:32 tls Exp $	*/
 
 /*
  * Copyright (c) 2003 Wasabi Systems, Inc.
@@ -133,6 +133,12 @@
 	  sizeof(SGE_IO_UNION)) / sizeof(SGE_SIMPLE32))
 
 /*
+ * Subtract one from the SGL limit, since we need an extra one to handle
+ * an non-page-aligned transfer.
+ */
+#define MPT_MAX_XFER		((MPT_SGL_MAX - 1) * PAGE_SIZE)
+
+/*
  * Convert a physical address returned from IOC to a virtual address
  * needed to access the data.
  */

Index: src/sys/dev/isa/isa.c
diff -u src/sys/dev/isa/isa.c:1.138 src/sys/dev/isa/isa.c:1.138.18.1
--- src/sys/dev/isa/isa.c:1.138	Sat Aug 21 17:08:15 2010
+++ src/sys/dev/isa/isa.c	Wed Sep 12 06:15:32 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: isa.c,v 1.138 2010/08/21 17:08:15 jmcneill Exp $	*/
+/*	$NetBSD: isa.c,v 1.138.18.1 2012/09/12 06:15:32 tls Exp $	*/
 
 /*-
  * Copyright (c) 1998, 2001, 2008 The NetBSD Foundation, Inc.
@@ -30,7 +30,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: isa.c,v 1.138 2010/08/21 17:08:15 jmcneill Exp $");
+__KERNEL_RCSID(0, "$NetBSD: isa.c,v 1.138.18.1 2012/09/12 06:15:32 tls Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -91,6 +91,9 @@ isaattach(device_t parent, device_t self
 		ISACF_IRQ_DEFAULT, ISACF_DRQ_DEFAULT, ISACF_DRQ2_DEFAULT
 	};
 
+	/* Clamp the maximum transfer size.  The hook may clamp it further. */
+	self->dv_maxphys = MIN(parent->dv_maxphys, 64 * 1024);
+
 	TAILQ_INIT(&sc->sc_knowndevs);
 	sc->sc_dynamicdevs = 0;
 

Index: src/sys/dev/pci/amr.c
diff -u src/sys/dev/pci/amr.c:1.55 src/sys/dev/pci/amr.c:1.55.2.1
--- src/sys/dev/pci/amr.c:1.55	Fri Jul 27 16:25:11 2012
+++ src/sys/dev/pci/amr.c	Wed Sep 12 06:15:32 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: amr.c,v 1.55 2012/07/27 16:25:11 jakllsch Exp $	*/
+/*	$NetBSD: amr.c,v 1.55.2.1 2012/09/12 06:15:32 tls Exp $	*/
 
 /*-
  * Copyright (c) 2002, 2003 The NetBSD Foundation, Inc.
@@ -64,7 +64,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: amr.c,v 1.55 2012/07/27 16:25:11 jakllsch Exp $");
+__KERNEL_RCSID(0, "$NetBSD: amr.c,v 1.55.2.1 2012/09/12 06:15:32 tls Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -401,9 +401,11 @@ amr_attach(device_t parent, device_t sel
 	amr->amr_flags |= AMRF_CCBS;
 
 	if (amr_max_xfer == 0) {
-		amr_max_xfer = min(((AMR_MAX_SEGS - 1) * PAGE_SIZE), MAXPHYS);
+		amr_max_xfer = min(((AMR_MAX_SEGS - 1) * PAGE_SIZE),
+				   device_maxphys(amr->amr_dv));
 		amr_max_segs = (amr_max_xfer + (PAGE_SIZE * 2) - 1) / PAGE_SIZE;
 	}
+	amr->amr_dv->dv_maxphys = amr_max_xfer;
 
 	for (i = 0; i < AMR_MAX_CMDS; i++, ac++) {
 		rv = bus_dmamap_create(amr->amr_dmat, amr_max_xfer,
@@ -1364,7 +1366,8 @@ amrioctl(dev_t dev, u_long cmd, void *da
 		return EOPNOTSUPP;
 	}
 
-	if (au_length <= 0 || au_length > MAXPHYS || au_cmd[0] == 0x06)
+	if (au_length <= 0 || au_length > device_maxphys(amr->amr_dv) ||
+	    au_cmd[0] == 0x06)
 		return (EINVAL);
 
 	/*

Index: src/sys/dev/pci/mlyvar.h
diff -u src/sys/dev/pci/mlyvar.h:1.5 src/sys/dev/pci/mlyvar.h:1.5.44.1
--- src/sys/dev/pci/mlyvar.h:1.5	Mon Apr 28 20:23:55 2008
+++ src/sys/dev/pci/mlyvar.h	Wed Sep 12 06:15:32 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: mlyvar.h,v 1.5 2008/04/28 20:23:55 martin Exp $	*/
+/*	$NetBSD: mlyvar.h,v 1.5.44.1 2012/09/12 06:15:32 tls Exp $	*/
 
 /*-
  * Copyright (c) 2001 The NetBSD Foundation, Inc.
@@ -73,7 +73,7 @@
  * The firmware interface allows for a 16-bit s/g list length.  We limit
  * ourselves to a reasonable maximum.
  */
-#define	MLY_MAX_SEGS	17
+#define	MLY_MAX_SEGS	257
 #define	MLY_SGL_SIZE	(MLY_MAX_SEGS * sizeof(struct mly_sg_entry))
 
 #define	MLY_MAX_XFER	((MLY_MAX_SEGS - 1) * PAGE_SIZE)

Index: src/sys/dev/pci/mpt_pci.c
diff -u src/sys/dev/pci/mpt_pci.c:1.22 src/sys/dev/pci/mpt_pci.c:1.22.2.1
--- src/sys/dev/pci/mpt_pci.c:1.22	Sun Mar 18 21:05:21 2012
+++ src/sys/dev/pci/mpt_pci.c	Wed Sep 12 06:15:32 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: mpt_pci.c,v 1.22 2012/03/18 21:05:21 martin Exp $	*/
+/*	$NetBSD: mpt_pci.c,v 1.22.2.1 2012/09/12 06:15:32 tls Exp $	*/
 
 /*
  * Copyright (c) 2003 Wasabi Systems, Inc.
@@ -46,7 +46,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: mpt_pci.c,v 1.22 2012/03/18 21:05:21 martin Exp $");
+__KERNEL_RCSID(0, "$NetBSD: mpt_pci.c,v 1.22.2.1 2012/09/12 06:15:32 tls Exp $");
 
 #include <dev/ic/mpt.h>			/* pulls in all headers */
 
@@ -229,6 +229,8 @@ mpt_pci_attach(device_t parent, device_t
 		return;
 	}
 
+	mpt->sc_dev->dv_maxphys = MIN(mpt->sc_dev->dv_maxphys, MPT_MAX_XFER);
+
 	/* Attach to scsipi. */
 	mpt_scsipi_attach(mpt);
 }

Index: src/sys/dev/pci/pci.c
diff -u src/sys/dev/pci/pci.c:1.142 src/sys/dev/pci/pci.c:1.142.12.1
--- src/sys/dev/pci/pci.c:1.142	Mon Aug 29 14:47:08 2011
+++ src/sys/dev/pci/pci.c	Wed Sep 12 06:15:32 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: pci.c,v 1.142 2011/08/29 14:47:08 jmcneill Exp $	*/
+/*	$NetBSD: pci.c,v 1.142.12.1 2012/09/12 06:15:32 tls Exp $	*/
 
 /*
  * Copyright (c) 1995, 1996, 1997, 1998
@@ -36,7 +36,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: pci.c,v 1.142 2011/08/29 14:47:08 jmcneill Exp $");
+__KERNEL_RCSID(0, "$NetBSD: pci.c,v 1.142.12.1 2012/09/12 06:15:32 tls Exp $");
 
 #include "opt_pci.h"
 
@@ -142,6 +142,9 @@ pciattach(device_t parent, device_t self
 		PCICF_DEV_DEFAULT, PCICF_FUNCTION_DEFAULT
 	};
 
+	/* Clamp the maximum transfer size.  The hook may clamp it further. */
+	self->dv_maxphys = MIN(parent->dv_maxphys, INT_MAX);
+
 	sc->sc_dev = self;
 
 	pci_attach_hook(parent, self, pba);

Index: src/sys/dev/pci/pciide.c
diff -u src/sys/dev/pci/pciide.c:1.219 src/sys/dev/pci/pciide.c:1.219.18.1
--- src/sys/dev/pci/pciide.c:1.219	Sat Nov  6 00:29:09 2010
+++ src/sys/dev/pci/pciide.c	Wed Sep 12 06:15:32 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: pciide.c,v 1.219 2010/11/06 00:29:09 jakllsch Exp $	*/
+/*	$NetBSD: pciide.c,v 1.219.18.1 2012/09/12 06:15:32 tls Exp $	*/
 
 
 /*
@@ -70,7 +70,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: pciide.c,v 1.219 2010/11/06 00:29:09 jakllsch Exp $");
+__KERNEL_RCSID(0, "$NetBSD: pciide.c,v 1.219.18.1 2012/09/12 06:15:32 tls Exp $");
 
 #include <sys/param.h>
 
@@ -108,6 +108,9 @@ pciide_attach(device_t parent, device_t 
 	struct pci_attach_args *pa = aux;
 	struct pciide_softc *sc = device_private(self);
 
+	/* Clamp max transfer size - XXX how to do 128K on pciide? */
+	self->dv_maxphys = MIN(parent->dv_maxphys, IDEDMA_BYTE_COUNT_MAX);
+
 	sc->sc_wdcdev.sc_atac.atac_dev = self;
 
 	pciide_common_attach(sc, pa, NULL);

Index: src/sys/dev/scsipi/cd.c
diff -u src/sys/dev/scsipi/cd.c:1.309 src/sys/dev/scsipi/cd.c:1.309.2.1
--- src/sys/dev/scsipi/cd.c:1.309	Sun May  6 17:23:10 2012
+++ src/sys/dev/scsipi/cd.c	Wed Sep 12 06:15:33 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: cd.c,v 1.309 2012/05/06 17:23:10 martin Exp $	*/
+/*	$NetBSD: cd.c,v 1.309.2.1 2012/09/12 06:15:33 tls Exp $	*/
 
 /*-
  * Copyright (c) 1998, 2001, 2003, 2004, 2005, 2008 The NetBSD Foundation,
@@ -50,7 +50,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: cd.c,v 1.309 2012/05/06 17:23:10 martin Exp $");
+__KERNEL_RCSID(0, "$NetBSD: cd.c,v 1.309.2.1 2012/09/12 06:15:33 tls Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -1097,6 +1097,14 @@ cdminphys(struct buf *bp)
 			bp->b_bcount = xmax;
 	}
 
+        /* Impose any restrictions inherited down the device tree */
+	xmax = cd->sc_dev->dv_maxphys;
+	if (bp->b_bcount > xmax)
+		bp->b_bcount = xmax;
+        
+	/* Adapters could enforce their own limits on the
+	   attached scsibuses via the device tree, but existing
+	   drivers mostly do it in their own minphys routines. */
 	(*cd->sc_periph->periph_channel->chan_adapter->adapt_minphys)(bp);
 }
 

Index: src/sys/dev/scsipi/sd.c
diff -u src/sys/dev/scsipi/sd.c:1.298 src/sys/dev/scsipi/sd.c:1.298.2.1
--- src/sys/dev/scsipi/sd.c:1.298	Thu Apr 19 17:45:20 2012
+++ src/sys/dev/scsipi/sd.c	Wed Sep 12 06:15:33 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: sd.c,v 1.298 2012/04/19 17:45:20 bouyer Exp $	*/
+/*	$NetBSD: sd.c,v 1.298.2.1 2012/09/12 06:15:33 tls Exp $	*/
 
 /*-
  * Copyright (c) 1998, 2003, 2004 The NetBSD Foundation, Inc.
@@ -47,7 +47,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: sd.c,v 1.298 2012/04/19 17:45:20 bouyer Exp $");
+__KERNEL_RCSID(0, "$NetBSD: sd.c,v 1.298.2.1 2012/09/12 06:15:33 tls Exp $");
 
 #include "opt_scsi.h"
 
@@ -971,6 +971,14 @@ sdminphys(struct buf *bp)
 			bp->b_bcount = xmax;
 	}
 
+	/* Impose any restrictions inherited down the device tree */
+	xmax = sd->sc_dev->dv_maxphys;
+	if (bp->b_bcount > xmax)
+		bp->b_bcount = xmax;
+
+	/* Adapters could enforce their own limits on the
+	   attached scsibuses via the device tree, but existing
+	   drivers mostly do it in their own minphys routines. */
 	scsipi_adapter_minphys(sd->sc_periph->periph_channel, bp);
 }
 

Index: src/sys/dev/scsipi/ss.c
diff -u src/sys/dev/scsipi/ss.c:1.84 src/sys/dev/scsipi/ss.c:1.84.2.1
--- src/sys/dev/scsipi/ss.c:1.84	Tue Feb 28 14:04:19 2012
+++ src/sys/dev/scsipi/ss.c	Wed Sep 12 06:15:33 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: ss.c,v 1.84 2012/02/28 14:04:19 mbalmer Exp $	*/
+/*	$NetBSD: ss.c,v 1.84.2.1 2012/09/12 06:15:33 tls Exp $	*/
 
 /*
  * Copyright (c) 1995 Kenneth Stailey.  All rights reserved.
@@ -31,7 +31,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: ss.c,v 1.84 2012/02/28 14:04:19 mbalmer Exp $");
+__KERNEL_RCSID(0, "$NetBSD: ss.c,v 1.84.2.1 2012/09/12 06:15:33 tls Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -328,7 +328,16 @@ ssminphys(struct buf *bp)
 {
 	struct ss_softc *ss = device_lookup_private(&ss_cd, SSUNIT(bp->b_dev));
 	struct scsipi_periph *periph = ss->sc_periph;
+	long xmax;
 
+	/* Impose any restrictions inherited down the device tree */
+	xmax = ss->sc_dev->dv_maxphys;
+	if (bp->b_bcount > xmax)
+		bp->b_bcount = xmax;
+        
+	/* Adapters could enforce their own limits on the
+	   attached scsibuses via the device tree, but existing
+	   drivers mostly do it in their own minphys routines. */
 	scsipi_adapter_minphys(periph->periph_channel, bp);
 
 	/*

Index: src/sys/kern/kern_physio.c
diff -u src/sys/kern/kern_physio.c:1.92 src/sys/kern/kern_physio.c:1.92.14.1
--- src/sys/kern/kern_physio.c:1.92	Thu Feb 10 14:46:45 2011
+++ src/sys/kern/kern_physio.c	Wed Sep 12 06:15:34 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: kern_physio.c,v 1.92 2011/02/10 14:46:45 pooka Exp $	*/
+/*	$NetBSD: kern_physio.c,v 1.92.14.1 2012/09/12 06:15:34 tls Exp $	*/
 
 /*-
  * Copyright (c) 1982, 1986, 1990, 1993
@@ -71,7 +71,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: kern_physio.c,v 1.92 2011/02/10 14:46:45 pooka Exp $");
+__KERNEL_RCSID(0, "$NetBSD: kern_physio.c,v 1.92.14.1 2012/09/12 06:15:34 tls Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -296,7 +296,7 @@ physio(void (*strategy)(struct buf *), s
 				error = EINVAL;
 				goto done;
 			}
-			bp->b_bcount = MIN(MAXPHYS, iovp->iov_len);
+			bp->b_bcount = iovp->iov_len;
 			bp->b_data = iovp->iov_base;
 
 			/*
@@ -307,7 +307,7 @@ physio(void (*strategy)(struct buf *), s
 			(*min_phys)(bp);
 			todo = bp->b_bufsize = bp->b_bcount;
 #if defined(DIAGNOSTIC)
-			if (todo > MAXPHYS)
+			if (todo > MACHINE_MAXPHYS)
 				panic("todo(%zu) > MAXPHYS; minphys broken",
 				    todo);
 #endif /* defined(DIAGNOSTIC) */
@@ -423,6 +423,6 @@ void
 minphys(struct buf *bp)
 {
 
-	if (bp->b_bcount > MAXPHYS)
-		bp->b_bcount = MAXPHYS;
+	if (bp->b_bcount > MACHINE_MAXPHYS)
+		bp->b_bcount = MACHINE_MAXPHYS;
 }

Index: src/sys/kern/subr_autoconf.c
diff -u src/sys/kern/subr_autoconf.c:1.223 src/sys/kern/subr_autoconf.c:1.223.2.1
--- src/sys/kern/subr_autoconf.c:1.223	Thu Aug 30 02:24:20 2012
+++ src/sys/kern/subr_autoconf.c	Wed Sep 12 06:15:34 2012
@@ -1,4 +1,4 @@
-/* $NetBSD: subr_autoconf.c,v 1.223 2012/08/30 02:24:20 matt Exp $ */
+/* $NetBSD: subr_autoconf.c,v 1.223.2.1 2012/09/12 06:15:34 tls Exp $ */
 
 /*
  * Copyright (c) 1996, 2000 Christopher G. Demetriou
@@ -77,7 +77,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: subr_autoconf.c,v 1.223 2012/08/30 02:24:20 matt Exp $");
+__KERNEL_RCSID(0, "$NetBSD: subr_autoconf.c,v 1.223.2.1 2012/09/12 06:15:34 tls Exp $");
 
 #ifdef _KERNEL_OPT
 #include "opt_ddb.h"
@@ -1370,10 +1370,15 @@ config_devalloc(const device_t parent, c
 	memcpy(dev->dv_xname, cd->cd_name, lname);
 	memcpy(dev->dv_xname + lname, xunit, lunit);
 	dev->dv_parent = parent;
-	if (parent != NULL)
+	/* Inherit the parent's transfer limit; the root gets the machine cap. */
+	if (parent != NULL) {
 		dev->dv_depth = parent->dv_depth + 1;
-	else
+		dev->dv_maxphys = parent->dv_maxphys;
+	} else {
 		dev->dv_depth = 0;
+		dev->dv_maxphys = MACHINE_MAXPHYS;
+	}
+	aprint_debug_dev(dev, "dv_maxphys = %d\n", dev->dv_maxphys);
 	dev->dv_flags |= DVF_ACTIVE;	/* always initially active */
 	if (locs) {
 		KASSERT(parent); /* no locators at root */

Index: src/sys/kern/subr_disk.c
diff -u src/sys/kern/subr_disk.c:1.100 src/sys/kern/subr_disk.c:1.100.18.1
--- src/sys/kern/subr_disk.c:1.100	Thu Oct 14 00:47:16 2010
+++ src/sys/kern/subr_disk.c	Wed Sep 12 06:15:34 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: subr_disk.c,v 1.100 2010/10/14 00:47:16 mrg Exp $	*/
+/*	$NetBSD: subr_disk.c,v 1.100.18.1 2012/09/12 06:15:34 tls Exp $	*/
 
 /*-
  * Copyright (c) 1996, 1997, 1999, 2000, 2009 The NetBSD Foundation, Inc.
@@ -67,7 +67,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: subr_disk.c,v 1.100 2010/10/14 00:47:16 mrg Exp $");
+__KERNEL_RCSID(0, "$NetBSD: subr_disk.c,v 1.100.18.1 2012/09/12 06:15:34 tls Exp $");
 
 #include <sys/param.h>
 #include <sys/kernel.h>
@@ -75,6 +75,7 @@ __KERNEL_RCSID(0, "$NetBSD: subr_disk.c,
 #include <sys/buf.h>
 #include <sys/syslog.h>
 #include <sys/disklabel.h>
+#include <sys/conf.h>
 #include <sys/disk.h>
 #include <sys/sysctl.h>
 #include <lib/libkern/libkern.h>
@@ -178,6 +179,42 @@ disk_find(const char *name)
 	return (NULL);
 }
 
+/*
+ * Find the disk for block device dev by name: the blkdev driver name plus
+ * DISKUNIT(dev).  NULL if the major is unnamed or the name won't fit.
+ */
+struct disk *
+disk_find_blk(dev_t dev)
+{
+	devmajor_t major;
+	int unit, len;
+	char name[16];	/* XXX */
+	const char *swname;
+
+	major = major(dev);
+	unit = DISKUNIT(dev);
+	if ((swname = devsw_blk2name(major)) == NULL) {
+		return NULL;
+	}
+
+	/* snprintf returns the full length; == sizeof(name) is truncated too */
+	len = snprintf(name, sizeof(name), "%s%d", swname, unit);
+	if (len < 0 || (size_t)len >= sizeof(name)) {
+		return NULL;
+	}
+
+	return disk_find(name);
+}
+
+/* Largest transfer for diskp: let the driver's d_minphys clamp a buf.
+ * NOTE(review): only b_bcount is set; hooks reading bp->b_dev see 0. */
+int disk_maxphys(const struct disk *const diskp)
+{
+	struct buf b = { .b_bcount = MACHINE_MAXPHYS };
+	diskp->dk_driver->d_minphys(&b);
+	return b.b_bcount;
+}
+
 void
 disk_init(struct disk *diskp, const char *name, const struct dkdriver *driver)
 {

Index: src/sys/kern/sys_descrip.c
diff -u src/sys/kern/sys_descrip.c:1.27 src/sys/kern/sys_descrip.c:1.27.2.1
--- src/sys/kern/sys_descrip.c:1.27	Sun Aug  5 04:26:10 2012
+++ src/sys/kern/sys_descrip.c	Wed Sep 12 06:15:34 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: sys_descrip.c,v 1.27 2012/08/05 04:26:10 riastradh Exp $	*/
+/*	$NetBSD: sys_descrip.c,v 1.27.2.1 2012/09/12 06:15:34 tls Exp $	*/
 
 /*-
  * Copyright (c) 2008 The NetBSD Foundation, Inc.
@@ -67,7 +67,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: sys_descrip.c,v 1.27 2012/08/05 04:26:10 riastradh Exp $");
+__KERNEL_RCSID(0, "$NetBSD: sys_descrip.c,v 1.27.2.1 2012/09/12 06:15:34 tls Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -708,7 +708,9 @@ do_posix_fadvise(int fd, off_t offset, o
 
 	case POSIX_FADV_WILLNEED:
 		vp = fp->f_data;
-		error = uvm_readahead(&vp->v_uobj, offset, endoffset - offset);
+		error = uvm_readahead(&vp->v_uobj, offset,
+				      endoffset - offset,
+				      vp->v_ractx);
 		break;
 
 	case POSIX_FADV_DONTNEED:

Index: src/sys/kern/vfs_vnops.c
diff -u src/sys/kern/vfs_vnops.c:1.185 src/sys/kern/vfs_vnops.c:1.185.2.1
--- src/sys/kern/vfs_vnops.c:1.185	Fri Aug 24 05:52:17 2012
+++ src/sys/kern/vfs_vnops.c	Wed Sep 12 06:15:34 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: vfs_vnops.c,v 1.185 2012/08/24 05:52:17 dholland Exp $	*/
+/*	$NetBSD: vfs_vnops.c,v 1.185.2.1 2012/09/12 06:15:34 tls Exp $	*/
 
 /*-
  * Copyright (c) 2009 The NetBSD Foundation, Inc.
@@ -66,7 +66,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: vfs_vnops.c,v 1.185 2012/08/24 05:52:17 dholland Exp $");
+__KERNEL_RCSID(0, "$NetBSD: vfs_vnops.c,v 1.185.2.1 2012/09/12 06:15:34 tls Exp $");
 
 #include "veriexec.h"
 
@@ -943,8 +943,22 @@ vn_ra_allocctx(struct vnode *vp)
 		ra = uvm_ra_allocctx();
 		mutex_enter(vp->v_interlock);
 		if (ra != NULL && vp->v_ractx == NULL) {
-			vp->v_ractx = ra;
-			ra = NULL;
+			size_t iochunk = 512, ioc = 512;
+
+			while(1) {
+				size_t mp_mp = MAX(vp->v_mount->mnt_maxphys,
+						   MAXPHYS); /* XXX NFS */
+				size_t ra_max = MIN(mp_mp, UVM_RA_WINSIZE_MAX);
+				if (ioc > ra_max) {
+					ra->ra_iochunk = iochunk;
+					vp->v_ractx = ra;
+					ra = NULL;
+					break;
+				} else {
+					iochunk = ioc;
+					ioc *= 2;
+				}
+			}
 		}
 	}
 	if (ra != NULL) {

Index: src/sys/kern/vfs_wapbl.c
diff -u src/sys/kern/vfs_wapbl.c:1.52 src/sys/kern/vfs_wapbl.c:1.52.2.1
--- src/sys/kern/vfs_wapbl.c:1.52	Sun Apr 29 22:55:11 2012
+++ src/sys/kern/vfs_wapbl.c	Wed Sep 12 06:15:34 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: vfs_wapbl.c,v 1.52 2012/04/29 22:55:11 chs Exp $	*/
+/*	$NetBSD: vfs_wapbl.c,v 1.52.2.1 2012/09/12 06:15:34 tls Exp $	*/
 
 /*-
  * Copyright (c) 2003, 2008, 2009 The NetBSD Foundation, Inc.
@@ -36,7 +36,7 @@
 #define WAPBL_INTERNAL
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: vfs_wapbl.c,v 1.52 2012/04/29 22:55:11 chs Exp $");
+__KERNEL_RCSID(0, "$NetBSD: vfs_wapbl.c,v 1.52.2.1 2012/09/12 06:15:34 tls Exp $");
 
 #include <sys/param.h>
 #include <sys/bitops.h>
@@ -415,7 +415,7 @@ wapbl_start(struct wapbl ** wlp, struct 
 	 * to complete a transaction. (probably truncate)
 	 */
 	/* XXX for now pick something minimal */
-	if ((count * blksize) < MAXPHYS) {
+	if ((count * blksize) < mp->mnt_maxphys) {
 		return ENOSPC;
 	}
 
@@ -841,6 +841,7 @@ wapbl_begin(struct wapbl *wl, const char
 {
 	int doflush;
 	unsigned lockcount;
+	uint32_t maxphys;
 
 	KDASSERT(wl);
 
@@ -851,7 +852,8 @@ wapbl_begin(struct wapbl *wl, const char
 	 */
 	mutex_enter(&wl->wl_mtx);
 	lockcount = wl->wl_lock_count;
-	doflush = ((wl->wl_bufbytes + (lockcount * MAXPHYS)) >
+	maxphys = wl->wl_mount->mnt_maxphys;
+	doflush = ((wl->wl_bufbytes + (lockcount * maxphys)) >
 		   wl->wl_bufbytes_max / 2) ||
 		  ((wl->wl_bufcount + (lockcount * 10)) >
 		   wl->wl_bufcount_max / 2) ||

Index: src/sys/miscfs/genfs/genfs_io.c
diff -u src/sys/miscfs/genfs/genfs_io.c:1.55 src/sys/miscfs/genfs/genfs_io.c:1.55.2.1
--- src/sys/miscfs/genfs/genfs_io.c:1.55	Tue May 22 14:20:39 2012
+++ src/sys/miscfs/genfs/genfs_io.c	Wed Sep 12 06:15:35 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: genfs_io.c,v 1.55 2012/05/22 14:20:39 yamt Exp $	*/
+/*	$NetBSD: genfs_io.c,v 1.55.2.1 2012/09/12 06:15:35 tls Exp $	*/
 
 /*
  * Copyright (c) 1982, 1986, 1989, 1993
@@ -31,7 +31,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: genfs_io.c,v 1.55 2012/05/22 14:20:39 yamt Exp $");
+__KERNEL_RCSID(0, "$NetBSD: genfs_io.c,v 1.55.2.1 2012/09/12 06:15:35 tls Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -806,11 +806,16 @@ genfs_do_putpages(struct vnode *vp, off_
 	struct uvm_object * const uobj = &vp->v_uobj;
 	kmutex_t * const slock = uobj->vmobjlock;
 	off_t off;
-	/* Even for strange MAXPHYS, the shift rounds down to a page */
-#define maxpages (MAXPHYS >> PAGE_SHIFT)
 	int i, error, npages, nback;
 	int freeflag;
-	struct vm_page *pgs[maxpages], *pg, *nextpg, *tpg, curmp, endmp;
+#if 1
+	unsigned int maxpages;
+	struct vm_page *pgs[MACHINE_MAXPHYS >> PAGE_SHIFT];
+#else
+	unsigned int maxpages = 64;
+	struct vm_page *pgs[64];
+#endif
+	struct vm_page *pg, *nextpg, *tpg, curmp, endmp;
 	bool wasclean, by_list, needs_clean, yld;
 	bool async = (origflags & PGO_SYNCIO) == 0;
 	bool pagedaemon = curlwp == uvm.pagedaemon_lwp;
@@ -823,6 +828,18 @@ genfs_do_putpages(struct vnode *vp, off_
 	bool has_trans;
 	bool cleanall;
 	bool onworklst;
+	static int printed;
+
+        if (vp && vp->v_mount && vp->v_mount->mnt_maxphys) {
+                maxpages = vp->v_mount->mnt_maxphys >> PAGE_SHIFT;
+        } else {
+                maxpages = MAXPHYS >> PAGE_SHIFT;
+        }
+
+	if (!printed || maxpages > printed ) {
+		printf("putpages: maxpages %d\n", maxpages);
+		printed = maxpages;
+	}
 
 	UVMHIST_FUNC("genfs_putpages"); UVMHIST_CALLED(ubchist);
 

Index: src/sys/sys/device.h
diff -u src/sys/sys/device.h:1.142 src/sys/sys/device.h:1.142.2.1
--- src/sys/sys/device.h:1.142	Sat Jul  7 16:15:21 2012
+++ src/sys/sys/device.h	Wed Sep 12 06:15:35 2012
@@ -1,4 +1,4 @@
-/* $NetBSD: device.h,v 1.142 2012/07/07 16:15:21 tsutsui Exp $ */
+/* $NetBSD: device.h,v 1.142.2.1 2012/09/12 06:15:35 tls Exp $ */
 
 /*
  * Copyright (c) 1996, 2000 Christopher G. Demetriou
@@ -84,6 +84,7 @@
 #ifdef _KERNEL
 #include <sys/mutex.h>
 #include <sys/condvar.h>
+#include <sys/param.h>
 #include <sys/pmf.h>
 #endif
 
@@ -146,6 +147,7 @@ struct device {
 	cfdriver_t	dv_cfdriver;	/* our cfdriver */
 	cfattach_t	dv_cfattach;	/* our cfattach */
 	int		dv_unit;	/* device unit number */
+	int		dv_maxphys;	/* maximum transfer size supported */
 	char		dv_xname[16];	/* external name (name + unit) */
 	device_t	dv_parent;	/* pointer to parent device
 					   (NULL if pseudo- or root node) */
@@ -500,6 +502,15 @@ void		*device_lookup_private(cfdriver_t,
 void		device_register(device_t, void *);
 void		device_register_post_config(device_t, void *);
 
+static inline int device_maxphys(device_t device)
+{
+	if (__predict_true(device->dv_maxphys))	{
+		return device->dv_maxphys;
+	}
+
+	return MAXPHYS;		/* XXX should emit error */
+}
+
 devclass_t	device_class(device_t);
 cfdata_t	device_cfdata(device_t);
 cfdriver_t	device_cfdriver(device_t);

Index: src/sys/sys/disk.h
diff -u src/sys/sys/disk.h:1.57 src/sys/sys/disk.h:1.57.2.1
--- src/sys/sys/disk.h:1.57	Sun Jun 10 17:05:18 2012
+++ src/sys/sys/disk.h	Wed Sep 12 06:15:35 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: disk.h,v 1.57 2012/06/10 17:05:18 mlelstv Exp $	*/
+/*	$NetBSD: disk.h,v 1.57.2.1 2012/09/12 06:15:35 tls Exp $	*/
 
 /*-
  * Copyright (c) 1996, 1997, 2004 The NetBSD Foundation, Inc.
@@ -522,6 +522,8 @@ void	disk_unbusy(struct disk *, long, in
 bool	disk_isbusy(struct disk *);
 void	disk_blocksize(struct disk *, int);
 struct disk *disk_find(const char *);
+struct disk *disk_find_blk(dev_t);
+int	disk_maxphys(const struct disk *const);
 int	disk_ioctl(struct disk *, u_long, void *, int, struct lwp *);
 
 void	dkwedge_init(void);

Index: src/sys/sys/mount.h
diff -u src/sys/sys/mount.h:1.207 src/sys/sys/mount.h:1.207.6.1
--- src/sys/sys/mount.h:1.207	Wed Feb  1 05:34:42 2012
+++ src/sys/sys/mount.h	Wed Sep 12 06:15:35 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: mount.h,v 1.207 2012/02/01 05:34:42 dholland Exp $	*/
+/*	$NetBSD: mount.h,v 1.207.6.1 2012/09/12 06:15:35 tls Exp $	*/
 
 /*
  * Copyright (c) 1989, 1991, 1993
@@ -116,12 +116,13 @@ struct mount {
 	void		*mnt_data;		/* private data */
 	krwlock_t	mnt_unmounting;		/* to prevent new activity */
 	kmutex_t	mnt_renamelock;		/* per-fs rename lock */
-	int		mnt_refcnt;		/* ref count on this structure */
+	int		mnt_refcnt;		/* refcnt on this structure */
 	int		mnt_recursecnt;		/* count of write locks */
 	int		mnt_flag;		/* flags */
 	int		mnt_iflag;		/* internal flags */
 	int		mnt_fs_bshift;		/* offset shift for lblkno */
 	int		mnt_dev_bshift;		/* shift for device sectors */
+	uint32_t	mnt_maxphys;		/* largest xfer allowed */
 	struct statvfs	mnt_stat;		/* cache of filesystem stats */
 	specificdata_reference
 			mnt_specdataref;	/* subsystem specific data */

Index: src/sys/ufs/ffs/ffs_vfsops.c
diff -u src/sys/ufs/ffs/ffs_vfsops.c:1.278 src/sys/ufs/ffs/ffs_vfsops.c:1.278.2.1
--- src/sys/ufs/ffs/ffs_vfsops.c:1.278	Mon Sep 10 07:57:50 2012
+++ src/sys/ufs/ffs/ffs_vfsops.c	Wed Sep 12 06:15:35 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: ffs_vfsops.c,v 1.278 2012/09/10 07:57:50 manu Exp $	*/
+/*	$NetBSD: ffs_vfsops.c,v 1.278.2.1 2012/09/12 06:15:35 tls Exp $	*/
 
 /*-
  * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc.
@@ -61,7 +61,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: ffs_vfsops.c,v 1.278 2012/09/10 07:57:50 manu Exp $");
+__KERNEL_RCSID(0, "$NetBSD: ffs_vfsops.c,v 1.278.2.1 2012/09/12 06:15:35 tls Exp $");
 
 #if defined(_KERNEL_OPT)
 #include "opt_ffs.h"
@@ -881,6 +881,7 @@ ffs_mountfs(struct vnode *devvp, struct 
 	struct buf *bp;
 	struct fs *fs;
 	dev_t dev;
+	struct disk *diskp;
 	struct dkwedge_info dkw;
 	void *space;
 	daddr_t sblockloc, fsblockloc;
@@ -896,6 +897,10 @@ ffs_mountfs(struct vnode *devvp, struct 
 	int32_t fsbsize;
 
 	dev = devvp->v_rdev;
+	if ((diskp = disk_find_blk(dev)) == NULL) {
+		panic("no disk for device %d %d", major(dev), DISKUNIT(dev));
+	}
+
 	cred = l ? l->l_cred : NOCRED;
 
 	/* Flush out any old buffers remaining from a previous use. */
@@ -917,6 +922,13 @@ ffs_mountfs(struct vnode *devvp, struct 
 	if (error)
 		return error;
 
+	/*
+	 * Get the maximum I/O size for the underlying device.
+	 */
+	mp->mnt_maxphys = disk_maxphys(diskp);
+	aprint_debug("ffs_mount: disk %s maxphys %d\n",
+		     diskp->dk_name, mp->mnt_maxphys);
+
 	ump = kmem_zalloc(sizeof(*ump), KM_SLEEP);
 	mutex_init(&ump->um_lock, MUTEX_DEFAULT, IPL_NONE);
 	error = ffs_snapshot_init(ump);

Index: src/sys/uvm/uvm_io.c
diff -u src/sys/uvm/uvm_io.c:1.27 src/sys/uvm/uvm_io.c:1.27.6.1
--- src/sys/uvm/uvm_io.c:1.27	Fri Jan 27 19:48:41 2012
+++ src/sys/uvm/uvm_io.c	Wed Sep 12 06:15:35 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: uvm_io.c,v 1.27 2012/01/27 19:48:41 para Exp $	*/
+/*	$NetBSD: uvm_io.c,v 1.27.6.1 2012/09/12 06:15:35 tls Exp $	*/
 
 /*
  * Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -32,7 +32,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: uvm_io.c,v 1.27 2012/01/27 19:48:41 para Exp $");
+__KERNEL_RCSID(0, "$NetBSD: uvm_io.c,v 1.27.6.1 2012/09/12 06:15:35 tls Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -83,7 +83,8 @@ uvm_io(struct vm_map *map, struct uio *u
 		togo = togo - (endva - VM_MAXUSER_ADDRESS + 1);
 	pageoffset = baseva & PAGE_MASK;
 	baseva = trunc_page(baseva);
-	chunksz = MIN(round_page(togo + pageoffset), trunc_page(MAXPHYS));
+	chunksz = MIN(round_page(togo + pageoffset),
+		      trunc_page(MACHINE_MAXPHYS));
 	error = 0;
 
 	/*

Index: src/sys/uvm/uvm_map.c
diff -u src/sys/uvm/uvm_map.c:1.322 src/sys/uvm/uvm_map.c:1.322.2.1
--- src/sys/uvm/uvm_map.c:1.322	Tue Sep  4 13:37:42 2012
+++ src/sys/uvm/uvm_map.c	Wed Sep 12 06:15:35 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: uvm_map.c,v 1.322 2012/09/04 13:37:42 matt Exp $	*/
+/*	$NetBSD: uvm_map.c,v 1.322.2.1 2012/09/12 06:15:35 tls Exp $	*/
 
 /*
  * Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -66,7 +66,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: uvm_map.c,v 1.322 2012/09/04 13:37:42 matt Exp $");
+__KERNEL_RCSID(0, "$NetBSD: uvm_map.c,v 1.322.2.1 2012/09/12 06:15:35 tls Exp $");
 
 #include "opt_ddb.h"
 #include "opt_uvmhist.h"
@@ -3205,8 +3205,14 @@ uvm_map_willneed(struct vm_map *map, vad
 		 * XXX It might be useful to pmap_enter() the already-in-core
 		 * pages by inventing a "weak" mode for uvm_fault() which would
 		 * only do the PGO_LOCKED pgo_get().
-		 */
-		if (UVM_ET_ISOBJ(entry) && amap == NULL && uobj != NULL) {
+		 *
+		 * XXX The readahead contexts are vnode-associated.  That
+		 * XXX means we can't readahead on swap-backed objects.
+		 * XXX Should the readahead context move to the uobj?
+		 */
+		if (UVM_ET_ISOBJ(entry) && uobj != NULL &&
+		    UVM_OBJ_IS_VNODE(uobj) && amap == NULL) {
+                        struct vnode *vp = (struct vnode *)uobj;
 			off_t offset;
 			off_t size;
 
@@ -3218,7 +3224,7 @@ uvm_map_willneed(struct vm_map *map, vad
 			if (entry->end < end) {
 				size -= end - entry->end;
 			}
-			uvm_readahead(uobj, offset, size);
+			uvm_readahead(uobj, offset, size, vp->v_ractx);
 		}
 		entry = entry->next;
 	}

Index: src/sys/uvm/uvm_readahead.c
diff -u src/sys/uvm/uvm_readahead.c:1.8 src/sys/uvm/uvm_readahead.c:1.8.12.1
--- src/sys/uvm/uvm_readahead.c:1.8	Sun Jun 12 03:36:04 2011
+++ src/sys/uvm/uvm_readahead.c	Wed Sep 12 06:15:36 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: uvm_readahead.c,v 1.8 2011/06/12 03:36:04 rmind Exp $	*/
+/*	$NetBSD: uvm_readahead.c,v 1.8.12.1 2012/09/12 06:15:36 tls Exp $	*/
 
 /*-
  * Copyright (c)2003, 2005, 2009 YAMAMOTO Takashi,
@@ -40,7 +40,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: uvm_readahead.c,v 1.8 2011/06/12 03:36:04 rmind Exp $");
+__KERNEL_RCSID(0, "$NetBSD: uvm_readahead.c,v 1.8.12.1 2012/09/12 06:15:36 tls Exp $");
 
 #include <sys/param.h>
 #include <sys/pool.h>
@@ -54,32 +54,13 @@ __KERNEL_RCSID(0, "$NetBSD: uvm_readahea
 #define	DPRINTF(a)	/* nothing */
 #endif /* defined(READAHEAD_DEBUG) */
 
-/*
- * uvm_ractx: read-ahead context.
- */
-
-struct uvm_ractx {
-	int ra_flags;
-#define	RA_VALID	1
-	off_t ra_winstart;	/* window start offset */
-	size_t ra_winsize;	/* window size */
-	off_t ra_next;		/* next offset to read-ahead */
-};
-
 #if defined(sun2) || defined(sun3)
 /* XXX: on sun2 and sun3 MAXPHYS is 0xe000 */
 #undef MAXPHYS	
 #define MAXPHYS		0x8000	/* XXX */
 #endif
 
-#define	RA_WINSIZE_INIT	MAXPHYS			/* initial window size */
-#define	RA_WINSIZE_MAX	(MAXPHYS * 8)		/* max window size */
-#define	RA_WINSIZE_SEQENTIAL	RA_WINSIZE_MAX	/* fixed window size used for
-						   SEQUENTIAL hint */
-#define	RA_MINSIZE	(MAXPHYS * 2)		/* min size to start i/o */
-#define	RA_IOCHUNK	MAXPHYS			/* read-ahead i/o chunk size */
-
-static off_t ra_startio(struct uvm_object *, off_t, size_t);
+static off_t ra_startio(struct uvm_object *, off_t, size_t, size_t);
 static struct uvm_ractx *ra_allocctx(void);
 static void ra_freectx(struct uvm_ractx *);
 
@@ -116,10 +97,13 @@ ra_freectx(struct uvm_ractx *ra)
  *
  * => start i/o for each RA_IOCHUNK sized chunk.
  * => return offset to which we started i/o.
+ *
+ * => If the next layer up has given us less than IOCHUNK, assume
+ *    it knew best (don't always perform minimal readahead).
  */
 
 static off_t
-ra_startio(struct uvm_object *uobj, off_t off, size_t sz)
+ra_startio(struct uvm_object *uobj, off_t off, size_t sz, size_t chunksz)
 {
 	const off_t endoff = off + sz;
 
@@ -127,16 +111,26 @@ ra_startio(struct uvm_object *uobj, off_
 	    __func__, uobj, off, endoff));
 	off = trunc_page(off);
 	while (off < endoff) {
-		const size_t chunksize = RA_IOCHUNK;
+		const size_t chunksize = MIN(chunksz, round_page(sz));
 		int error;
 		size_t donebytes;
 		int npages;
 		int orignpages;
 		size_t bytelen;
 
-		KASSERT((chunksize & (chunksize - 1)) == 0);
+		if ((chunksize & (chunksize - 1)) != 0) {
+		    panic("bad chunksize %d, iochunk %d, request size %d",
+			  (int)chunksize, (int)chunksz, (int)sz);
+		}
+		/* KASSERT((chunksize & (chunksize - 1)) == 0); */
 		KASSERT((off & PAGE_MASK) == 0);
 		bytelen = ((off + chunksize) & -(off_t)chunksize) - off;
+		if ((bytelen & PAGE_MASK) != 0) {
+			panic("bad bytelen %d with off %d, chunksize %d"
+			      "(iochunk %d, sz %d)",
+			      (int)bytelen, (int)off, (int)chunksize,
+			      (int)chunksz, (int)sz);
+		}
 		KASSERT((bytelen & PAGE_MASK) == 0);
 		npages = orignpages = bytelen >> PAGE_SHIFT;
 		KASSERT(npages != 0);
@@ -178,6 +172,7 @@ uvm_ra_allocctx(void)
 	ra = ra_allocctx();
 	if (ra != NULL) {
 		ra->ra_flags = 0;
+		ra->ra_iochunk = MAXPHYS;
 	}
 
 	return ra;
@@ -219,14 +214,14 @@ uvm_ra_request(struct uvm_ractx *ra, int
 		 * always do read-ahead with a large window.
 		 */
 
-		if ((ra->ra_flags & RA_VALID) == 0) {
+		if ((ra->ra_flags & UVM_RA_VALID) == 0) {
 			ra->ra_winstart = ra->ra_next = 0;
-			ra->ra_flags |= RA_VALID;
+			ra->ra_flags |= UVM_RA_VALID;
 		}
 		if (reqoff < ra->ra_winstart) {
 			ra->ra_next = reqoff;
 		}
-		ra->ra_winsize = RA_WINSIZE_SEQENTIAL;
+		ra->ra_winsize = UVM_RA_WINSIZE_SEQUENTIAL;
 		goto do_readahead;
 	}
 
@@ -243,11 +238,11 @@ uvm_ra_request(struct uvm_ractx *ra, int
 	 * initialize context and return.
 	 */
 
-	if ((ra->ra_flags & RA_VALID) == 0) {
+	if ((ra->ra_flags & UVM_RA_VALID) == 0) {
 initialize:
 		ra->ra_winstart = ra->ra_next = reqoff + reqsize;
-		ra->ra_winsize = RA_WINSIZE_INIT;
-		ra->ra_flags |= RA_VALID;
+		ra->ra_winsize = UVM_RA_WINSIZE_INIT;
+		ra->ra_flags |= UVM_RA_VALID;
 		goto done;
 	}
 
@@ -301,9 +296,9 @@ do_readahead:
 		size_t rasize = reqoff + ra->ra_winsize - ra->ra_next;
 
 #if defined(DIAGNOSTIC)
-		if (rasize > RA_WINSIZE_MAX) {
+		if (rasize > UVM_RA_WINSIZE_MAX) {
 			printf("%s: corrupted context", __func__);
-			rasize = RA_WINSIZE_MAX;
+			rasize = UVM_RA_WINSIZE_MAX;
 		}
 #endif /* defined(DIAGNOSTIC) */
 
@@ -312,11 +307,11 @@ do_readahead:
 		 * otherwise we end up with a stream of small i/o.
 		 */
 
-		if (rasize >= RA_MINSIZE) {
+		if (rasize >= UVM_RA_MINSIZE) {
 			off_t next;
 
 			mutex_exit(uobj->vmobjlock);
-			next = ra_startio(uobj, raoff, rasize);
+			next = ra_startio(uobj, raoff, rasize, ra->ra_iochunk);
 			mutex_enter(uobj->vmobjlock);
 			ra->ra_next = next;
 		}
@@ -330,21 +325,22 @@ do_readahead:
 	 */
 
 	ra->ra_winstart = reqoff + reqsize;
-	ra->ra_winsize = MIN(RA_WINSIZE_MAX, ra->ra_winsize + reqsize);
+	ra->ra_winsize = MIN(UVM_RA_WINSIZE_MAX, ra->ra_winsize + reqsize);
 
 done:;
 }
 
 int
-uvm_readahead(struct uvm_object *uobj, off_t off, off_t size)
+uvm_readahead(struct uvm_object *uobj, off_t off, off_t size,
+	      struct uvm_ractx *ra)
 {
 
 	/*
 	 * don't allow too much read-ahead.
 	 */
-	if (size > RA_WINSIZE_MAX) {
-		size = RA_WINSIZE_MAX;
+	if (size > UVM_RA_WINSIZE_MAX) {
+		size = UVM_RA_WINSIZE_MAX;
 	}
-	ra_startio(uobj, off, size);
+	ra_startio(uobj, off, size, ra->ra_iochunk);
 	return 0;
 }

Index: src/sys/uvm/uvm_readahead.h
diff -u src/sys/uvm/uvm_readahead.h:1.4 src/sys/uvm/uvm_readahead.h:1.4.22.1
--- src/sys/uvm/uvm_readahead.h:1.4	Wed Jun 10 01:54:08 2009
+++ src/sys/uvm/uvm_readahead.h	Wed Sep 12 06:15:36 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: uvm_readahead.h,v 1.4 2009/06/10 01:54:08 yamt Exp $	*/
+/*	$NetBSD: uvm_readahead.h,v 1.4.22.1 2012/09/12 06:15:36 tls Exp $	*/
 
 /*-
  * Copyright (c)2003, 2005, 2009 YAMAMOTO Takashi,
@@ -30,13 +30,30 @@
 #define _UVM_UVM_READAHEAD_H_
 
 struct uvm_object;
-struct uvm_ractx;
+
+/*
+ * uvm_ractx: read-ahead context.
+ */
+
+struct uvm_ractx {
+        int ra_flags;
+#define UVM_RA_VALID        1
+        size_t ra_iochunk;
+        size_t ra_winsize;      /* window size */
+        off_t ra_winstart;      /* window start offset */
+        off_t ra_next;          /* next offset to read-ahead */
+};
+
+#define UVM_RA_WINSIZE_INIT	MAXPHYS		/* initial window size */
+#define UVM_RA_WINSIZE_MAX	(MAXPHYS * 8)	/* max window size */
+#define UVM_RA_WINSIZE_SEQUENTIAL	UVM_RA_WINSIZE_MAX
+#define UVM_RA_MINSIZE		(MAXPHYS * 2)	/* min size to start i/o */
 
 void uvm_ra_init(void);
 struct uvm_ractx *uvm_ra_allocctx(void);
 void uvm_ra_freectx(struct uvm_ractx *);
 void uvm_ra_request(struct uvm_ractx *, int, struct uvm_object *, off_t,
     size_t);
-int uvm_readahead(struct uvm_object *, off_t, off_t);
+int uvm_readahead(struct uvm_object *, off_t, off_t, struct uvm_ractx *);
 
 #endif /* defined(_UVM_UVM_READAHEAD_H_) */

Added files:

Index: src/MAXPHYS-NOTES
diff -u /dev/null src/MAXPHYS-NOTES:1.1.2.1
--- /dev/null	Wed Sep 12 06:15:37 2012
+++ src/MAXPHYS-NOTES	Wed Sep 12 06:15:31 2012
@@ -0,0 +1,76 @@
+Notes on eliminating fixed (usually 64K) MAXPHYS, for more efficient
+operation both with single disk drives/SSDs (transfers in the 128K-256K
+range of sizes are advantageous for many workloads), and particularly with
+RAID sets (consider a typical 12-disk chassis of 2.5" SAS drives, set up
+as an entirely ordinary P+Q parity RAID array with a single hot spare.  To
+feed 64K transfers to each of the resulting 8 data disks requires 512K
+transfers fed to the RAID controller -- is it any wonder NetBSD performs
+so poorly with such hardware for many workloads?).
+
+The basic approach taken here:
+
+	1) Propagate maximum-transfer size down the device tree at
+	   autoconf time.  Drivers take the smaller of their own
+	   transfer-size limit and their parent's limit (the min),
+	   apply that in their minphys() routines (if they are disk
+	   drivers) and propagate it down to their children.
+
+	2) This is just about sufficient for physio, since once you've
+	   got the disk, you can find its minphys routine, and *that*
+	   can get access to the device-instance's softc which has the
+	   size determined by autoconf.
+
+	3) For filesystem I/O, however, we need to be able to find that
+	   maximum transfer size starting not with a device_t but with
+	   a disk driver name (or major number) and unit number.
+
+	   The "disk" interface within the kernel is extended to
+	   let us fish out the dkdevice's minphys routine starting
+	   with the data we've got.  We then feed a fake, huge buffer
+	   to that minphys and see what we get back.
+
+	   This is stashed in the mount point's datastructure and is
+	   then available to the filesystem and pager code via
+	   vp->v_mount any time you've got a filesystem-backed vnode.
+
+The rest is a "simple" matter of making the necessary MD adjustments
+and figuring out where the rest of the hidden 64K bottlenecks are....
+
+MAXPHYS is retained and is used as a default.  A new MACHINE_MAXPHYS
+must be defined, and is the actual largest transfer any hardware for
+a given port can do, or which the portmaster considers appropriate.
+
+MACHINE_MAXPHYS is used to size some on-stack arrays in the pager code
+so don't go too crazy with it.
+
+==== STATUS ====
+
+All work done on amd64.  Not hard to get it going on other ports.  Every
+top-level bus attachment will need code to clamp transfer sizes
+appropriately; see the PCI or ISA code here, or for an unfortunate
+example of when you have to clamp more than you'd like, the pnpbios code.
+
+Access through physio: done?  Disk drivers other than sd, cd, wd
+will need their minphys functions adjusted like those were, and
+will be limited to MAXPHYS per transfer until they do.
+
+	A notable exception is RAIDframe.  It could benefit immediately
+	but needs something a little more sophisticated done to its
+	minphys -- per-unit, it needs to sum up the maxphyses of the unit's
+	data (not parity!) components and return that value.
+
+Access through filesystems - for read, controlled by uvm readahead code.
+We can stash the ra max size in the ra ctx -- we can get it from v_mount
+in the vnode (the uobj!) *if* we put it into struct mount.  Then we only
+have to do the awful walk-the-device-list crap at mount time.  This likely
+wins!
+
+	Unfortunately, there is still a bottleneck, probably from
+	the pager code (genfs I/O code).  The genfs read/getpages
+	code is repellent and huge.  Haven't even started on it yet.
+
+I have attacked the genfs write path already, but though my printfs
+show the appropriate maxpages value propagates down, the resulting
+stream of I/O requests is 64K.  This needs further investigation:
+with maxcontig now gone from the FFS code, where on earth are we
+still clamping the I/O size?

Reply via email to