Hi,
I changed the subject line above; details follow below.
On Thu, 18 May 2000 10:42:48 EDT, "Eric Youngdale" <[EMAIL PROTECTED]> wrote:
>
>----- Original Message -----
>From: Alan Cox <[EMAIL PROTECTED]>
>To: Eric Youngdale <[EMAIL PROTECTED]>
>Cc: Douglas Gilbert <[EMAIL PROTECTED]>; Brian Pomerantz
><[EMAIL PROTECTED]>; <[EMAIL PROTECTED]>
>Sent: Thursday, May 18, 2000 9:50 AM
>Subject: Re: Request splits
>
>
>> > I don't think there are other issues with increasing the limit. For
>> > that matter, I am not entirely sure why there is this arbitrary limit in
>the
>> > code here. In other words, I am not sure if I even added it - it may
>have
>> > gotten added by someone else as part of the ll_rw_blk cleanups.
>>
>> The sglist itself presumably has a size limit and we would need to keep
>the
>> sg list allocation under a page.
>
> Even this is correctly handled, I think. It looks like we allocate from
>the DMA safe pool, and handle arbitrary lengths.
>
> SCpnt->sglist_len = (SCpnt->use_sg * sizeof(struct scatterlist) + 511) &
>~511;
>
> sgpnt = (struct scatterlist *) scsi_malloc(SCpnt->sglist_len);
>
>-Eric
I've had the same question in my mind. I've also wondered why raw I/O is
restricted to only KIO_MAX_SECTORS at a time. So, I enhanced Stephen Tweedie's
raw I/O and the queueing/scsi layers to handle kiobuf-based requests, in
addition to the existing buffer_head-based request processing.
In cases where a kiobuf-based request needs more scatter-gather segments than
the HBA sg_tablesize allows, I limit the I/O size of the scsi command to fit
the HBA sg_tablesize. While collecting the finished sectors, I detect this
condition and use the existing mechanism for finishing scsi command leftovers
to (re)issue the command until the entire request is done.
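To make the splitting concrete, here's a minimal sketch (mine, not part of the
patch) of how the segment count for a kiobuf is derived and capped; plain
integers stand in for the kernel structures, and kio_sg_segments() is a
hypothetical helper. The real logic lives in __init_io()/scsi_kio_sgl() in the
patch below.

        /*
         * Sketch only: one sg entry per page touched, no clustering.
         * Sectors beyond the capped segment count stay in req->nr_sectors
         * and are reissued via the command-leftover path.
         * Assumes length > 0.
         */
        static int kio_sg_segments(unsigned int offset, unsigned int length,
                                   unsigned int page_size, int sg_tablesize)
        {
                unsigned int first = offset / page_size;
                unsigned int last  = (offset + length - 1) / page_size;
                int nr_seg = last - first + 1;

                if (nr_seg > sg_tablesize)
                        nr_seg = sg_tablesize - 1;  /* issue what fits now */
                return nr_seg;
        }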
ll_rw_blk.c thus gains two new functions:
o ll_rw_kio()
o __make_kio_request()
I also split the processing of buffer_head-based requests from kiobuf-based
requests into separate functions in the scsi mid-layer:
o scsi_lib.c:   __scsi_collect_kio_sectors(), __scsi_collect_bh_sectors()
o scsi_merge.c: scsi_kio_sgl(), scsi_bh_sgl()
I've only implemented and tested this against SCSI disks to date.
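For reference, here's a hedged usage sketch of the new entry point; the wrapper
submit_kiobuf() and its fallback policy are hypothetical and not part of this
patch. The kiobuf pages are assumed to be mapped and locked down, and the last
size_t argument to ll_rw_kio() is the block-size unit of blocknr.

        static int submit_kiobuf(int rw, struct kiobuf *iobuf, kdev_t dev,
                                 unsigned long blocknr, size_t blksize)
        {
                int err = 0;

                /* Pages in iobuf must already be mapped and locked down. */
                ll_rw_kio(rw, iobuf, dev, blocknr, blksize, &err);

                /* -ENOSYS means a non-SCSI major (e.g. IDE); the caller is
                 * expected to fall back to buffer_heads + ll_rw_block().
                 * On success, completion is signalled via kiobuf->end_io
                 * and kiobuf->errno. */
                return err;
        }

The -ENOSYS fallback is what keeps the existing buffer_head path intact for
drivers that don't understand kiobufs.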
Here's the patch against a 2.3.99-pre2 kernel. To recap, two primary reasons
for this patch:
1. To enhance the queueing layer and scsi mid-layer to handle kiobuf-based
requests as well, and
2. To remove the request size limits in the upper layers (above ll_rw_blk.c).
The KIO_MAX_SECTORS limit seems to have been inspired by MAX_SECTORS
(128 sectors per request) in ll_rw_blk.c. The scsi mid-layer should handle
`oversize' requests based on the HBA sg_tablesize.
I'm not too sure about 2. above, so I'd love to hear from more knowledgeable
people on that score.
I'd greatly appreciate any feedback before I submit this patch `officially'.
Thanks,
-Chait.
[Chaitanya Tumuluri, [EMAIL PROTECTED]]
--- linux-2.3pure/linux/drivers/block/ll_rw_blk.c Thu May 18 12:21:40 2000
+++ linux-2.3rawio/linux/drivers/block/ll_rw_blk.c Thu May 18 10:48:57 2000
@@ -4,6 +4,7 @@
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright (C) 1994, Karl Keyte: Added support for disk statistics
* Elevator latency, (C) 2000 Andrea Arcangeli <[EMAIL PROTECTED]> SuSE
+ * Added support for kiobuf-based I/O requests: Chaitanya Tumuluri [[EMAIL PROTECTED]]
*/
/*
@@ -656,7 +657,10 @@
starving = 1;
if (latency < 0)
continue;
-
+#if CONFIG_KIOBUF_IO
+ if (req->kiobuf)
+ continue;
+#endif
if (req->sem)
continue;
if (req->cmd != rw)
@@ -761,6 +765,9 @@
req->nr_hw_segments = 1; /* Always 1 for a new request. */
req->buffer = bh->b_data;
req->sem = NULL;
+#if CONFIG_KIOBUF_IO
+ req->kiobuf = NULL;
+#endif
req->bh = bh;
req->bhtail = bh;
req->q = q;
@@ -903,6 +910,310 @@
__ll_rw_block(rw, nr, bh, 1);
}
+#if CONFIG_KIOBUF_IO
+/*
+ * Function: __make_kio_request()
+ *
+ * Purpose: Construct a kiobuf-based request and insert into request queue.
+ *
+ * Arguments: q - request queue of device
+ * rw - read/write
+ * kiobuf - collection of pages
+ * dev - device against which I/O requested
+ * blocknr - dev block number at which to start I/O
+ * blksize - units (512B or other) of blocknr
+ *
+ * Lock status: No lock held upon entry.
+ *
+ * Returns: Nothing
+ *
+ * Notes: Requests generated by this function should _NOT_ be merged by
+ * __make_request() (hence the new check for `req->kiobuf').
+ *
+ * All (relevant) req->Y parameters are expressed in sector size
+ * of 512B for kiobuf based I/O. This is assumed in the scsi
+ * mid-layer as well.
+ */
+static inline void __make_kio_request(request_queue_t * q,
+ int rw,
+ struct kiobuf * kiobuf,
+ kdev_t dev,
+ unsigned long blocknr,
+ size_t blksize)
+{
+ int major = MAJOR(dev);
+ unsigned int sector, count, nr_bytes, total_bytes, nr_seg;
+ struct request * req;
+ int rw_ahead, max_req;
+ unsigned long flags;
+ struct list_head * head = &q->queue_head;
+ size_t curr_offset;
+ int orig_latency;
+ elevator_t * elevator;
+ int correct_size, i, kioind;
+
+ /*
+ * Sanity Tests:
+ *
+ * The input arg. `blocknr' is in units of the
+ * input arg. `blksize' (inode->i_sb->s_blocksize).
+ * Convert to 512B unit used in blk_size[] array.
+ */
+ count = kiobuf->length >> 9;
+ sector = blocknr * (blksize >> 9);
+
+ if (blk_size[major]) {
+ unsigned long maxsector = (blk_size[major][MINOR(dev)] << 1) + 1;
+
+ if (maxsector < count || maxsector - count < sector) {
+ if (!blk_size[major][MINOR(dev)]) {
+ kiobuf->errno = -EINVAL;
+ goto end_io;
+ }
+ /* This may well happen - the kernel calls bread()
+ without checking the size of the device, e.g.,
+ when mounting a device. */
+ printk(KERN_INFO
+ "attempt to access beyond end of device\n");
+ printk(KERN_INFO "%s: rw=%d, want=%d, limit=%d\n",
+ kdevname(dev), rw,
+ (sector + count)>>1,
+ blk_size[major][MINOR(dev)]);
+ kiobuf->errno = -ESPIPE;
+ goto end_io;
+ }
+ }
+ /*
+ * Allow only basic block size multiples in the
+ * kiobuf->length.
+ */
+ correct_size = BLOCK_SIZE;
+ if (blksize_size[major]) {
+ i = blksize_size[major][MINOR(dev)];
+ if (i)
+ correct_size = i;
+ }
+ if ((kiobuf->length % correct_size) != 0) {
+ printk(KERN_NOTICE "ll_rw_kio: "
+ "request size [%d] not a multiple of device [%s] block-size
+[%d]\n",
+ kiobuf->length,
+ kdevname(dev),
+ correct_size);
+ kiobuf->errno = -EINVAL;
+ goto end_io;
+ }
+ rw_ahead = 0; /* normal case; gets changed below for READA */
+ switch (rw) {
+ case READA:
+ rw_ahead = 1;
+ rw = READ; /* drop into READ */
+ case READ:
+ kstat.pgpgin++;
+ max_req = NR_REQUEST; /* reads take precedence */
+ break;
+ case WRITERAW:
+ rw = WRITE;
+ goto do_write; /* Skip the buffer refile */
+ case WRITE:
+ do_write:
+ /*
+ * We don't allow the write-requests to fill up the
+ * queue completely: we want some room for reads,
+ * as they take precedence. The last third of the
+ * requests are only for reads.
+ */
+ kstat.pgpgout++;
+ max_req = (NR_REQUEST * 2) / 3;
+ break;
+ default:
+ BUG();
+ kiobuf->errno = -EINVAL;
+ goto end_io;
+ }
+
+ /*
+ * Creation of bounce buffers for data in high memory
+ * should be (and is) handled lower in the food-chain.
+ * Currently done in scsi_merge.c for scsi disks.
+ *
+ * Look for a free request with spinlock held.
+ * Apart from atomic queue access, it prevents
+ * another thread that has already queued a kiobuf-request
+ * into this queue from starting it, till we are done.
+ */
+ elevator = &q->elevator;
+ orig_latency = elevator_request_latency(elevator, rw);
+ spin_lock_irqsave(&io_request_lock,flags);
+
+ if (list_empty(head))
+ q->plug_device_fn(q, dev);
+ /*
+ * The scsi disk and cdrom drivers completely remove the request
+ * from the queue when they start processing an entry. For this
+ * reason it is safe to continue to add links to the top entry
+ * for those devices.
+ *
+ * All other drivers need to jump over the first entry, as that
+ * entry may be busy being processed and we thus can't change
+ * it.
+ */
+ if (q->head_active && !q->plugged)
+ head = head->next;
+
+ /* find an unused request. */
+ req = get_request(max_req, dev);
+
+ /*
+ * if no request available: if rw_ahead, forget it,
+ * otherwise try again blocking..
+ */
+ if (!req) {
+ spin_unlock_irqrestore(&io_request_lock,flags);
+ if (rw_ahead){
+ kiobuf->errno = -EBUSY;
+ goto end_io;
+ }
+ req = __get_request_wait(max_req, dev);
+ spin_lock_irqsave(&io_request_lock,flags);
+
+ /* revalidate elevator */
+ head = &q->queue_head;
+ if (q->head_active && !q->plugged)
+ head = head->next;
+ }
+
+ /* fill up the request-info, and add it to the queue */
+ req->cmd = rw;
+ req->errors = 0;
+ req->sector = sector;
+ req->nr_hw_segments = 1; /* Always 1 for a new request. */
+ req->nr_sectors = count; /* Length of kiobuf */
+ req->sem = NULL;
+ req->kiobuf = kiobuf;
+ req->bh = NULL;
+ req->bhtail = NULL;
+ req->q = q;
+ /* Calculate req->buffer */
+ curr_offset = kiobuf->offset;
+ for (kioind=0; kioind<kiobuf->nr_pages; kioind++)
+ if (curr_offset >= PAGE_SIZE)
+ curr_offset -= PAGE_SIZE;
+ else
+ break;
+ req->buffer = (char *) page_address(kiobuf->maplist[kioind]) +
+ curr_offset;
+
+ /* Calculate current_nr_sectors and # of scatter gather segments needed */
+ total_bytes = kiobuf->length;
+ nr_bytes = (PAGE_SIZE - curr_offset) > total_bytes ?
+ total_bytes : (PAGE_SIZE - curr_offset);
+ req->current_nr_sectors = nr_bytes >> 9;
+
+ for (nr_seg = 1;
+ kioind<kiobuf->nr_pages && nr_bytes != total_bytes;
+ kioind++) {
+ ++nr_seg;
+ if((nr_bytes + PAGE_SIZE) > total_bytes){
+ break;
+ } else {
+ nr_bytes += PAGE_SIZE;
+ }
+ }
+ req->nr_segments = nr_seg;
+
+ add_request(q, req, head, orig_latency);
+ elevator_account_request(elevator, req);
+
+ spin_unlock_irqrestore(&io_request_lock, flags);
+
+end_io:
+ return;
+}
+
+
+
+/*
+ * Function: ll_rw_kio()
+ *
+ * Purpose: Insert kiobuf-based request into request queue.
+ *
+ * Arguments: rw - read/write
+ * kiobuf - collection of pages
+ * dev - device against which I/O requested
+ * blocknr - dev block number at which to start I/O
+ * sector - units (512B or other) of blocknr
+ * error - return status
+ *
+ * Lock status: Assumed no lock held upon entry.
+ * Assumed that the pages in the kiobuf ___ARE LOCKED DOWN___.
+ *
+ * Returns: Nothing
+ *
+ * Notes: This function is called from any subsystem using kiovec[]
+ * collection of kiobufs for I/O (e.g. `pagebufs', raw-io).
+ * Relies on "kiobuf" field in the request structure.
+ */
+void ll_rw_kio(int rw,
+ struct kiobuf *kiobuf,
+ kdev_t dev,
+ unsigned long blocknr,
+ size_t sector,
+ int *error)
+{
+ request_queue_t *q;
+ /*
+ * Only support SCSI disk for now.
+ *
+ * ENOSYS to indicate caller
+ * should try ll_rw_block()
+ * for non-SCSI (e.g. IDE) disks.
+ */
+ if (!SCSI_DISK_MAJOR(MAJOR(dev))){
+ *error = -ENOSYS;
+ goto end_io;
+ }
+ /*
+ * Sanity checks
+ */
+ q = blk_get_queue(dev);
+ if (!q) {
+ printk(KERN_ERR
+ "ll_rw_kio: Trying to read nonexistent block-device %s\n",
+ kdevname(dev));
+ *error = -ENODEV;
+ goto end_io;
+ }
+ if ((rw & WRITE) && is_read_only(dev)) {
+ printk(KERN_NOTICE "Can't write to read-only device %s\n",
+ kdevname(dev));
+ *error = -EPERM;
+ goto end_io;
+ }
+ if (q->make_request_fn) {
+ printk(KERN_ERR
+ "ll_rw_kio: Unexpected device [%s] queueing function encountered\n",
+ kdevname(dev));
+ *error = -ENOSYS;
+ goto end_io;
+ }
+
+ __make_kio_request(q, rw, kiobuf, dev, blocknr, sector);
+ if (kiobuf->errno != 0) {
+ *error = kiobuf->errno;
+ goto end_io;
+ }
+
+ return;
+end_io:
+ /*
+ * We come here only on an error, so just set
+ * kiobuf->errno; the caller handles completion.
+ */
+ if(kiobuf->errno == 0)
+ kiobuf->errno = *error;
+}
+#endif /* CONFIG_KIOBUF_IO */
+
#ifdef CONFIG_STRAM_SWAP
extern int stram_device_init (void);
#endif
@@ -1085,3 +1396,7 @@
EXPORT_SYMBOL(blk_queue_pluggable);
EXPORT_SYMBOL(blk_queue_make_request);
EXPORT_SYMBOL(generic_make_request);
+#if CONFIG_KIOBUF_IO
+EXPORT_SYMBOL(__make_kio_request);
+EXPORT_SYMBOL(ll_rw_kio);
+#endif
--- linux-2.3pure/linux/drivers/scsi/Config.in Thu May 18 12:21:58 2000
+++ linux-2.3rawio/linux/drivers/scsi/Config.in Mon May 15 15:44:09 2000
@@ -20,6 +20,7 @@
#if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then
bool ' Enable extra checks in new queueing code' CONFIG_SCSI_DEBUG_QUEUES
+ bool ' Enable kiobuf-based I/O request handling' CONFIG_KIOBUF_IO
#fi
bool ' Probe all LUNs on each SCSI device' CONFIG_SCSI_MULTI_LUN
--- linux-2.3pure/linux/drivers/scsi/scsi_lib.c Thu May 18 12:22:01 2000
+++ linux-2.3rawio/linux/drivers/scsi/scsi_lib.c Thu May 18 10:48:57 2000
@@ -15,6 +15,8 @@
* a low-level driver if they wished. Note however that this file also
* contains the "default" versions of these functions, as we don't want to
* go through and retrofit queueing functions into all 30 some-odd drivers.
+ *
+ * Added support for kiobuf-based I/O requests. [Chaitanya Tumuluri, [EMAIL PROTECTED]]
*/
#define __NO_VERSION__
@@ -369,6 +371,161 @@
spin_unlock_irqrestore(&io_request_lock, flags);
}
+
+/*
+ * Function: __scsi_collect_bh_sectors()
+ *
+ * Purpose: Helper routine for __scsi_end_request() to mark some number
+ * (or all, if that is the case) of sectors complete.
+ *
+ * Arguments: req - request struct. from scsi command block.
+ * uptodate - 1 if I/O indicates success, 0 for I/O error.
+ * sectors - number of sectors we want to mark.
+ * leftovers - indicates if any sectors were not done.
+ *
+ * Lock status: Assumed that lock is not held upon entry.
+ *
+ * Returns: Nothing
+ *
+ * Notes: Separate buffer-head processing from kiobuf processing
+ */
+__inline static void __scsi_collect_bh_sectors(struct request *req,
+ int uptodate,
+ int sectors,
+ char **leftovers)
+{
+ struct buffer_head *bh;
+
+ do {
+ if ((bh = req->bh) != NULL) {
+ req->bh = bh->b_reqnext;
+ req->nr_sectors -= bh->b_size >> 9;
+ req->sector += bh->b_size >> 9;
+ bh->b_reqnext = NULL;
+ sectors -= bh->b_size >> 9;
+ bh->b_end_io(bh, uptodate);
+ if ((bh = req->bh) != NULL) {
+ req->current_nr_sectors = bh->b_size >> 9;
+ if (req->nr_sectors < req->current_nr_sectors) {
+ req->nr_sectors = req->current_nr_sectors;
+ printk("collect_bh: buffer-list destroyed\n");
+ }
+ }
+ }
+ } while (sectors && bh);
+
+ /* Check for leftovers */
+ if (req->bh) {
+ *leftovers = req->bh->b_data;
+ }
+ return;
+
+}
+
+#if CONFIG_KIOBUF_IO
+/*
+ * Function: __scsi_collect_kio_sectors()
+ *
+ * Purpose: Helper routine for __scsi_end_request() to mark some number
+ * (or all) of the I/O sectors and attendant pages complete.
+ * Updates the request nr_segments, nr_sectors accordingly.
+ *
+ * Arguments: req - request struct. from scsi command block.
+ * uptodate - 1 if I/O indicates success, 0 for I/O error.
+ * sectors - number of sectors we want to mark.
+ * leftovers - indicates if any sectors were not done.
+ *
+ * Lock status: Assumed that lock is not held upon entry.
+ *
+ * Returns: Nothing
+ *
+ * Notes: Separate buffer-head processing from kiobuf processing.
+ * We don't know if this was a single or multi-segment sgl
+ * request. Treat it as though it were a multi-segment one.
+ */
+__inline static void __scsi_collect_kio_sectors(struct request *req,
+ int uptodate,
+ int sectors,
+ char **leftovers)
+{
+ int pgcnt, nr_pages;
+ size_t curr_offset;
+ unsigned long va = 0;
+ unsigned int nr_bytes, total_bytes, page_sectors;
+
+ nr_pages = req->kiobuf->nr_pages;
+ total_bytes = (req->nr_sectors << 9);
+ curr_offset = req->kiobuf->offset;
+
+ /*
+ * In the case of leftover requests, the kiobuf->length
+ * remains the same, but req->nr_sectors would be smaller.
+ * Adjust curr_offset in this case. If not a leftover,
+ * the following makes no difference.
+ */
+ curr_offset += (((req->kiobuf->length >> 9) - req->nr_sectors) << 9);
+
+ /* How far into the kiobuf is the offset? */
+ for (pgcnt=0; pgcnt<nr_pages; pgcnt++) {
+ if(curr_offset >= PAGE_SIZE) {
+ curr_offset -= PAGE_SIZE;
+ continue;
+ } else {
+ break;
+ }
+ }
+ /*
+ * Reusing the pgcnt and va value from above:
+ * Harvest pages to account for number of sectors
+ * passed into function.
+ */
+ for (nr_bytes = 0;
+ pgcnt<nr_pages && nr_bytes != total_bytes;
+ pgcnt++) {
+ va = page_address(req->kiobuf->maplist[pgcnt])
+ + curr_offset;
+ /* First page or final page? Partial page? */
+ if (curr_offset != 0) {
+ page_sectors = (PAGE_SIZE - curr_offset) > total_bytes ?
+ total_bytes >> 9 : (PAGE_SIZE - curr_offset) >> 9;
+ curr_offset = 0;
+ } else if((nr_bytes + PAGE_SIZE) > total_bytes) {
+ page_sectors = (total_bytes - nr_bytes) >> 9;
+ } else {
+ page_sectors = PAGE_SIZE >> 9;
+ }
+ nr_bytes += (page_sectors << 9);
+ /* Leftover sectors in this page (onward)? */
+ if (sectors < page_sectors) {
+ req->nr_sectors -= sectors;
+ req->sector += sectors;
+ req->current_nr_sectors = page_sectors - sectors;
+ va += (sectors << 9); /* Update for req->buffer */
+ sectors = 0;
+ break;
+ } else {
+ /* Mark this page as done */
+ req->nr_segments--; /* No clustering for kiobuf */
+ req->nr_sectors -= page_sectors;
+ req->sector += page_sectors;
+ if (!uptodate && (req->kiobuf->errno != 0)){
+ req->kiobuf->errno = -EIO;
+ }
+ sectors -= page_sectors;
+ }
+ }
+
+ /* Check for leftovers */
+ if (req->nr_sectors) {
+ *leftovers = (char *)va;
+ } else if (req->kiobuf->end_io) {
+ req->kiobuf->end_io(req->kiobuf);
+ }
+
+ return;
+}
+#endif
+
/*
* Function: scsi_end_request()
*
@@ -396,8 +553,8 @@
int requeue)
{
struct request *req;
- struct buffer_head *bh;
-
+ char * leftovers = NULL;
+
ASSERT_LOCK(&io_request_lock, 0);
req = &SCpnt->request;
@@ -406,45 +563,38 @@
printk(" I/O error: dev %s, sector %lu\n",
kdevname(req->rq_dev), req->sector);
}
- do {
- if ((bh = req->bh) != NULL) {
- req->bh = bh->b_reqnext;
- req->nr_sectors -= bh->b_size >> 9;
- req->sector += bh->b_size >> 9;
- bh->b_reqnext = NULL;
- sectors -= bh->b_size >> 9;
- bh->b_end_io(bh, uptodate);
- if ((bh = req->bh) != NULL) {
- req->current_nr_sectors = bh->b_size >> 9;
- if (req->nr_sectors < req->current_nr_sectors) {
- req->nr_sectors = req->current_nr_sectors;
- printk("scsi_end_request: buffer-list
destroyed\n");
- }
- }
- }
- } while (sectors && bh);
+ leftovers = NULL;
+ if (req->bh != NULL) { /* Buffer head based request */
+ __scsi_collect_bh_sectors(req, uptodate, sectors, &leftovers);
+ }
+#if CONFIG_KIOBUF_IO
+ else if (req->kiobuf != NULL) { /* Kiobuf based request */
+ __scsi_collect_kio_sectors(req, uptodate, sectors, &leftovers);
+ } else {
+ panic("Both bh and kiobuf pointers are unset in request!\n");
+ }
+#endif
/*
* If there are blocks left over at the end, set up the command
* to queue the remainder of them.
*/
- if (req->bh) {
- request_queue_t *q;
-
- if( !requeue )
- {
- return SCpnt;
- }
-
- q = &SCpnt->device->request_queue;
-
- req->buffer = bh->b_data;
- /*
- * Bleah. Leftovers again. Stick the leftovers in
- * the front of the queue, and goose the queue again.
- */
- scsi_queue_next_request(q, SCpnt);
- return SCpnt;
+ if (leftovers != NULL) {
+ request_queue_t *q;
+
+ if( !requeue ) {
+ return SCpnt;
+ }
+
+ q = &SCpnt->device->request_queue;
+
+ req->buffer = leftovers;
+ /*
+ * Bleah. Leftovers again. Stick the leftovers in
+ * the front of the queue, and goose the queue again.
+ */
+ scsi_queue_next_request(q, SCpnt);
+ return SCpnt;
}
/*
* This request is done. If there is someone blocked waiting for this
@@ -604,13 +754,13 @@
scsi_free(SCpnt->buffer, SCpnt->sglist_len);
} else {
if (SCpnt->buffer != SCpnt->request.buffer) {
- if (SCpnt->request.cmd == READ) {
- memcpy(SCpnt->request.buffer, SCpnt->buffer,
- SCpnt->bufflen);
- }
- scsi_free(SCpnt->buffer, SCpnt->bufflen);
+ if (SCpnt->request.cmd == READ) {
+ memcpy(SCpnt->request.buffer, SCpnt->buffer,
+ SCpnt->bufflen);
+ }
+ scsi_free(SCpnt->buffer, SCpnt->bufflen);
}
- }
+ }
/*
* Zero these out. They now point to freed memory, and it is
@@ -653,7 +803,7 @@
* rest of the command, or start a new one.
*/
if (result == 0 || SCpnt == NULL ) {
- return;
+ return;
}
}
/*
@@ -837,7 +987,7 @@
Scsi_Device *SDpnt;
struct Scsi_Host *SHpnt;
struct Scsi_Device_Template *STpnt;
-
+
ASSERT_LOCK(&io_request_lock, 1);
SDpnt = (Scsi_Device *) q->queuedata;
--- linux-2.3pure/linux/drivers/scsi/scsi_merge.c Thu May 18 12:22:01 2000
+++ linux-2.3rawio/linux/drivers/scsi/scsi_merge.c Thu May 18 10:48:57 2000
@@ -6,6 +6,7 @@
* Based upon conversations with large numbers
* of people at Linux Expo.
* Support for dynamic DMA mapping: Jakub Jelinek ([EMAIL PROTECTED]).
+ * Support for kiobuf-based I/O requests. [Chaitanya Tumuluri, [EMAIL PROTECTED]]
*/
/*
@@ -90,15 +91,17 @@
printk("nr_segments is %x\n", req->nr_segments);
printk("counted segments is %x\n", segments);
printk("Flags %d %d\n", use_clustering, dma_host);
- for (bh = req->bh; bh->b_reqnext != NULL; bh = bh->b_reqnext)
- {
- printk("Segment 0x%p, blocks %d, addr 0x%lx\n",
- bh,
- bh->b_size >> 9,
- virt_to_phys(bh->b_data - 1));
+ if (req->bh != NULL) {
+ for (bh = req->bh; bh->b_reqnext != NULL; bh = bh->b_reqnext) {
+ printk("Segment 0x%p, blocks %d, addr 0x%lx\n",
+ bh,
+ bh->b_size >> 9,
+ virt_to_phys(bh->b_data - 1));
+ }
}
+
panic("Ththththaats all folks. Too dangerous to continue.\n");
-}
+}
/*
@@ -298,9 +301,25 @@
SHpnt = SCpnt->host;
SDpnt = SCpnt->device;
- req->nr_segments = __count_segments(req,
- CLUSTERABLE_DEVICE(SHpnt, SDpnt),
- SHpnt->unchecked_isa_dma, NULL);
+ if (req->bh){
+ req->nr_segments = __count_segments(req,
+ CLUSTERABLE_DEVICE(SHpnt, SDpnt),
+ SHpnt->unchecked_isa_dma, NULL);
+ }
+#if CONFIG_KIOBUF_IO
+ else if (req->kiobuf){
+ /* Since there is no clustering/merging in kiobuf
+ * requests, the nr_segments is simply a count of
+ * the number of pages needing I/O. nr_segments is
+ * updated in __scsi_collect_kio_sectors() called
+ * from scsi_end_request(), for the leftover case.
+ * [[EMAIL PROTECTED]]
+ */
+ return;
+ } else {
+ panic("Both kiobuf and bh pointers are NULL!");
+ }
+#endif
}
#define MERGEABLE_BUFFERS(X,Y) \
@@ -746,6 +765,191 @@
MERGEREQFCT(scsi_merge_requests_fn_, 0, 0)
MERGEREQFCT(scsi_merge_requests_fn_c, 1, 0)
MERGEREQFCT(scsi_merge_requests_fn_dc, 1, 1)
+
+
+
+/*
+ * Function: scsi_bh_sgl()
+ *
+ * Purpose: Helper routine to construct S(catter) G(ather) L(ist)
+ * assuming buffer_head-based request in the Scsi_Cmnd.
+ *
+ * Arguments: SCpnt - Command descriptor
+ * use_clustering - 1 if host uses clustering
+ * dma_host - 1 if this host has ISA DMA issues (bus doesn't
+ * expose all of the address lines, so that DMA cannot
+ * be done from an arbitrary address).
+ * sgpnt - pointer to sgl
+ *
+ * Returns: Number of sg segments in the sgl.
+ *
+ * Notes: Only the SCpnt argument should be a non-constant variable.
+ * This functionality was abstracted out of the original code
+ * in __init_io().
+ */
+__inline static int scsi_bh_sgl(Scsi_Cmnd * SCpnt,
+ int use_clustering,
+ int dma_host,
+ struct scatterlist * sgpnt)
+{
+ int count;
+ struct buffer_head * bh;
+ struct buffer_head * bhprev;
+
+ bhprev = NULL;
+
+ for (count = 0, bh = SCpnt->request.bh;
+ bh; bh = bh->b_reqnext) {
+ if (use_clustering && bhprev != NULL) {
+ if (dma_host &&
+ virt_to_phys(bhprev->b_data) - 1 == ISA_DMA_THRESHOLD) {
+ /* Nothing - fall through */
+ } else if (CONTIGUOUS_BUFFERS(bhprev, bh)) {
+ /*
+ * This one is OK. Let it go. Note that we
+ * do not have the ability to allocate
+ * bounce buffer segments > PAGE_SIZE, so
+ * for now we limit the thing.
+ */
+ if( dma_host ) {
+#ifdef DMA_SEGMENT_SIZE_LIMITED
+ if( virt_to_phys(bh->b_data) - 1 < ISA_DMA_THRESHOLD
+ || sgpnt[count - 1].length + bh->b_size <= PAGE_SIZE ) {
+ sgpnt[count - 1].length += bh->b_size;
+ bhprev = bh;
+ continue;
+ }
+#else
+ sgpnt[count - 1].length += bh->b_size;
+ bhprev = bh;
+ continue;
+#endif
+ } else {
+ sgpnt[count - 1].length += bh->b_size;
+ SCpnt->request_bufflen += bh->b_size;
+ bhprev = bh;
+ continue;
+ }
+ }
+ }
+ count++;
+ sgpnt[count - 1].address = bh->b_data;
+ sgpnt[count - 1].length += bh->b_size;
+ if (!dma_host) {
+ SCpnt->request_bufflen += bh->b_size;
+ }
+ bhprev = bh;
+ }
+
+ return count;
+}
+
+#if CONFIG_KIOBUF_IO
+/*
+ * Function: scsi_kio_sgl()
+ *
+ * Purpose: Helper routine to construct S(catter) G(ather) L(ist)
+ * assuming kiobuf-based request in the Scsi_Cmnd.
+ *
+ * Arguments: SCpnt - Command descriptor
+ * dma_host - 1 if this host has ISA DMA issues (bus doesn't
+ * expose all of the address lines, so that DMA cannot
+ * be done from an arbitrary address).
+ * sgpnt - pointer to sgl
+ *
+ * Returns: Number of sg segments in the sgl.
+ *
+ * Notes: Only the SCpnt argument should be a non-constant variable.
+ * This functionality was carved out of __init_io() in the
+ * original implementation so that the sgl can be constructed
+ * for kiobuf-based I/Os as well.
+ *
+ * Constructs SCpnt->use_sg sgl segments for the kiobuf.
+ *
+ * No clustering of pages is attempted unlike the buffer_head
+ * case. Primarily because the pages in a kiobuf are unlikely to
+ * be contiguous. Bears checking.
+ */
+__inline static int scsi_kio_sgl(Scsi_Cmnd * SCpnt,
+ int dma_host,
+ struct scatterlist * sgpnt)
+{
+ int pgcnt, nr_seg, curr_seg, nr_sectors;
+ size_t curr_offset;
+ unsigned long va;
+ unsigned int nr_bytes, total_bytes, sgl_seg_bytes;
+
+ curr_seg = SCpnt->use_sg; /* This many sgl segments */
+ nr_sectors = SCpnt->request.nr_sectors;
+ total_bytes = (nr_sectors << 9);
+ curr_offset = SCpnt->request.kiobuf->offset;
+
+ /*
+ * In the case of leftover requests, the kiobuf->length
+ * remains the same, but req->nr_sectors would be smaller.
+ * Use this difference to adjust curr_offset in this case.
+ * If not a leftover, the following makes no difference.
+ */
+ curr_offset += (((SCpnt->request.kiobuf->length >> 9) - nr_sectors) << 9);
+ /* How far into the kiobuf is the offset? */
+ for (pgcnt=0; pgcnt<SCpnt->request.kiobuf->nr_pages; pgcnt++) {
+ if(curr_offset >= PAGE_SIZE) {
+ curr_offset -= PAGE_SIZE;
+ continue;
+ } else {
+ break;
+ }
+ }
+ /*
+ * Reusing the pgcnt value from above:
+ * Starting at the right page and offset, build curr_seg
+ * sgl segments (one per page). Account for both a
+ * potentially partial last page and unrequired pages
+ * at the end of the kiobuf.
+ */
+ nr_bytes = 0;
+ for (nr_seg = 0; nr_seg < curr_seg; nr_seg++) {
+ va = page_address(SCpnt->request.kiobuf->maplist[pgcnt])
+ + curr_offset;
+ ++pgcnt;
+
+ /*
+ * If this is the first page, account for offset.
+ * If this the final (maybe partial) page, get remainder.
+ */
+ if (curr_offset != 0) {
+ sgl_seg_bytes = PAGE_SIZE - curr_offset;
+ curr_offset = 0;
+ } else if((nr_bytes + PAGE_SIZE) > total_bytes) {
+ sgl_seg_bytes = total_bytes - nr_bytes;
+ } else {
+ sgl_seg_bytes = PAGE_SIZE;
+ }
+
+ nr_bytes += sgl_seg_bytes;
+ sgpnt[nr_seg].address = (char *)va;
+ sgpnt[nr_seg].alt_address = 0;
+ sgpnt[nr_seg].length = sgl_seg_bytes;
+
+ if (!dma_host) {
+ SCpnt->request_bufflen += sgl_seg_bytes;
+ }
+ }
+ /* Sanity Check */
+ if ((nr_bytes > total_bytes) ||
+ (pgcnt > SCpnt->request.kiobuf->nr_pages)) {
+ printk(KERN_ERR
+ "scsi_kio_sgl: sgl bytes[%d], request bytes[%d]\n"
+ "scsi_kio_sgl: pgcnt[%d], kiobuf->pgcnt[%d]!\n",
+ nr_bytes, total_bytes, pgcnt, SCpnt->request.kiobuf->nr_pages);
+ BUG();
+ }
+ return nr_seg;
+
+}
+#endif
+
+
/*
* Function: __init_io()
*
@@ -778,6 +982,9 @@
* gather list, the sg count in the request won't be valid
* (mainly because we don't need queue management functions
* which keep the tally uptodate.
+ *
+ * Modified to handle kiobuf argument in the SCpnt->request
+ * structure.
*/
__inline static int __init_io(Scsi_Cmnd * SCpnt,
int sg_count_valid,
@@ -785,7 +992,6 @@
int dma_host)
{
struct buffer_head * bh;
- struct buffer_head * bhprev;
char * buff;
int count;
int i;
@@ -800,13 +1006,17 @@
* needed any more. Need to play with it and see if we hit the
* panic. If not, then don't bother.
*/
- if (!SCpnt->request.bh) {
+ if ((!SCpnt->request.bh
+#if CONFIG_KIOBUF_IO
+ && !SCpnt->request.kiobuf) ||
+ (SCpnt->request.bh && SCpnt->request.kiobuf
+#endif
+ )){
/*
- * Case of page request (i.e. raw device), or unlinked buffer
- * Typically used for swapping, but this isn't how we do
- * swapping any more.
+ * Case of unlinked buffer. Typically used for swapping,
+ * but this isn't how we do swapping any more.
*/
- panic("I believe this is dead code. If we hit this, I was wrong");
+ panic("I believe this is dead code. If we hit this, I was wrong");
#if 0
SCpnt->request_bufflen = SCpnt->request.nr_sectors << 9;
SCpnt->request_buffer = SCpnt->request.buffer;
@@ -820,6 +1030,12 @@
req = &SCpnt->request;
/*
* First we need to know how many scatter gather segments are needed.
+ *
+ * Redundant test, per the comment below indicating sg_count_valid is always
+ * set to 1 (ll_rw_blk.c's estimate of req->nr_segments is always trusted).
+ *
+ * count is initialized in ll_rw_kio() for the kiobuf path and, since these
+ * requests are never merged, the counts stay valid.
*/
if (!sg_count_valid) {
count = __count_segments(req, use_clustering, dma_host, NULL);
@@ -838,17 +1054,31 @@
}
/*
* Don't bother with scatter-gather if there is only one segment.
- */
+ */
if (count == 1) {
this_count = SCpnt->request.nr_sectors;
goto single_segment;
}
+#if CONFIG_KIOBUF_IO
+ /* Check if the size of the sgl would be greater than the size
+ * of the host sgl table; if so, limit the sgl size.
+ * When the request sectors are harvested after completion of
+ * I/O in __scsi_collect_kio_sectors(), the remaining sectors
+ * will be reinjected into the request queue as a special cmd.
+ * This is repeated until all the request sectors are done.
+ * [[EMAIL PROTECTED]]
+ */
+ if((SCpnt->request.kiobuf != NULL) &&
+ (count > SCpnt->host->sg_tablesize)) {
+ count = SCpnt->host->sg_tablesize - 1;
+ }
+#endif
SCpnt->use_sg = count;
-
/*
* Allocate the actual scatter-gather table itself.
* scsi_malloc can only allocate in chunks of 512 bytes
*/
+
SCpnt->sglist_len = (SCpnt->use_sg
* sizeof(struct scatterlist) + 511) & ~511;
@@ -873,51 +1103,17 @@
memset(sgpnt, 0, SCpnt->use_sg * sizeof(struct scatterlist));
SCpnt->request_buffer = (char *) sgpnt;
SCpnt->request_bufflen = 0;
- bhprev = NULL;
- for (count = 0, bh = SCpnt->request.bh;
- bh; bh = bh->b_reqnext) {
- if (use_clustering && bhprev != NULL) {
- if (dma_host &&
- virt_to_phys(bhprev->b_data) - 1 == ISA_DMA_THRESHOLD) {
- /* Nothing - fall through */
- } else if (CONTIGUOUS_BUFFERS(bhprev, bh)) {
- /*
- * This one is OK. Let it go. Note that we
- * do not have the ability to allocate
- * bounce buffer segments > PAGE_SIZE, so
- * for now we limit the thing.
- */
- if( dma_host ) {
-#ifdef DMA_SEGMENT_SIZE_LIMITED
- if( virt_to_phys(bh->b_data) - 1 < ISA_DMA_THRESHOLD
- || sgpnt[count - 1].length + bh->b_size <= PAGE_SIZE ) {
- sgpnt[count - 1].length += bh->b_size;
- bhprev = bh;
- continue;
- }
-#else
- sgpnt[count - 1].length += bh->b_size;
- bhprev = bh;
- continue;
-#endif
- } else {
- sgpnt[count - 1].length += bh->b_size;
- SCpnt->request_bufflen += bh->b_size;
- bhprev = bh;
- continue;
- }
- }
- }
- count++;
- sgpnt[count - 1].address = bh->b_data;
- sgpnt[count - 1].length += bh->b_size;
- if (!dma_host) {
- SCpnt->request_bufflen += bh->b_size;
- }
- bhprev = bh;
+ if (SCpnt->request.bh){
+ count = scsi_bh_sgl(SCpnt, use_clustering, dma_host, sgpnt);
}
-
+#if CONFIG_KIOBUF_IO
+ else if (SCpnt->request.kiobuf) {
+ count = scsi_kio_sgl(SCpnt, dma_host, sgpnt);
+ } else {
+ panic("Yowza! Both kiobuf and buffer_head pointers are null!");
+ }
+#endif
/*
* Verify that the count is correct.
*/
@@ -1009,6 +1205,18 @@
SCpnt->use_sg = 0;
scsi_free(SCpnt->request_buffer, SCpnt->sglist_len);
+#if CONFIG_KIOBUF_IO
+ /*
+ * Shouldn't ever get here for a kiobuf request.
+ *
+ * Since each segment is a single page, failing to
+ * allocate a bounce buffer for even the first page
+ * means that the DMA buffer pool is exhausted!
+ */
+ if (SCpnt->request.kiobuf){
+ dma_exhausted(SCpnt, 0);
+ }
+#endif
/*
* Make an attempt to pick up as much as we reasonably can.
* Just keep adding sectors until the pool starts running kind of
@@ -1044,34 +1252,33 @@
* segment. Possibly the entire request, or possibly a small
* chunk of the entire request.
*/
- bh = SCpnt->request.bh;
buff = SCpnt->request.buffer;
if (dma_host) {
- /*
- * Allocate a DMA bounce buffer. If the allocation fails, fall
- * back and allocate a really small one - enough to satisfy
- * the first buffer.
- */
- if (virt_to_phys(SCpnt->request.bh->b_data)
- + (this_count << 9) - 1 > ISA_DMA_THRESHOLD) {
- buff = (char *) scsi_malloc(this_count << 9);
- if (!buff) {
- printk("Warning - running low on DMA memory\n");
- this_count = SCpnt->request.current_nr_sectors;
- buff = (char *) scsi_malloc(this_count << 9);
- if (!buff) {
- dma_exhausted(SCpnt, 0);
- }
- }
- if (SCpnt->request.cmd == WRITE)
- memcpy(buff, (char *) SCpnt->request.buffer, this_count << 9);
- }
+ /*
+ * Allocate a DMA bounce buffer. If the allocation fails, fall
+ * back and allocate a really small one - enough to satisfy
+ * the first buffer.
+ */
+ if (virt_to_phys(SCpnt->request.buffer) + (this_count << 9) - 1 >
+ ISA_DMA_THRESHOLD) {
+ buff = (char *) scsi_malloc(this_count << 9);
+ if (!buff) {
+ printk("Warning - running low on DMA memory\n");
+ this_count = SCpnt->request.current_nr_sectors;
+ buff = (char *) scsi_malloc(this_count << 9);
+ if (!buff) {
+ dma_exhausted(SCpnt, 0);
+ }
+ }
+ if (SCpnt->request.cmd == WRITE)
+ memcpy(buff, (char *) SCpnt->request.buffer, this_count << 9);
+ }
}
SCpnt->request_bufflen = this_count << 9;
SCpnt->request_buffer = buff;
SCpnt->use_sg = 0;
- return 1;
+ return 1;
}
#define INITIO(_FUNCTION, _VALID, _CLUSTER, _DMA) \
--- linux-2.3pure/linux/drivers/scsi/sd.c Thu May 18 12:22:01 2000
+++ linux-2.3rawio/linux/drivers/scsi/sd.c Mon May 15 15:44:09 2000
@@ -512,7 +512,8 @@
static void rw_intr(Scsi_Cmnd * SCpnt)
{
- int result = SCpnt->result;
+ int result = SCpnt->result;
+
#if CONFIG_SCSI_LOGGING
char nbuff[6];
#endif
@@ -542,7 +543,12 @@
(SCpnt->sense_buffer[4] << 16) |
(SCpnt->sense_buffer[5] << 8) |
SCpnt->sense_buffer[6];
- if (SCpnt->request.bh != NULL)
+#if CONFIG_KIOBUF_IO
+ /* Tweak to support kiobuf-based I/O requests, [[EMAIL PROTECTED]] */
+ if (SCpnt->request.kiobuf != NULL)
+ block_sectors = SCpnt->request.kiobuf->length >> 9;
+ else if (SCpnt->request.bh != NULL)
+#endif
block_sectors = SCpnt->request.bh->b_size >> 9;
switch (SCpnt->device->sector_size) {
case 1024:
--- linux-2.3pure/linux/include/linux/blkdev.h Thu May 18 12:22:28 2000
+++ linux-2.3rawio/linux/include/linux/blkdev.h Mon May 15 17:08:24 2000
@@ -7,6 +7,9 @@
#include <linux/tqueue.h>
#include <linux/list.h>
+#if CONFIG_KIOBUF_IO
+#include <linux/iobuf.h>
+#endif
struct request_queue;
typedef struct request_queue request_queue_t;
@@ -38,6 +41,9 @@
void * special;
char * buffer;
struct semaphore * sem;
+#if CONFIG_KIOBUF_IO
+ struct kiobuf * kiobuf;
+#endif
struct buffer_head * bh;
struct buffer_head * bhtail;
request_queue_t * q;
--- linux-2.3pure/linux/include/linux/elevator.h Thu May 18 12:22:28 2000
+++ linux-2.3rawio/linux/include/linux/elevator.h Mon May 15 15:57:04 2000
@@ -106,8 +106,13 @@
{
elevator->sequence++;
if (req->cmd == READ)
- elevator->read_pendings++;
- elevator->nr_segments++;
+ elevator->read_pendings++;
+#if CONFIG_KIOBUF_IO
+ if (req->kiobuf != NULL) {
+ elevator->nr_segments += req->nr_segments;
+ } else
+#endif
+ elevator->nr_segments++;
}
static inline int elevator_request_latency(elevator_t * elevator, int rw)
--- linux-2.3pure/linux/include/linux/fs.h Thu May 18 12:22:28 2000
+++ linux-2.3rawio/linux/include/linux/fs.h Mon May 15 15:57:04 2000
@@ -1014,6 +1014,9 @@
extern struct buffer_head * get_hash_table(kdev_t, int, int);
extern struct buffer_head * getblk(kdev_t, int, int);
extern void ll_rw_block(int, int, struct buffer_head * bh[]);
+#if CONFIG_KIOBUF_IO
+extern void ll_rw_kio(int , struct kiobuf *, kdev_t, unsigned long, size_t, int *);
+#endif
extern int is_read_only(kdev_t);
extern void __brelse(struct buffer_head *);
extern inline void brelse(struct buffer_head *buf)
--- linux-2.3pure/linux/include/linux/iobuf.h Thu May 18 12:22:29 2000
+++ linux-2.3rawio/linux/include/linux/iobuf.h Mon May 15 17:05:14 2000
@@ -54,6 +54,9 @@
atomic_t io_count; /* IOs still in progress */
int errno; /* Status of completed IO */
void (*end_io) (struct kiobuf *); /* Completion callback */
+#if CONFIG_KIOBUF_IO
+ void *k_dev_id; /* Store kiovec (or pagebuf) here */
+#endif
wait_queue_head_t wait_queue;
};
-
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to [EMAIL PROTECTED]