On 2011-04-19 11:09, Jens Axboe wrote:
> On 2011-04-18 20:32, Bart Van Assche wrote:
>> On Mon, Apr 18, 2011 at 8:28 PM, Jens Axboe <[email protected]> wrote:
>>> On 2011-04-18 20:21, Bart Van Assche wrote:
>>>> a performance regression in the block layer not related to the md
>>>> issue. If I run a small block IOPS test on a block device created by
>>>> ib_srp (NOOP scheduler) I see about 11% less IOPS than with 2.6.38.3
>>>> (155.000 IOPS with 2.6.38.3 and 140.000 IOPS with 2.6.39-rc3+).
>>>
>>> That's not good. What's the test case?
>>
>> Nothing more than a fio IOPS test:
>>
>> fio --bs=512 --ioengine=libaio --buffered=0 --rw=read --thread
>> --iodepth=64 --numjobs=2 --loops=10000 --group_reporting --size=1G
>>     --gtod_reduce=1 --name=iops-test --filename=/dev/${dev} --invalidate=1
> 
> Bart, can you try the below:

Here's a more complete variant. James, lets get rid of this REENTER
crap. It's completely bogus and triggers falsely for a variety of
reasons. The below will work, but there may be room for improvement on
the SCSI side.

diff --git a/block/blk-core.c b/block/blk-core.c
index 5fa3dd2..4e49665 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -303,15 +303,7 @@ void __blk_run_queue(struct request_queue *q)
        if (unlikely(blk_queue_stopped(q)))
                return;
 
-       /*
-        * Only recurse once to avoid overrunning the stack, let the unplug
-        * handling reinvoke the handler shortly if we already got there.
-        */
-       if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) {
-               q->request_fn(q);
-               queue_flag_clear(QUEUE_FLAG_REENTER, q);
-       } else
-               queue_delayed_work(kblockd_workqueue, &q->delay_work, 0);
+       q->request_fn(q);
 }
 EXPORT_SYMBOL(__blk_run_queue);
 
@@ -328,6 +320,7 @@ void blk_run_queue_async(struct request_queue *q)
        if (likely(!blk_queue_stopped(q)))
                queue_delayed_work(kblockd_workqueue, &q->delay_work, 0);
 }
+EXPORT_SYMBOL(blk_run_queue_async);
 
 /**
  * blk_run_queue - run a single device queue
diff --git a/block/blk.h b/block/blk.h
index c9df8fc..6126346 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -22,7 +22,6 @@ void blk_rq_timed_out_timer(unsigned long data);
 void blk_delete_timer(struct request *);
 void blk_add_timer(struct request *);
 void __generic_unplug_device(struct request_queue *);
-void blk_run_queue_async(struct request_queue *q);
 
 /*
  * Internal atomic flags for request handling
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index ab55c2f..e9901b8 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -411,8 +411,6 @@ static void scsi_run_queue(struct request_queue *q)
        list_splice_init(&shost->starved_list, &starved_list);
 
        while (!list_empty(&starved_list)) {
-               int flagset;
-
                /*
                 * As long as shost is accepting commands and we have
                 * starved queues, call blk_run_queue. scsi_request_fn
@@ -435,20 +433,7 @@ static void scsi_run_queue(struct request_queue *q)
                        continue;
                }
 
-               spin_unlock(shost->host_lock);
-
-               spin_lock(sdev->request_queue->queue_lock);
-               flagset = test_bit(QUEUE_FLAG_REENTER, &q->queue_flags) &&
-                               !test_bit(QUEUE_FLAG_REENTER,
-                                       &sdev->request_queue->queue_flags);
-               if (flagset)
-                       queue_flag_set(QUEUE_FLAG_REENTER, sdev->request_queue);
-               __blk_run_queue(sdev->request_queue);
-               if (flagset)
-                       queue_flag_clear(QUEUE_FLAG_REENTER, 
sdev->request_queue);
-               spin_unlock(sdev->request_queue->queue_lock);
-
-               spin_lock(shost->host_lock);
+               blk_run_queue_async(sdev->request_queue);
        }
        /* put any unprocessed entries back */
        list_splice(&starved_list, &shost->starved_list);
diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c
index 28c3350..815069d 100644
--- a/drivers/scsi/scsi_transport_fc.c
+++ b/drivers/scsi/scsi_transport_fc.c
@@ -3816,28 +3816,17 @@ fail_host_msg:
 static void
 fc_bsg_goose_queue(struct fc_rport *rport)
 {
-       int flagset;
-       unsigned long flags;
-
        if (!rport->rqst_q)
                return;
 
+       /*
+        * This get/put dance makes no sense
+        */
        get_device(&rport->dev);
-
-       spin_lock_irqsave(rport->rqst_q->queue_lock, flags);
-       flagset = test_bit(QUEUE_FLAG_REENTER, &rport->rqst_q->queue_flags) &&
-                 !test_bit(QUEUE_FLAG_REENTER, &rport->rqst_q->queue_flags);
-       if (flagset)
-               queue_flag_set(QUEUE_FLAG_REENTER, rport->rqst_q);
-       __blk_run_queue(rport->rqst_q);
-       if (flagset)
-               queue_flag_clear(QUEUE_FLAG_REENTER, rport->rqst_q);
-       spin_unlock_irqrestore(rport->rqst_q->queue_lock, flags);
-
+       blk_run_queue_async(rport->rqst_q);
        put_device(&rport->dev);
 }
 
-
 /**
  * fc_bsg_rport_dispatch - process rport bsg requests and dispatch to LLDD
  * @q:         rport request queue
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index cbbfd98..2ad95fa 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -388,20 +388,19 @@ struct request_queue
 #define        QUEUE_FLAG_SYNCFULL     3       /* read queue has been filled */
 #define QUEUE_FLAG_ASYNCFULL   4       /* write queue has been filled */
 #define QUEUE_FLAG_DEAD                5       /* queue being torn down */
-#define QUEUE_FLAG_REENTER     6       /* Re-entrancy avoidance */
-#define QUEUE_FLAG_ELVSWITCH   7       /* don't use elevator, just do FIFO */
-#define QUEUE_FLAG_BIDI                8       /* queue supports bidi requests 
*/
-#define QUEUE_FLAG_NOMERGES     9      /* disable merge attempts */
-#define QUEUE_FLAG_SAME_COMP   10      /* force complete on same CPU */
-#define QUEUE_FLAG_FAIL_IO     11      /* fake timeout */
-#define QUEUE_FLAG_STACKABLE   12      /* supports request stacking */
-#define QUEUE_FLAG_NONROT      13      /* non-rotational device (SSD) */
+#define QUEUE_FLAG_ELVSWITCH   6       /* don't use elevator, just do FIFO */
+#define QUEUE_FLAG_BIDI                7       /* queue supports bidi requests 
*/
+#define QUEUE_FLAG_NOMERGES     8      /* disable merge attempts */
+#define QUEUE_FLAG_SAME_COMP   9       /* force complete on same CPU */
+#define QUEUE_FLAG_FAIL_IO     10      /* fake timeout */
+#define QUEUE_FLAG_STACKABLE   11      /* supports request stacking */
+#define QUEUE_FLAG_NONROT      12      /* non-rotational device (SSD) */
 #define QUEUE_FLAG_VIRT        QUEUE_FLAG_NONROT /* paravirt device */
-#define QUEUE_FLAG_IO_STAT     15      /* do IO stats */
-#define QUEUE_FLAG_DISCARD     16      /* supports DISCARD */
-#define QUEUE_FLAG_NOXMERGES   17      /* No extended merges */
-#define QUEUE_FLAG_ADD_RANDOM  18      /* Contributes to random pool */
-#define QUEUE_FLAG_SECDISCARD  19      /* supports SECDISCARD */
+#define QUEUE_FLAG_IO_STAT     13      /* do IO stats */
+#define QUEUE_FLAG_DISCARD     14      /* supports DISCARD */
+#define QUEUE_FLAG_NOXMERGES   15      /* No extended merges */
+#define QUEUE_FLAG_ADD_RANDOM  16      /* Contributes to random pool */
+#define QUEUE_FLAG_SECDISCARD  17      /* supports SECDISCARD */
 
 #define QUEUE_FLAG_DEFAULT     ((1 << QUEUE_FLAG_IO_STAT) |            \
                                 (1 << QUEUE_FLAG_STACKABLE)    |       \
@@ -699,6 +698,7 @@ extern void blk_sync_queue(struct request_queue *q);
 extern void __blk_stop_queue(struct request_queue *q);
 extern void __blk_run_queue(struct request_queue *q);
 extern void blk_run_queue(struct request_queue *);
+extern void blk_run_queue_async(struct request_queue *q);
 extern int blk_rq_map_user(struct request_queue *, struct request *,
                           struct rq_map_data *, void __user *, unsigned long,
                           gfp_t);

-- 
Jens Axboe

--
To unsubscribe from this list: send the line "unsubscribe kernel-testers" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to