>Number:         169403
>Category:       kern
>Synopsis:       CAM layer, I/O starvation, no fairness
>Confidential:   no
>Severity:       serious
>Priority:       low
>Responsible:    freebsd-bugs
>State:          open
>Quarter:        
>Keywords:       
>Date-Required:
>Class:          sw-bug
>Submitter-Id:   current-users
>Arrival-Date:   Mon Jun 25 17:50:06 UTC 2012
>Closed-Date:
>Last-Modified:
>Originator:     jerry Toung
>Release:        FreeBSD 8.1-RELEASE #0
>Organization:
Opnet
>Environment:
FreeBSD dev8 8.1-RELEASE FreeBSD 8.1-RELEASE #0: Mon Jul 19 02:36:49 UTC 2010   
  [email protected]:/usr/obj/usr/src/sys/GENERIC  amd64

>Description:
I am convinced that there is a bug in the CAM code that leads to I/O starvation.
I have already discussed this privately with a few people. I am now bringing it
to a wider audience to get more feedback.

My setup is that I have 1 RAID controller with 2 arrays connected to
it, da0 and da1.
The controller supports 252 tags. After boot, "camcontrol tags" on
da0 and da1 shows that each device has 252 openings. A process
P0 writing to da0 is dormant most of the time, but periodically wakes up
with a burst of I/O of 5000-6000 ops, as reported by gstat.
A process P1 writing to da1 has a fixed data rate, as reported by gstat.

The issue: when P0 generates that burst of 5000-6000 ops, the write
rate of P1 on da1 drops to 0 MB/sec for up to 8-9 seconds,
vfs.hirunningspace starts climbing, and we end up in the waithirunning() or
getblk() sleep channel. BTW, raising hirunningspace has no effect on
the 0 MB/s behavior.

The first problem that I see here is that if the sim's devq has 252
alloc_queue and send_queue openings, the struct cam_ed instances representing
da0 and da1 should each have 126 openings, not 252.
The second problem is that there is clearly no I/O fairness in CAM, as seen
in the gstat output: da0 takes exclusive hold of the sim/controller
until it has processed all its I/Os (8-9 seconds). The code that does
this is at

cam/cam_xpt.c:3030
3030             && (devq->alloc_openings > 0)

and

cam/cam_xpt.c:3091
3091             && (devq->send_openings > 0)

After the openings have been split to 126 each, the tests above will always be true.

I have a patch that fixes both problems.

With the patch, da0 and da1 each automatically get 126 openings and, based on
that, extra logic in cam/cam_xpt.c implements fairness. No more 0 MB/s on
da1. This is on FreeBSD 8.1-RELEASE.
>How-To-Repeat:

>Fix:


Patch attached with submission follows:

diff -rup cam/cam_queue.c cam/cam_queue.c
--- cam/cam_queue.c     2010-06-13 19:09:06.000000000 -0700
+++ cam/cam_queue.c     2012-03-22 10:50:50.000000000 -0700
@@ -412,7 +412,7 @@ heap_down(cam_pinfo **queue_array, int i
 {
        int child;
        int parent;
-       
+
        parent = index;
        child = parent << 1;
        for (; child <= num_entries; child = parent << 1) {
diff -rup cam/cam_sim.c cam/cam_sim.c
--- cam/cam_sim.c       2010-06-13 19:09:06.000000000 -0700
+++ cam/cam_sim.c       2012-03-19 13:05:10.000000000 -0700
@@ -87,6 +87,7 @@ cam_sim_alloc(sim_action_func sim_action
        sim->refcount = 1;
        sim->devq = queue;
        sim->max_ccbs = 8;      /* Reserve for management purposes. */
+       sim->dev_count = 0;     
        sim->mtx = mtx;
        if (mtx == &Giant) {
                sim->flags |= 0;
diff -rup cam/cam_sim.h cam/cam_sim.h
--- cam/cam_sim.h       2010-06-13 19:09:06.000000000 -0700
+++ cam/cam_sim.h       2012-03-19 15:34:17.000000000 -0700
@@ -118,6 +118,8 @@ struct cam_sim {
        u_int                   max_ccbs;
        /* Current count of allocated ccbs */
        u_int                   ccb_count;
+       /* Number of peripheral drivers mapped to this sim */
+       u_int                   dev_count;      
 
 };
 
diff -rup cam/cam_xpt.c cam/cam_xpt.c
--- cam/cam_xpt.c       2010-06-13 19:09:06.000000000 -0700
+++ cam/cam_xpt.c       2012-03-29 11:41:51.000000000 -0700
@@ -303,7 +303,7 @@ xpt_schedule_dev_allocq(struct cam_eb *b
        int retval;
 
        if ((dev->drvq.entries > 0) &&
-           (dev->ccbq.devq_openings > 0) &&
+           (dev->runs_token < dev->ccbq.queue.array_size) &&
            (cam_ccbq_frozen(&dev->ccbq, CAM_PRIORITY_TO_RL(
                CAMQ_GET_PRIO(&dev->drvq))) == 0)) {
                /*
@@ -327,7 +327,7 @@ xpt_schedule_dev_sendq(struct cam_eb *bu
        int     retval;
 
        if ((dev->ccbq.queue.entries > 0) &&
-           (dev->ccbq.dev_openings > 0) &&
+           (dev->runs_token < dev->ccbq.queue.array_size) &&
            (cam_ccbq_frozen_top(&dev->ccbq) == 0)) {
                /*
                 * The priority of a device waiting for controller
@@ -973,6 +973,9 @@ xpt_add_periph(struct cam_periph *periph
        struct cam_ed *device;
        int32_t  status;
        struct periph_list *periph_head;
+       struct cam_eb *bus;
+       struct cam_et *target;
+       struct cam_ed *devptr;
 
        mtx_assert(periph->sim->mtx, MA_OWNED);
 
@@ -991,6 +994,8 @@ xpt_add_periph(struct cam_periph *periph
                status = camq_resize(&device->drvq,
                                     device->drvq.array_size + 1);
 
+               if (periph->periph_name != NULL &&  
strncmp(periph->periph_name, "da",2) ==0 )
+                       device->sim->dev_count++;
                device->generation++;
 
                SLIST_INSERT_HEAD(periph_head, periph, periph_links);
@@ -998,6 +1003,24 @@ xpt_add_periph(struct cam_periph *periph
 
        mtx_lock(&xsoftc.xpt_topo_lock);
        xsoftc.xpt_generation++;
+
+       if (device != NULL && device->sim->dev_count > 1 &&
+            (device->sim->max_dev_openings > device->sim->dev_count)) {
+               TAILQ_FOREACH(bus, &xsoftc.xpt_busses, links) {
+                       if (bus->sim != device->sim)
+                               continue;
+                       TAILQ_FOREACH(target, &bus->et_entries, links) {
+                               TAILQ_FOREACH(devptr, &target->ed_entries, 
links) {
+                               /*
+                                * The number of openings/tags supported by the 
sim (i.e controller)
+                                * is evenly distributed between all devices 
that share this sim.
+                                */ 
+                                       cam_ccbq_resize(&devptr->ccbq, 
+                                                       
(devptr->sim->max_dev_openings/devptr->sim->dev_count));
+                                }
+                        }
+                }
+        }
        mtx_unlock(&xsoftc.xpt_topo_lock);
 
        return (status);
@@ -3072,6 +3095,11 @@ xpt_run_dev_allocq(struct cam_eb *bus)
                }
 
                /* We may have more work. Attempt to reschedule. */
+               device->runs_token++;
+               if (device->runs_token >= device->ccbq.queue.array_size) {
+                       device->runs_token = 0;
+                       break;
+               }
                xpt_schedule_dev_allocq(bus, device);
        }
        devq->alloc_queue.qfrozen_cnt[0]--;
@@ -3139,7 +3167,6 @@ xpt_run_dev_sendq(struct cam_eb *bus)
                devq->send_openings--;
                devq->send_active++;
 
-               xpt_schedule_dev_sendq(bus, device);
 
                if (work_ccb && (work_ccb->ccb_h.flags & CAM_DEV_QFREEZE) != 0){
                        /*
@@ -3170,6 +3197,13 @@ xpt_run_dev_sendq(struct cam_eb *bus)
                 */
                sim = work_ccb->ccb_h.path->bus->sim;
                (*(sim->sim_action))(sim, work_ccb);
+
+               device->runs_token++;
+               if (device->runs_token >= device->ccbq.queue.array_size) {
+                       device->runs_token = 0;
+                       break;
+               }
+               xpt_schedule_dev_sendq(bus, device);
        }
        devq->send_queue.qfrozen_cnt[0]--;
 }
@@ -4285,6 +4319,7 @@ xpt_alloc_device(struct cam_eb *bus, str
                device->tag_delay_count = 0;
                device->tag_saved_openings = 0;
                device->refcount = 1;
+               device->runs_token = 0;
                callout_init_mtx(&device->callout, bus->sim->mtx, 0);
 
                /*
diff -rup cam/cam_xpt_internal.h cam/cam_xpt_internal.h
--- cam/cam_xpt_internal.h      2010-06-13 19:09:06.000000000 -0700
+++ cam/cam_xpt_internal.h      2012-03-21 13:57:45.000000000 -0700
@@ -118,6 +118,7 @@ struct cam_ed {
 #define        CAM_TAG_DELAY_COUNT             5
        u_int32_t        tag_saved_openings;
        u_int32_t        refcount;
+       u_int32_t        runs_token;
        struct callout   callout;
 };
 
diff -rup cam/scsi/scsi_da.c cam/scsi/scsi_da.c
--- cam/scsi/scsi_da.c  2010-06-13 19:09:06.000000000 -0700
+++ cam/scsi/scsi_da.c  2012-03-21 14:16:00.000000000 -0700
@@ -56,7 +56,13 @@ __FBSDID("$FreeBSD: src/sys/cam/scsi/scs
 #include <cam/cam_ccb.h>
 #include <cam/cam_periph.h>
 #include <cam/cam_xpt_periph.h>
+#include <cam/cam_queue.h>
 #include <cam/cam_sim.h>
+#include <cam/cam_xpt.h>
+#include <cam/cam_xpt_sim.h>
+#include <cam/cam_xpt_periph.h>
+#include <cam/cam_xpt_internal.h>
+#include <cam/cam_debug.h>
 
 #include <cam/scsi/scsi_message.h>
 
@@ -1102,6 +1108,26 @@ dasysctlinit(void *context, int pending)
                &softc->minimum_cmd_size, 0, dacmdsizesysctl, "I",
                "Minimum CDB size");
 
+       SYSCTL_ADD_INT(&softc->sysctl_ctx,SYSCTL_CHILDREN(softc->sysctl_tree),
+               OID_AUTO, "outstanding_cmds", CTLTYPE_INT | CTLFLAG_RD,
+               &softc->outstanding_cmds, 0, "Outstanding CDB Cmds");
+
+       SYSCTL_ADD_INT(&softc->sysctl_ctx,SYSCTL_CHILDREN(softc->sysctl_tree),
+               OID_AUTO, "ccbq_devq_openings", CTLTYPE_INT | CTLFLAG_RD,
+               &periph->path->device->ccbq.devq_openings, 0, "CCBQ Dev 
Openings");
+
+       SYSCTL_ADD_INT(&softc->sysctl_ctx,SYSCTL_CHILDREN(softc->sysctl_tree),
+               OID_AUTO, "ccbq_array_size", CTLTYPE_INT | CTLFLAG_RW,
+               &periph->path->device->ccbq.queue.array_size, 0, "CCBQ Array 
Size");
+
+       SYSCTL_ADD_INT(&softc->sysctl_ctx,SYSCTL_CHILDREN(softc->sysctl_tree),
+               OID_AUTO, "sim_ccb_count", CTLTYPE_INT | CTLFLAG_RD,
+               &periph->sim->ccb_count, 0, "SIM CCB COUNT");
+
+       SYSCTL_ADD_INT(&softc->sysctl_ctx,SYSCTL_CHILDREN(softc->sysctl_tree),
+               OID_AUTO, "sim_devq_alloc_openings", CTLTYPE_INT | CTLFLAG_RD,
+               &periph->sim->devq->alloc_openings, 0, "SIM Devq Alloc 
Openings");
+
        cam_periph_release(periph);
 }
 


>Release-Note:
>Audit-Trail:
>Unformatted:
_______________________________________________
[email protected] mailing list
http://lists.freebsd.org/mailman/listinfo/freebsd-bugs
To unsubscribe, send any mail to "[email protected]"

Reply via email to