On 2018-04-07 10:23 PM, Ming Lei wrote:
On Fri, Feb 09, 2018 at 09:36:39PM -0500, Douglas Gilbert wrote:
The Start Stop Unit (SSU) command takes in the order of a second to
complete on some SAS SSDs and longer on hard disks. Synchronize Cache (SC)
can also take some time. Both commands have an IMMED bit in their cdbs for
those apps that don't want to wait. This patch introduces a long delay for
those commands when the IMMED bit is clear.
Since SC is a media access command then when the fake_rw option is active,
its cdb processing is skipped and it returns immediately. The SSU command
is not altered by the setting of the fake_rw option. These actions are
not changed by this patch.

Changes since v1:
   - clear the cdb mask of SYNCHRONIZE CACHE(16) cdb in byte 1, bit 0

Changes:
   - add the SYNCHRONIZE CACHE(16) command
   - together with the existing START STOP UNIT and SYNCHRONIZE CACHE(10)
     commands process the IMMED bit in their cdbs
   - if the IMMED bit is set, return immediately
   - if the IMMED bit is clear, treat the delay parameter as having
     a unit of one second
   - in the SYNCHRONIZE CACHE processing do a bounds check

Hello Douglas,

I found this patch makes my test on scsi_debug much much slow, and
basically make scsi_debug not usable in sync IO related tests, such
as make partitions(parted), or 'dbench -s'.

For example:

1) scsi_debug:
   modprobe scsi_debug dev_size_mb=1024 max_queue=1

2) parted
- time taken by the following commands is increased from 1.3sec to 22.3 sec

        parted -m -s -a none $DISK mkpart primary 0MB 32MB &&
        parted -m -s -a none $DISK mkpart primary 32MB $DEV_SIZE

3) dbench(dbench -t 20 -s 64)
- write throughput is decreased from 38MB to 1.89MB

Definitely it doesn't simulate an actual scsi device from performance
view.

IMO, this kind of simulation by completing SYNCHRONIZE_CACHE in unit of
second shouldn't be good since the actual completion time depends if
there is data cached in drive, or how much data is cached.

So is it possible to remove the very very slow response by doing that
only for 1/nth times?  For example, do long delay for every 10 or 20
SYNCHRONIZE_CACHE commands.

Or other approaches to avoid this issue?

Hi,
Could you try the attached patch. It splits the LONG_DELAY in two, one
for SSU, the other for SC. The SC one is now 1/10 what it was. Also
if nothing is written since the previous SC then SC returns immediately.
Similarly SSU only delays when the start-stop state changes.

Doug Gilbert

diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c
index 26ce022dd6f4..9a36af481be9 100644
--- a/drivers/scsi/scsi_debug.c
+++ b/drivers/scsi/scsi_debug.c
@@ -234,11 +234,13 @@ static const char *sdebug_version_date = "20180128";
 #define F_INV_OP		0x200
 #define F_FAKE_RW		0x400
 #define F_M_ACCESS		0x800	/* media access */
-#define F_LONG_DELAY		0x1000
+#define F_SSU_DELAY		0x1000
+#define F_SYNC_DELAY		0x2000
 
 #define FF_RESPOND (F_RL_WLUN_OK | F_SKIP_UA | F_DELAY_OVERR)
 #define FF_MEDIA_IO (F_M_ACCESS | F_FAKE_RW)
 #define FF_SA (F_SA_HIGH | F_SA_LOW)
+#define F_LONG_DELAY		(F_SSU_DELAY | F_SYNC_DELAY)
 
 #define SDEBUG_MAX_PARTS 4
 
@@ -510,7 +512,7 @@ static const struct opcode_info_t release_iarr[] = {
 };
 
 static const struct opcode_info_t sync_cache_iarr[] = {
-	{0, 0x91, 0, F_LONG_DELAY | F_M_ACCESS, resp_sync_cache, NULL,
+	{0, 0x91, 0, F_SYNC_DELAY | F_M_ACCESS, resp_sync_cache, NULL,
 	    {16,  0x6, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 	     0xff, 0xff, 0xff, 0xff, 0x3f, 0xc7} },	/* SYNC_CACHE (16) */
 };
@@ -553,7 +555,7 @@ static const struct opcode_info_t opcode_info_arr[SDEB_I_LAST_ELEMENT + 1] = {
 	    resp_write_dt0, write_iarr,			/* WRITE(16) */
 		{16,  0xfa, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 		 0xff, 0xff, 0xff, 0xff, 0xff, 0xc7} },
-	{0, 0x1b, 0, F_LONG_DELAY, resp_start_stop, NULL,/* START STOP UNIT */
+	{0, 0x1b, 0, F_SSU_DELAY, resp_start_stop, NULL,/* START STOP UNIT */
 	    {6,  0x1, 0, 0xf, 0xf7, 0xc7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} },
 	{ARRAY_SIZE(sa_in_16_iarr), 0x9e, 0x10, F_SA_LOW | F_D_IN,
 	    resp_readcap16, sa_in_16_iarr, /* SA_IN(16), READ CAPACITY(16) */
@@ -606,7 +608,7 @@ static const struct opcode_info_t opcode_info_arr[SDEB_I_LAST_ELEMENT + 1] = {
 	    resp_write_same_10, write_same_iarr,	/* WRITE SAME(10) */
 		{10,  0xff, 0xff, 0xff, 0xff, 0xff, 0x3f, 0xff, 0xff, 0xc7, 0,
 		 0, 0, 0, 0, 0} },
-	{ARRAY_SIZE(sync_cache_iarr), 0x35, 0, F_LONG_DELAY | F_M_ACCESS,
+	{ARRAY_SIZE(sync_cache_iarr), 0x35, 0, F_SYNC_DELAY | F_M_ACCESS,
 	    resp_sync_cache, sync_cache_iarr,
 	    {10,  0x7, 0xff, 0xff, 0xff, 0xff, 0x3f, 0xff, 0xff, 0xc7, 0, 0,
 	     0, 0, 0, 0} },			/* SYNC_CACHE (10) */
@@ -667,6 +669,7 @@ static bool sdebug_strict = DEF_STRICT;
 static bool sdebug_any_injecting_opt;
 static bool sdebug_verbose;
 static bool have_dif_prot;
+static bool write_since_sync;
 static bool sdebug_statistics = DEF_STATISTICS;
 
 static unsigned int sdebug_store_sectors;
@@ -1607,6 +1610,7 @@ static int resp_start_stop(struct scsi_cmnd *scp,
 {
 	unsigned char *cmd = scp->cmnd;
 	int power_cond, stop;
+	bool changing;
 
 	power_cond = (cmd[4] & 0xf0) >> 4;
 	if (power_cond) {
@@ -1614,8 +1618,12 @@ static int resp_start_stop(struct scsi_cmnd *scp,
 		return check_condition_result;
 	}
 	stop = !(cmd[4] & 1);
+	changing = atomic_read(&devip->stopped) == !stop;
 	atomic_xchg(&devip->stopped, stop);
-	return (cmd[1] & 0x1) ? SDEG_RES_IMMED_MASK : 0; /* check IMMED bit */
+	if (!changing || cmd[1] & 0x1)  /* state unchanged or IMMED set */
+		return SDEG_RES_IMMED_MASK;
+	else
+		return 0;
 }
 
 static sector_t get_sdebug_capacity(void)
@@ -2473,6 +2481,7 @@ static int do_device_access(struct scsi_cmnd *scmd, u32 sg_skip, u64 lba,
 	if (do_write) {
 		sdb = scsi_out(scmd);
 		dir = DMA_TO_DEVICE;
+		write_since_sync = true;
 	} else {
 		sdb = scsi_in(scmd);
 		dir = DMA_FROM_DEVICE;
@@ -3598,7 +3607,12 @@ static int resp_sync_cache(struct scsi_cmnd *scp,
 		mk_sense_buffer(scp, ILLEGAL_REQUEST, LBA_OUT_OF_RANGE, 0);
 		return check_condition_result;
 	}
-	return (cmd[1] & 0x2) ? SDEG_RES_IMMED_MASK : 0; /* check IMMED bit */
+	if (!write_since_sync || cmd[1] & 0x2) {
+		return SDEG_RES_IMMED_MASK;
+	} else {	/* delay if write_since_sync and IMMED clear */
+		write_since_sync = false;
+		return 0;
+	}
 }
 
 #define RL_BUCKET_ELEMS 8
@@ -5777,13 +5791,14 @@ static int scsi_debug_queuecommand(struct Scsi_Host *shost,
 		return schedule_resp(scp, devip, errsts, pfp, 0, 0);
 	else if ((sdebug_jdelay || sdebug_ndelay) && (flags & F_LONG_DELAY)) {
 		/*
-		 * If any delay is active, want F_LONG_DELAY to be at least 1
+		 * If any delay is active, for F_SSU_DELAY want at least 1
 		 * second and if sdebug_jdelay>0 want a long delay of that
-		 * many seconds.
+		 * many seconds; for F_SYNC_DELAY want 1/10 of that.
 		 */
 		int jdelay = (sdebug_jdelay < 2) ? 1 : sdebug_jdelay;
+		int denom = (flags & F_SYNC_DELAY) ? 10 : 1;
 
-		jdelay = mult_frac(USER_HZ * jdelay, HZ, USER_HZ);
+		jdelay = mult_frac(USER_HZ * jdelay, HZ, denom * USER_HZ);
 		return schedule_resp(scp, devip, errsts, pfp, jdelay, 0);
 	} else
 		return schedule_resp(scp, devip, errsts, pfp, sdebug_jdelay,

Reply via email to