Re: Dell R310 - H200 Raid performance problem

2011-03-02 Thread Okan Demirmen
On Sun 2011.02.20 at 10:30 -0500, Okan Demirmen wrote:
 On Sun 2011.02.20 at 13:28 +0100, Mark Kettenis wrote:
   Date: Sun, 20 Feb 2011 07:03:25 -0500
   From: Kenneth R Westerback kwesterb...@rogers.com
   
   On Sun, Feb 20, 2011 at 12:39:06PM +0100, Mark Kettenis wrote:
 Date: Sun, 20 Feb 2011 19:54:21 +1000
 From: David Gwynne l...@animata.net

  how to manipulate write cache policy?
 
 the lsi firmwares dont implement handling of the mod page changes
 unfortunately. you could call the ioctl this implements yourself
 though from userland.

David, while I think that implementing the cache manipulation ioctls
for mpii(4) is a good idea, there is a problem here.  We don't have a
tool in base that actually issues those ioctls.  And unless I'm
misreading the diff, this still leaves the cache disabled on the
stupid Dell.
   
   DIOCSCACHE is called in sdattach() to enable write cache for all
   disks that DIOCGCACHE reports as having write cache disabled. Or are
   you concerned that we have no way to manipulate it from userland
   if/when the default needs to be modified?
  
  Ah, that's the bit I was missing.  A userland tool to display and
  manipulate the cache settings would still be good though.
  Functionality should probably be added to bioctl(8).  A bit
  unfortunate that both the -c and -C options are already taken.
 
 Ah, I had a diff for bioctl (enable/disable WCE/RCD) based on dlg's
 sample, but I think marco wanted more of a policy of when to do WCE/RCD
 rather than a switch - I'll send it along when I get home later this
 week.

I'm not certain this is wanted, but I said I would forward along this
very simplistic patch, so here it is.  If something like this is wanted,
it can be re-worked to take multiple args to -e and such, but again,
only if this is deemed necessary in a userland tool outside of scsi(8).

Index: bioctl.8
===
RCS file: /cvs/src/sbin/bioctl/bioctl.8,v
retrieving revision 1.84
diff -u -p -r1.84 bioctl.8
--- bioctl.822 Dec 2010 16:25:32 -  1.84
+++ bioctl.82 Mar 2011 10:44:23 -
@@ -35,6 +35,7 @@
 .Op Fl hiqv
 .Op Fl a Ar alarm-function
 .Op Fl b Ar channel:target[.lun]
+.Op Fl e Ar flag
 .Op Fl H Ar channel:target[.lun]
 .Op Fl R Ar device \*(Ba channel:target[.lun]
 .Op Fl u Ar channel:target[.lun]
@@ -128,6 +129,24 @@ digits to four or less.
 .It Fl i
 Enumerate the selected RAID devices.
 This is the default if no other option is given.
+.It Fl e Ar flag
+Pass
+.Ar flag
+to
+.Nm .
+May be one of:
+.Bl -tag -width disable -compact
+.It Ar q
+Query the read/write cache status.
+.It Ar R
+Enable the read cache.
+.It Ar r
+Disable the read cache.
+.It Ar W
+Enable the write cache.
+.It Ar w
+Disable the write cache.
+.El
 .It Fl q
 Show vendor, product, revision, and serial number for the given disk.
 .It Fl R Ar device \*(Ba channel:target[.lun]
Index: bioctl.c
===
RCS file: /cvs/src/sbin/bioctl/bioctl.c,v
retrieving revision 1.98
diff -u -p -r1.98 bioctl.c
--- bioctl.c1 Dec 2010 19:40:18 -   1.98
+++ bioctl.c2 Mar 2011 10:44:23 -
@@ -77,6 +77,7 @@ void  bio_changepass(char *);
 u_int32_t  bio_createflags(char *);
 char   *bio_vis(char *);
 void   bio_diskinq(char *);
+void   bio_cache(char *, char *);
 
 intdevh = -1;
 inthuman;
@@ -97,17 +98,17 @@ main(int argc, char *argv[])
char*devicename = NULL;
char*realname = NULL, *al_arg = NULL;
char*bl_arg = NULL, *dev_list = NULL;
-   char*key_disk = NULL;
+   char*key_disk = NULL, *ca_arg = NULL;
const char  *errstr;
int ch, rv, blink = 0, changepass = 0, diskinq = 0;
-   int ss_func = 0;
+   int ss_func = 0, diskcache = 0;
u_int16_t   cr_level = 0;
int biodev = 0;
 
if (argc  2)
usage();
 
-   while ((ch = getopt(argc, argv, a:b:C:c:dH:hik:l:Pp:qr:R:svu:)) !=
+   while ((ch = getopt(argc, argv, a:b:C:c:de:H:hik:l:Pp:qr:R:svu:)) !=
-1) {
switch (ch) {
case 'a': /* alarm */
@@ -133,6 +134,10 @@ main(int argc, char *argv[])
/* delete volume */
func |= BIOC_DELETERAID;
break;
+   case 'e': /* cache */
+   diskcache = 1;
+   ca_arg = optarg;
+   break;
case 'u': /* unblink */
func |= BIOC_BLINK;
blink = BIOC_SBUNBLINK;
@@ -219,6 +224,8 @@ main(int 

Re: Dell R310 - H200 Raid performance problem

2011-03-02 Thread Mike Belopuhov
On Wed, Mar 2, 2011 at 11:54 AM, Okan Demirmen o...@demirmen.com wrote:
 I'm not certain this is wanted, but I said I would forward along this
 very simplistic patch, so here it is.  If something like this is wanted,
 it can be re-worked to take multiple args to -e and such, but again,
 only if this is deemed necessary in a userland tool outside of scsi(8).


i think this is pointless.  if you have an ioctl implemented in the
driver that enables cache, then sd(4) itself will enable it for you.
if your driver doesn't implement those ioctls it gives you a false
idea that you can turn it on which is not true obviously.

 Index: bioctl.8
 ===
 RCS file: /cvs/src/sbin/bioctl/bioctl.8,v
 retrieving revision 1.84
 diff -u -p -r1.84 bioctl.8
 --- bioctl.822 Dec 2010 16:25:32 -  1.84
 +++ bioctl.82 Mar 2011 10:44:23 -
 @@ -35,6 +35,7 @@
  .Op Fl hiqv
  .Op Fl a Ar alarm-function
  .Op Fl b Ar channel:target[.lun]
 +.Op Fl e Ar flag
  .Op Fl H Ar channel:target[.lun]
  .Op Fl R Ar device \*(Ba channel:target[.lun]
  .Op Fl u Ar channel:target[.lun]
 @@ -128,6 +129,24 @@ digits to four or less.
  .It Fl i
  Enumerate the selected RAID devices.
  This is the default if no other option is given.
 +.It Fl e Ar flag
 +Pass
 +.Ar flag
 +to
 +.Nm .
 +May be one of:
 +.Bl -tag -width disable -compact
 +.It Ar q
 +Query the read/write cache status.
 +.It Ar R
 +Enable the read cache.
 +.It Ar r
 +Disable the read cache.
 +.It Ar W
 +Enable the write cache.
 +.It Ar w
 +Disable the write cache.
 +.El
  .It Fl q
  Show vendor, product, revision, and serial number for the given disk.
  .It Fl R Ar device \*(Ba channel:target[.lun]



Re: Dell R310 - H200 Raid performance problem

2011-03-02 Thread Stuart Henderson
On 2011/03/02 12:09, Mike Belopuhov wrote:
 On Wed, Mar 2, 2011 at 11:54 AM, Okan Demirmen o...@demirmen.com wrote:
  I'm not certain this is wanted, but I said I would forward along this
  very simplistic patch, so here it is.  If something like this is wanted,
  it can be re-worked to take multiple args to -e and such, but again,
  only if this is deemed necessary in a userland tool outside of scsi(8).
 
 
 i think this is pointless.  if you have an ioctl implemented in the
 driver that enables cache, then sd(4) itself will enable it for you.
 if your driver doesn't implement those ioctls it gives you a false
 idea that you can turn it on which is not true obviously.

I guess some people might be thinking users may want to disable
this cache for safety or something.  Those people might reconsider
if they actually try one of these systems - it isn't just a bit
slower, the system really is unusable without it.



Re: Dell R310 - H200 Raid performance problem

2011-03-02 Thread Marco Peereboom
I really think this heuristic belongs in the kernel.  I think there is a
desire to make the policy a knob (the old, I prefer slow and safe over
fast and dangerous; well use a ups! they don't! debate).

So instead of bioctl I think we need a sysctl, for example hw.diskcache,
that by default is enabled, which is the drive manufacturer's suggested
setting.  Then if so desired one can turn it off.

Or do people think this would be too large a hammer and would like to
have a more granular control?

On Wed, Mar 02, 2011 at 05:54:23AM -0500, Okan Demirmen wrote:
 On Sun 2011.02.20 at 10:30 -0500, Okan Demirmen wrote:
  On Sun 2011.02.20 at 13:28 +0100, Mark Kettenis wrote:
Date: Sun, 20 Feb 2011 07:03:25 -0500
From: Kenneth R Westerback kwesterb...@rogers.com

On Sun, Feb 20, 2011 at 12:39:06PM +0100, Mark Kettenis wrote:
  Date: Sun, 20 Feb 2011 19:54:21 +1000
  From: David Gwynne l...@animata.net
 
   how to manipulate write cache policy?
  
  the lsi firmwares dont implement handling of the mod page changes
  unfortunately. you could call the ioctl this implements yourself
  though from userland.
 
 David, while I think that implementing the cache manipulation ioctls
 for mpii(4) is a good idea, there is a problem here.  We don't have a
 tool in base that actually issues those ioctls.  And unless I'm
 misreading the diff, this still leaves the cache disabled on the
 stupid Dell.

DIOCSCACHE is called in sdattach() to enable write cache for all
disks that DIOCGCACHE reports as having write cache disabled. Or are
you concerned that we have no way to manipulate it from userland
if/when the default needs to be modified?
   
   Ah, that's the bit I was missing.  A userland tool to display and
   manipulate the cache settings would still be good though.
   Functionality should probably be added to bioctl(8).  A bit
   unfortunate that both the -c and -C options are already taken.
  
  Ah, I had a diff for bioctl (enable/disable WCE/RCD) based on dlg's
  sample, but I think marco wanted more of a policy of when to do WCE/RCD
  rather than a switch - I'll send it along when I get home later this
  week.
 
 I'm not certain this is wanted, but I said I would forward along this
 very simplistic patch, so here it is.  If something like this is wanted,
 it can be re-worked to take multiple args to -e and such, but again,
 only if this is deemed necessary in a userland tool outside of scsi(8).
 
 Index: bioctl.8
 ===
 RCS file: /cvs/src/sbin/bioctl/bioctl.8,v
 retrieving revision 1.84
 diff -u -p -r1.84 bioctl.8
 --- bioctl.8  22 Dec 2010 16:25:32 -  1.84
 +++ bioctl.8  2 Mar 2011 10:44:23 -
 @@ -35,6 +35,7 @@
  .Op Fl hiqv
  .Op Fl a Ar alarm-function
  .Op Fl b Ar channel:target[.lun]
 +.Op Fl e Ar flag
  .Op Fl H Ar channel:target[.lun]
  .Op Fl R Ar device \*(Ba channel:target[.lun]
  .Op Fl u Ar channel:target[.lun]
 @@ -128,6 +129,24 @@ digits to four or less.
  .It Fl i
  Enumerate the selected RAID devices.
  This is the default if no other option is given.
 +.It Fl e Ar flag
 +Pass
 +.Ar flag
 +to
 +.Nm .
 +May be one of:
 +.Bl -tag -width disable -compact
 +.It Ar q
 +Query the read/write cache status.
 +.It Ar R
 +Enable the read cache.
 +.It Ar r
 +Disable the read cache.
 +.It Ar W
 +Enable the write cache.
 +.It Ar w
 +Disable the write cache.
 +.El
  .It Fl q
  Show vendor, product, revision, and serial number for the given disk.
  .It Fl R Ar device \*(Ba channel:target[.lun]
 Index: bioctl.c
 ===
 RCS file: /cvs/src/sbin/bioctl/bioctl.c,v
 retrieving revision 1.98
 diff -u -p -r1.98 bioctl.c
 --- bioctl.c  1 Dec 2010 19:40:18 -   1.98
 +++ bioctl.c  2 Mar 2011 10:44:23 -
 @@ -77,6 +77,7 @@ voidbio_changepass(char *);
  u_int32_tbio_createflags(char *);
  char *bio_vis(char *);
  void bio_diskinq(char *);
 +void bio_cache(char *, char *);
  
  int  devh = -1;
  int  human;
 @@ -97,17 +98,17 @@ main(int argc, char *argv[])
   char*devicename = NULL;
   char*realname = NULL, *al_arg = NULL;
   char*bl_arg = NULL, *dev_list = NULL;
 - char*key_disk = NULL;
 + char*key_disk = NULL, *ca_arg = NULL;
   const char  *errstr;
   int ch, rv, blink = 0, changepass = 0, diskinq = 0;
 - int ss_func = 0;
 + int ss_func = 0, diskcache = 0;
   u_int16_t   cr_level = 0;
   int biodev = 0;
  
   if (argc  2)
   usage();
  
 - while ((ch = getopt(argc, argv, a:b:C:c:dH:hik:l:Pp:qr:R:svu:)) !=
 + while ((ch = 

Re: Dell R310 - H200 Raid performance problem

2011-03-02 Thread Mark Kettenis
 Date: Wed, 2 Mar 2011 12:09:01 +0100
 From: Mike Belopuhov m...@crypt.org.ru
 
 On Wed, Mar 2, 2011 at 11:54 AM, Okan Demirmen o...@demirmen.com wrote:
  I'm not certain this is wanted, but I said I would forward along this
  very simplistic patch, so here it is.  If something like this is wanted,
  it can be re-worked to take multiple args to -e and such, but again,
  only if this is deemed necessary in a userland tool outside of scsi(8).
 
 
 i think this is pointless.  if you have an ioctl implemented in the
 driver that enables cache, then sd(4) itself will enable it for you.
 if your driver doesn't implement those ioctls it gives you a false
 idea that you can turn it on which is not true obviously.

Well, if sd(4) enables the cache by default, people actually might
want to disable the cache.  There are valid reasons for running with
write caches disabled, especially with RAID controllers that don't
have a battery backup.  And what is the point of having an ioctl if
it's only supposed to be used internally by the kernel?

Also, if the ioctls aren't implemented they will fail, so bioctl(8)
presumably prints an error message in that case.

  Index: bioctl.8
  ===
  RCS file: /cvs/src/sbin/bioctl/bioctl.8,v
  retrieving revision 1.84
  diff -u -p -r1.84 bioctl.8
  --- bioctl.822 Dec 2010 16:25:32 -  1.84
  +++ bioctl.82 Mar 2011 10:44:23 -
  @@ -35,6 +35,7 @@
   .Op Fl hiqv
   .Op Fl a Ar alarm-function
   .Op Fl b Ar channel:target[.lun]
  +.Op Fl e Ar flag
   .Op Fl H Ar channel:target[.lun]
   .Op Fl R Ar device \*(Ba channel:target[.lun]
   .Op Fl u Ar channel:target[.lun]
  @@ -128,6 +129,24 @@ digits to four or less.
   .It Fl i
   Enumerate the selected RAID devices.
   This is the default if no other option is given.
  +.It Fl e Ar flag
  +Pass
  +.Ar flag
  +to
  +.Nm .
  +May be one of:
  +.Bl -tag -width disable -compact
  +.It Ar q
  +Query the read/write cache status.
  +.It Ar R
  +Enable the read cache.
  +.It Ar r
  +Disable the read cache.
  +.It Ar W
  +Enable the write cache.
  +.It Ar w
  +Disable the write cache.
  +.El
   .It Fl q
   Show vendor, product, revision, and serial number for the given disk.
   .It Fl R Ar device \*(Ba channel:target[.lun]



Re: Dell R310 - H200 Raid performance problem

2011-03-02 Thread Theo de Raadt
Where will this bioctl call be done from?

From /etc/rc.local after fsck has run and spent ages because the disks
are not cached?

I don't understand what the purpose is of giving an option that makes
disks slow.  What's the point?

Why does the kernel just always try to make it best?



Re: Dell R310 - H200 Raid performance problem

2011-02-20 Thread David Gwynne
i believe the diff below should work out of the box. it pulls in
all mikeb's fixes.

On Fri, Feb 18, 2011 at 07:54:09PM +0100, Łukasz Czarniecki wrote:
 With following Mike's suggestions it worked.
 
 
 # scsi -f /dev/rsd0c -m 8
 IC:  0
 ABPF:  0
 CAP:  0
 DISC:  0
 SIZE:  0
 WCE:  1
 MF:  0
 RCD:  0
 Demand Retention Priority:  0
 Write Retention Priority:  0
 Disable Pre-fetch Transfer Length:  65535
 Minimum Pre-fetch:  0
 Maximum Pre-fetch:  65280
 Maximum Pre-fetch Ceiling:  65535
 FSW:  0
 LBCSS:  0
 DRA:  0
 Vendor-specific:  0
 NV_DIS:  0
 Number of Cache Segments:  15
 Cache Segment Size:  0
 
 how to manipulate write cache policy?

the lsi firmwares don't implement handling of the mode page changes
unfortunately. you could call the ioctl this implements yourself
though from userland.

Index: mpii.c
===
RCS file: /cvs/src/sys/dev/pci/mpii.c,v
retrieving revision 1.37
diff -u -p -r1.37 mpii.c
--- mpii.c  29 Dec 2010 03:55:09 -  1.37
+++ mpii.c  20 Feb 2011 09:18:58 -
@@ -29,6 +29,7 @@
 #include sys/kernel.h
 #include sys/rwlock.h
 #include sys/sensors.h
+#include sys/dkio.h
 #include sys/tree.h
 
 #include machine/bus.h
@@ -981,6 +982,51 @@ struct mpii_msg_sas_oper_reply {
u_int32_t   ioc_loginfo;
 } __packed;
 
+struct mpii_msg_raid_action_request {
+   u_int8_taction;
+#define MPII_RAID_ACTION_CHANGE_VOL_WRITE_CACHE(0x17)
+   u_int8_treserved1;
+   u_int8_tchain_offset;
+   u_int8_tfunction;
+
+   u_int16_t   vol_dev_handle;
+   u_int8_tphys_disk_num;
+   u_int8_tmsg_flags;
+
+   u_int8_tvp_id;
+   u_int8_tvf_if;
+   u_int16_t   reserved2;
+
+   u_int32_t   reserved3;
+
+   u_int32_t   action_data;
+#define MPII_RAID_VOL_WRITE_CACHE_MASK (0x03)
+#define MPII_RAID_VOL_WRITE_CACHE_DISABLE  (0x01)
+#define MPII_RAID_VOL_WRITE_CACHE_ENABLE   (0x02)
+
+   struct mpii_sge action_sge;
+} __packed;
+
+struct mpii_msg_raid_action_reply {
+   u_int8_taction;
+   u_int8_treserved1;
+   u_int8_tchain_offset;
+   u_int8_tfunction;
+
+   u_int16_t   vol_dev_handle;
+   u_int8_tphys_disk_num;
+   u_int8_tmsg_flags;
+
+   u_int8_tvp_id;
+   u_int8_tvf_if;
+   u_int16_t   reserved2;
+
+   u_int16_t   reserved3;
+   u_int16_t   ioc_status;
+
+   u_int32_t   action_data[5];
+} __packed;
+
 struct mpii_cfg_hdr {
u_int8_tpage_version;
u_int8_tpage_length;
@@ -1256,6 +1302,11 @@ struct mpii_cfg_raid_vol_pg0 {
 #define MPII_CFG_RAID_VOL_0_STATUS_RESYNC  (116)
 
u_int16_t   volume_settings;
+#define MPII_CFG_RAID_VOL_0_SETTINGS_CACHE_MASK(0x30)
+#define MPII_CFG_RAID_VOL_0_SETTINGS_CACHE_UNCHANGED   (0x00)
+#define MPII_CFG_RAID_VOL_0_SETTINGS_CACHE_DISABLED(0x10)
+#define MPII_CFG_RAID_VOL_0_SETTINGS_CACHE_ENABLED (0x20)
+
u_int8_thot_spare_pool;
u_int8_treserved1;
 
@@ -1972,6 +2023,8 @@ int   mpii_req_cfg_page(struct mpii_softc
 
 intmpii_get_ioc_pg8(struct mpii_softc *);
 
+intmpii_ioctl_cache(struct scsi_link *, u_long, struct dk_cache *);
+
 #if NBIO  0
 intmpii_ioctl(struct device *, u_long, caddr_t);
 intmpii_ioctl_inq(struct mpii_softc *, struct bioc_inq *);
@@ -4650,19 +4703,123 @@ mpii_scsi_cmd_done(struct mpii_ccb *ccb)
 
mpii_push_reply(sc, ccb-ccb_rcb);
scsi_done(xs);
-}
+}
 
 int
 mpii_scsi_ioctl(struct scsi_link *link, u_long cmd, caddr_t addr, int flag)
 {
struct mpii_softc   *sc = (struct mpii_softc *)link-adapter_softc;
+   struct mpii_device  *dev = sc-sc_devs[link-target];
 
DNPRINTF(MPII_D_IOCTL, %s: mpii_scsi_ioctl\n, DEVNAME(sc));
 
-   if (sc-sc_ioctl)
-   return (sc-sc_ioctl(link-adapter_softc, cmd, addr));
-   else
-   return (ENOTTY);
+   switch (cmd) {
+   case DIOCGCACHE:
+   case DIOCSCACHE:
+   if (dev != NULL  ISSET(dev-flags, MPII_DF_VOLUME)) {
+   return (mpii_ioctl_cache(link, cmd,
+   (struct dk_cache *)addr));
+   }
+   break;
+
+   default:
+   if (sc-sc_ioctl)
+   return (sc-sc_ioctl(link-adapter_softc, cmd, addr));
+
+   break;
+   }
+
+   return (ENOTTY);
+}
+
+int
+mpii_ioctl_cache(struct scsi_link *link, u_long cmd, struct dk_cache *dc)
+{
+   struct mpii_softc *sc = (struct mpii_softc *)link-adapter_softc;
+   struct mpii_device *dev = sc-sc_devs[link-target];
+   struct mpii_cfg_raid_vol_pg0 *vpg;
+   struct 

Re: Dell R310 - H200 Raid performance problem

2011-02-20 Thread Mark Kettenis
 Date: Sun, 20 Feb 2011 19:54:21 +1000
 From: David Gwynne l...@animata.net

  how to manipulate write cache policy?
 
 the lsi firmwares dont implement handling of the mod page changes
 unfortunately. you could call the ioctl this implements yourself
 though from userland.

David, while I think that implementing the cache manipulation ioctls
for mpii(4) is a good idea, there is a problem here.  We don't have a
tool in base that actually issues those ioctls.  And unless I'm
misreading the diff, this still leaves the cache disabled on the
stupid Dell.

 Index: mpii.c
 ===
 RCS file: /cvs/src/sys/dev/pci/mpii.c,v
 retrieving revision 1.37
 diff -u -p -r1.37 mpii.c
 --- mpii.c29 Dec 2010 03:55:09 -  1.37
 +++ mpii.c20 Feb 2011 09:18:58 -
 @@ -29,6 +29,7 @@
  #include sys/kernel.h
  #include sys/rwlock.h
  #include sys/sensors.h
 +#include sys/dkio.h
  #include sys/tree.h
  
  #include machine/bus.h
 @@ -981,6 +982,51 @@ struct mpii_msg_sas_oper_reply {
   u_int32_t   ioc_loginfo;
  } __packed;
  
 +struct mpii_msg_raid_action_request {
 + u_int8_taction;
 +#define MPII_RAID_ACTION_CHANGE_VOL_WRITE_CACHE  (0x17)
 + u_int8_treserved1;
 + u_int8_tchain_offset;
 + u_int8_tfunction;
 +
 + u_int16_t   vol_dev_handle;
 + u_int8_tphys_disk_num;
 + u_int8_tmsg_flags;
 +
 + u_int8_tvp_id;
 + u_int8_tvf_if;
 + u_int16_t   reserved2;
 +
 + u_int32_t   reserved3;
 +
 + u_int32_t   action_data;
 +#define MPII_RAID_VOL_WRITE_CACHE_MASK   (0x03)
 +#define MPII_RAID_VOL_WRITE_CACHE_DISABLE(0x01)
 +#define MPII_RAID_VOL_WRITE_CACHE_ENABLE (0x02)
 +
 + struct mpii_sge action_sge;
 +} __packed;
 +
 +struct mpii_msg_raid_action_reply {
 + u_int8_taction;
 + u_int8_treserved1;
 + u_int8_tchain_offset;
 + u_int8_tfunction;
 +
 + u_int16_t   vol_dev_handle;
 + u_int8_tphys_disk_num;
 + u_int8_tmsg_flags;
 +
 + u_int8_tvp_id;
 + u_int8_tvf_if;
 + u_int16_t   reserved2;
 +
 + u_int16_t   reserved3;
 + u_int16_t   ioc_status;
 +
 + u_int32_t   action_data[5];
 +} __packed;
 +
  struct mpii_cfg_hdr {
   u_int8_tpage_version;
   u_int8_tpage_length;
 @@ -1256,6 +1302,11 @@ struct mpii_cfg_raid_vol_pg0 {
  #define MPII_CFG_RAID_VOL_0_STATUS_RESYNC(116)
  
   u_int16_t   volume_settings;
 +#define MPII_CFG_RAID_VOL_0_SETTINGS_CACHE_MASK  (0x30)
 +#define MPII_CFG_RAID_VOL_0_SETTINGS_CACHE_UNCHANGED (0x00)
 +#define MPII_CFG_RAID_VOL_0_SETTINGS_CACHE_DISABLED  (0x10)
 +#define MPII_CFG_RAID_VOL_0_SETTINGS_CACHE_ENABLED   (0x20)
 +
   u_int8_thot_spare_pool;
   u_int8_treserved1;
  
 @@ -1972,6 +2023,8 @@ int mpii_req_cfg_page(struct mpii_softc
  
  int  mpii_get_ioc_pg8(struct mpii_softc *);
  
 +int  mpii_ioctl_cache(struct scsi_link *, u_long, struct dk_cache *);
 +
  #if NBIO  0
  int  mpii_ioctl(struct device *, u_long, caddr_t);
  int  mpii_ioctl_inq(struct mpii_softc *, struct bioc_inq *);
 @@ -4650,19 +4703,123 @@ mpii_scsi_cmd_done(struct mpii_ccb *ccb)
  
   mpii_push_reply(sc, ccb-ccb_rcb);
   scsi_done(xs);
 -}
 +}

Looks like you're introducing spurious whitespace here.

  int
  mpii_scsi_ioctl(struct scsi_link *link, u_long cmd, caddr_t addr, int flag)
  {
   struct mpii_softc   *sc = (struct mpii_softc *)link-adapter_softc;
 + struct mpii_device  *dev = sc-sc_devs[link-target];
  
   DNPRINTF(MPII_D_IOCTL, %s: mpii_scsi_ioctl\n, DEVNAME(sc));
  
 - if (sc-sc_ioctl)
 - return (sc-sc_ioctl(link-adapter_softc, cmd, addr));
 - else
 - return (ENOTTY);
 + switch (cmd) {
 + case DIOCGCACHE:
 + case DIOCSCACHE:
 + if (dev != NULL  ISSET(dev-flags, MPII_DF_VOLUME)) {
 + return (mpii_ioctl_cache(link, cmd,
 + (struct dk_cache *)addr));
 + }
 + break;
 +
 + default:
 + if (sc-sc_ioctl)
 + return (sc-sc_ioctl(link-adapter_softc, cmd, addr));
 +
 + break;
 + }
 +
 + return (ENOTTY);
 +}
 +
 +int
 +mpii_ioctl_cache(struct scsi_link *link, u_long cmd, struct dk_cache *dc)
 +{
 + struct mpii_softc *sc = (struct mpii_softc *)link-adapter_softc;
 + struct mpii_device *dev = sc-sc_devs[link-target];
 + struct mpii_cfg_raid_vol_pg0 *vpg;
 + struct mpii_msg_raid_action_request *req;
 + struct mpii_msg_raid_action_reply *rep;
 + struct mpii_cfg_hdr hdr;
 + struct mpii_ccb *ccb;
 + u_int32_t addr = MPII_CFG_RAID_VOL_ADDR_HANDLE | 

Re: Dell R310 - H200 Raid performance problem

2011-02-20 Thread Mark Kettenis
 Date: Sun, 20 Feb 2011 07:03:25 -0500
 From: Kenneth R Westerback kwesterb...@rogers.com
 
 On Sun, Feb 20, 2011 at 12:39:06PM +0100, Mark Kettenis wrote:
   Date: Sun, 20 Feb 2011 19:54:21 +1000
   From: David Gwynne l...@animata.net
  
how to manipulate write cache policy?
   
   the lsi firmwares dont implement handling of the mod page changes
   unfortunately. you could call the ioctl this implements yourself
   though from userland.
  
  David, while I think that implementing the cache manipulation ioctls
  for mpii(4) is a good idea, there is a problem here.  We don't have a
  tool in base that actually issues those ioctls.  And unless I'm
  misreading the diff, this still leaves the cache disabled on the
  stupid Dell.
 
 DIOCSCACHE is called in sdattach() to enable write cache for all
 disks that DIOCGCACHE reports as having write cache disabled. Or are
 you concerned that we have no way to manipulate it from userland
 if/when the default needs to be modified?

Ah, that's the bit I was missing.  A userland tool to display and
manipulate the cache settings would still be good though.
Functionality should probably be added to bioctl(8).  A bit
unfortunate that both the -c and -C options are already taken.



Re: Dell R310 - H200 Raid performance problem

2011-02-20 Thread Okan Demirmen
On Sun 2011.02.20 at 13:28 +0100, Mark Kettenis wrote:
  Date: Sun, 20 Feb 2011 07:03:25 -0500
  From: Kenneth R Westerback kwesterb...@rogers.com
  
  On Sun, Feb 20, 2011 at 12:39:06PM +0100, Mark Kettenis wrote:
Date: Sun, 20 Feb 2011 19:54:21 +1000
From: David Gwynne l...@animata.net
   
 how to manipulate write cache policy?

the lsi firmwares dont implement handling of the mod page changes
unfortunately. you could call the ioctl this implements yourself
though from userland.
   
   David, while I think that implementing the cache manipulation ioctls
   for mpii(4) is a good idea, there is a problem here.  We don't have a
   tool in base that actually issues those ioctls.  And unless I'm
   misreading the diff, this still leaves the cache disabled on the
   stupid Dell.
  
  DIOCSCACHE is called in sdattach() to enable write cache for all
  disks that DIOCGCACHE reports as having write cache disabled. Or are
  you concerned that we have no way to manipulate it from userland
  if/when the default needs to be modified?
 
 Ah, that's the bit I was missing.  A userland tool to display and
 manipulate the cache settings would still be good though.
 Functionality should probably be added to bioctl(8).  A bit
 unfortunate that both the -c and -C options are already taken.

Ah, I had a diff for bioctl (enable/disable WCE/RCD) based on dlg's
sample, but I think marco wanted more of a policy of when to do WCE/RCD
rather than a switch - I'll send it along when I get home later this
week.



Re: Dell R310 - H200 Raid performance problem

2011-02-20 Thread Marco Peereboom
bah!

On Sun, Feb 20, 2011 at 07:20:19PM +, Stuart Henderson wrote:
 On 2011/02/20 11:59, Ted Unangst wrote:
  On Sun, Feb 20, 2011 at 7:28 AM, Mark Kettenis mark.kette...@xs4all.nl
  wrote:
   Ah, that's the bit I was missing.  A userland tool to display and
   manipulate the cache settings would still be good though.
   Functionality should probably be added to bioctl(8).  A bit
   unfortunate that both the -c and -C options are already taken.
 
 -w or -W wouldn't be too bad an alternative (_w_rite cache).
 
  We also have a scsi(8) tool that seems more analogous to atactl (which
  can manipulate cache behavior).
 
 scsi(8) can manipulate write cache on some drives too. But in this
 case we're talking about a setting for the volume rather than for
 drives, so bioctl(8) wouldn't be a bad choice. (I don't know about
 mpii, but for mpi the vendor management tool in some OS allows
 you to set this, and bioctl is the closest analogue to this).



Re: Dell R310 - H200 Raid performance problem

2011-02-18 Thread Łukasz Czarniecki
With following Mike's suggestions it worked.

 could you please change this line
 if (mpii_req_cfg_page(sc, addr, 0, hdr, 1, vpg, pagelen) != 0) {

 to
 if (mpii_req_cfg_page(sc, addr, MPII_PG_POLL, hdr, 1, vpg,
pagelen) != 0) {

 and one more:

 this:
 if (mpii_req_cfg_header(sc, MPII_CONFIG_REQ_PAGE_TYPE_RAID_VOL, 0,
addr, 0, hdr) != 0)
 to:
 if (mpii_req_cfg_header(sc, MPII_CONFIG_REQ_PAGE_TYPE_RAID_VOL, 0,
addr, MPII_PG_POLL, hdr) != 0)


mpii0 at pci2 dev 0 function 0 Symbios Logic SAS2008 rev 0x02: apic 0
int 16 (irq 15)
scsibus0 at mpii0: 42 targets
sd0 at scsibus0 targ 1 lun 0: Dell, Virtual Disk, 1028 SCSI4 0/direct
fixed
sd0: 237824MB, 512 bytes/sec, 487063552 sec total
ses0 at scsibus0 targ 10 lun 0: DP, BACKPLANE, 1.07 SCSI3 13/enclosure
services fixed
ses0: unable to read enclosure configuration

# scsi -f /dev/rsd0c -m 8
IC:  0
ABPF:  0
CAP:  0
DISC:  0
SIZE:  0
WCE:  1
MF:  0
RCD:  0
Demand Retention Priority:  0
Write Retention Priority:  0
Disable Pre-fetch Transfer Length:  65535
Minimum Pre-fetch:  0
Maximum Pre-fetch:  65280
Maximum Pre-fetch Ceiling:  65535
FSW:  0
LBCSS:  0
DRA:  0
Vendor-specific:  0
NV_DIS:  0
Number of Cache Segments:  15
Cache Segment Size:  0

how to manipulate write cache policy?

Lukasz



Re: Dell R310 - H200 Raid performance problem

2011-02-17 Thread Łukasz Czarniecki
On 17.02.2011 16:22, Mike Belopuhov wrote:

 Lukasz has tested the patch below and it works fine for him.  I don't
 have the hardware myself, so I'm not going to push it for the release,
 but if someone thinks it's worth it, please speak up.

Here are some numbers:

4.8
# time tar xzf ./sys.tar.gz
0m11.06s real 0m0.80s user 0m0.86s system
w/softdeps
0m4.97s real 0m0.68s user 0m0.58s system
Current
0m7.13s real 0m0.75s user 0m0.83s system
w/softdeps
0m3.72s real 0m0.60s user 0m0.37s system

It seems that 4.9 has a lot of improvements.
Big thanks to Mike and all the developers.

Lukasz



Re: Dell R310 - H200 Raid performance problem

2011-02-17 Thread Mike Belopuhov
On Thu, Feb 10, 2011 at 14:25 +0100, Lukasz Czarniecki wrote:
 Hi
 
 I've bought a Dell R310 with H200 raid controller reported in dmesg as:
 Symbios Logic SAS2008. It uses mpii driver and has two hard drives
 configured in RAID 1.
 
 Now it seems to work fine but i still have a problem with its
 performance. Raid is fully initialized.
 
 How can I help to resolve this problem?
 
 I'm doing simple benchmark:
 wget ftp.spline.de/pub/OpenBSD/4.8/sys.tar.gz
 time tar xzf ./sys.tar.gz
 
 On the same hardware Linux unpacks it in less than two seconds.
 
 Numbers for OpenBSD:
 4.8 amd64 sp: 3m40.95s real 0m0.65s user 0m0.71s system
 4.8 amd64 mp-stable: 3m43.36s real 0m0.48s user 0m0.98s system
 4.9 amd64 sp: 3m47.72s real 0m0.51s user 0m0.69s system
 4.9 i386 rd : 3m45.11s real 0m1.03s user 0m1.19s system
 

Lukasz and I have figured out that the disk write cache gets turned
off by the Dell firmware when you create a volume (it doesn't get
disabled if you use single drives):

http://support.dell.com/support/edocs/storage/storlink/h200/en/ug/html/features.htm#wp1062398

The H200 has no onboard memory or battery, and there's no possibility
of installing them, so the device becomes pretty much useless
unless the operating system takes care of it.  Apparently Linux
does.  Should OpenBSD do the same?  In my opinion yes.

Lukasz has tested the patch below and it works fine for him.  I don't
have the hardware myself, so I'm not going to push it for the release,
but if someone thinks it's worth it, please speak up.


Index: mpii.c
===
RCS file: /home/cvs/src/sys/dev/pci/mpii.c,v
retrieving revision 1.37
diff -u -p -r1.37 mpii.c
--- mpii.c  29 Dec 2010 03:55:09 -  1.37
+++ mpii.c  17 Feb 2011 15:15:25 -
@@ -981,6 +981,52 @@ struct mpii_msg_sas_oper_reply {
u_int32_t   ioc_loginfo;
 } __packed;
 
+struct mpii_msg_raid_action_request {
+   u_int8_taction;
+#define MPII_RAID_ACTION_CHANGE_VOL_WRITE_CACHE(0x17)
+   u_int8_treserved1;
+   u_int8_tchain_offset;
+   u_int8_tfunction;
+
+   u_int16_t   vol_dev_handle;
+   u_int8_tphys_disk_num;
+   u_int8_tmsg_flags;
+
+   u_int8_tvp_id;
+   u_int8_tvf_if;
+   u_int16_t   reserved2;
+
+   u_int32_t   reserved3;
+
+   u_int32_t   action_data;
+#define MPII_RAID_VOL_WRITE_CACHE_DISABLE  (0x01)
+#define MPII_RAID_VOL_WRITE_CACHE_ENABLE   (0x02)
+
+   struct mpii_sge action_sge;
+} __packed;
+
+struct mpii_msg_raid_action_reply {
+   u_int8_taction;
+   u_int8_treserved1;
+   u_int8_tchain_offset;
+   u_int8_tfunction;
+
+   u_int16_t   vol_dev_handle;
+   u_int8_tphys_disk_num;
+   u_int8_tmsg_flags;
+
+   u_int8_tvp_id;
+   u_int8_tvf_if;
+   u_int16_t   reserved2;
+
+   u_int16_t   reserved3;
+   u_int16_t   ioc_status;
+
+   u_int32_t   action_data[5];
+
+   struct mpii_sge action_sge;
+} __packed;
+
 struct mpii_cfg_hdr {
u_int8_tpage_version;
u_int8_tpage_length;
@@ -1972,6 +2018,8 @@ int   mpii_req_cfg_page(struct mpii_softc
 
 intmpii_get_ioc_pg8(struct mpii_softc *);
 
+void   mpii_cache_enable(struct mpii_softc *);
+
 #if NBIO  0
 intmpii_ioctl(struct device *, u_long, caddr_t);
 intmpii_ioctl_inq(struct mpii_softc *, struct bioc_inq *);
@@ -2175,6 +2223,9 @@ mpii_attach(struct device *parent, struc
goto free_dev;
}
 
+   /* enable write cache */
+   mpii_cache_enable(sc);
+
/* we should be good to go now, attach scsibus */
sc-sc_link.adapter = mpii_switch;
sc-sc_link.adapter_softc = sc;
@@ -3206,6 +3257,45 @@ mpii_cfg_coalescing(struct mpii_softc *s
}
 
return (0);
+}
+
+void
+mpii_cache_enable(struct mpii_softc *sc)
+{
+   struct mpii_msg_raid_action_request *req;
+   struct mpii_device  *dev;
+   struct mpii_ccb *ccb;
+   int i;
+
+   ccb = scsi_io_get(&sc->sc_iopool, 0);
+   if (ccb == NULL)
+   return;
+
+   for (i = 0; i < sc->sc_max_devices; i++) {
+   if (sc->sc_devs[i] == NULL ||
+   !ISSET(sc->sc_devs[i]->flags, MPII_DF_VOLUME))
+   continue;
+
+   dev = sc->sc_devs[i];
+
+   ccb->ccb_state = MPII_CCB_READY;
+   ccb->ccb_rcb = NULL;
+   ccb->ccb_done = mpii_empty_done;
+
+   req = ccb->ccb_cmd;
+   bzero(req, sizeof(*req));
+   req->function = MPII_FUNCTION_RAID_ACTION;
+   req->action = MPII_RAID_ACTION_CHANGE_VOL_WRITE_CACHE;
+   

Re: Dell R310 - H200 Raid performance problem

2011-02-17 Thread Marco Peereboom
On Thu, Feb 17, 2011 at 04:22:54PM +0100, Mike Belopuhov wrote:
 On Thu, Feb 10, 2011 at 14:25 +0100, Lukasz Czarniecki wrote:
  Hi
  
  I've bought a Dell R310 with H200 raid controller reported in dmesg as:
  Symbios Logic SAS2008. It uses mpii driver and has two hard drives
  configured in RAID 1.
  
  Now it seems to work fine, but I still have a problem with its
  performance. The RAID is fully initialized.
  
  How can I help to resolve this problem?
  
  I'm doing simple benchmark:
  wget ftp.spline.de/pub/OpenBSD/4.8/sys.tar.gz
  time tar xzf ./sys.tar.gz
  
  On the same hardware Linux unpacks it in less than two seconds.
  
  Numbers for OpenBSD:
  4.8 amd64 sp: 3m40.95s real 0m0.65s user 0m0.71s system
  4.8 amd64 mp-stable: 3m43.36s real 0m0.48s user 0m0.98s system
  4.9 amd64 sp: 3m47.72s real 0m0.51s user 0m0.69s system
  4.9 i386 rd : 3m45.11s real 0m1.03s user 0m1.19s system
  
 
 Lukasz and I have figured out that the disk write cache gets turned
 off by the Dell firmware when you create a volume (it doesn't get
 disabled if you use single drives):
 
 http://support.dell.com/support/edocs/storage/storlink/h200/en/ug/html/features.htm#wp1062398
 
 The H200 has no onboard memory or battery, and there is no way to
 install them, so the device becomes pretty much useless unless the
 operating system takes care of enabling the disk write cache.
 Apparently Linux does.  Should OpenBSD do the same?  In my opinion, yes.

Linux does this and we should too.  All SATA manufacturers recommend
(read recommend very very strongly and call you names when you don't
listen) enabling write cache.

 
 Lukasz has tested the patch below and it works fine for him.  I don't
 have the hardware myself, so I'm not going to push it for the release,
 but if someone thinks it's worth it, please speak up.

I am OK with this making the release, and I think it should.  I did not
realize write-back (WB) caching was being disabled.

 
 
 Index: mpii.c
 ===
 RCS file: /home/cvs/src/sys/dev/pci/mpii.c,v
 retrieving revision 1.37
 diff -u -p -r1.37 mpii.c
 --- mpii.c29 Dec 2010 03:55:09 -  1.37
 +++ mpii.c17 Feb 2011 15:15:25 -
 @@ -981,6 +981,52 @@ struct mpii_msg_sas_oper_reply {
   u_int32_t   ioc_loginfo;
  } __packed;
  
 +struct mpii_msg_raid_action_request {
 + u_int8_t    action;
 +#define MPII_RAID_ACTION_CHANGE_VOL_WRITE_CACHE  (0x17)
 + u_int8_t    reserved1;
 + u_int8_t    chain_offset;
 + u_int8_t    function;
 +
 + u_int16_t   vol_dev_handle;
 + u_int8_t    phys_disk_num;
 + u_int8_t    msg_flags;
 +
 + u_int8_t    vp_id;
 + u_int8_t    vf_if;
 + u_int16_t   reserved2;
 +
 + u_int32_t   reserved3;
 +
 + u_int32_t   action_data;
 +#define MPII_RAID_VOL_WRITE_CACHE_DISABLE(0x01)
 +#define MPII_RAID_VOL_WRITE_CACHE_ENABLE (0x02)
 +
 + struct mpii_sge action_sge;
 +} __packed;
 +
 +struct mpii_msg_raid_action_reply {
 + u_int8_t    action;
 + u_int8_t    reserved1;
 + u_int8_t    chain_offset;
 + u_int8_t    function;
 +
 + u_int16_t   vol_dev_handle;
 + u_int8_t    phys_disk_num;
 + u_int8_t    msg_flags;
 +
 + u_int8_t    vp_id;
 + u_int8_t    vf_if;
 + u_int16_t   reserved2;
 +
 + u_int16_t   reserved3;
 + u_int16_t   ioc_status;
 +
 + u_int32_t   action_data[5];
 +
 + struct mpii_sge action_sge;
 +} __packed;
 +
  struct mpii_cfg_hdr {
   u_int8_t    page_version;
   u_int8_t    page_length;
 @@ -1972,6 +2018,8 @@ int mpii_req_cfg_page(struct mpii_softc
  
  int  mpii_get_ioc_pg8(struct mpii_softc *);
  
 +void mpii_cache_enable(struct mpii_softc *);
 +
  #if NBIO > 0
  int  mpii_ioctl(struct device *, u_long, caddr_t);
  int  mpii_ioctl_inq(struct mpii_softc *, struct bioc_inq *);
 @@ -2175,6 +2223,9 @@ mpii_attach(struct device *parent, struc
   goto free_dev;
   }
  
 + /* enable write cache */
 + mpii_cache_enable(sc);
 +
   /* we should be good to go now, attach scsibus */
   sc->sc_link.adapter = &mpii_switch;
   sc->sc_link.adapter_softc = sc;
 @@ -3206,6 +3257,45 @@ mpii_cfg_coalescing(struct mpii_softc *s
   }
  
   return (0);
 +}
 +
 +void
 +mpii_cache_enable(struct mpii_softc *sc)
 +{
 + struct mpii_msg_raid_action_request *req;
 + struct mpii_device  *dev;
 + struct mpii_ccb *ccb;
 + int i;
 +
 + ccb = scsi_io_get(&sc->sc_iopool, 0);
 + if (ccb == NULL)
 + return;
 +
 + for (i = 0; i < sc->sc_max_devices; i++) {
 + if (sc->sc_devs[i] == NULL ||
 + !ISSET(sc->sc_devs[i]->flags, MPII_DF_VOLUME))
 + continue;
 +
 + dev = sc->sc_devs[i];
 +
 +

Re: Dell R310 - H200 Raid performance problem

2011-02-17 Thread David Gwynne
this diff implements the disk cache ioctl handling in mpii so sd(4)
can drive the change rather than have mpii(4) whack everything.
modelled on the same functionality in mpi(4) and mikeb's code...

could someone test this please?

Index: mpii.c
===
RCS file: /cvs/src/sys/dev/pci/mpii.c,v
retrieving revision 1.37
diff -u -p -r1.37 mpii.c
--- mpii.c  29 Dec 2010 03:55:09 -  1.37
+++ mpii.c  18 Feb 2011 06:54:58 -
@@ -29,6 +29,7 @@
 #include <sys/kernel.h>
 #include <sys/rwlock.h>
 #include <sys/sensors.h>
+#include <sys/dkio.h>
 #include <sys/tree.h>
 
 #include <machine/bus.h>
@@ -981,6 +982,52 @@ struct mpii_msg_sas_oper_reply {
u_int32_t   ioc_loginfo;
 } __packed;
 
+struct mpii_msg_raid_action_request {
+   u_int8_t    action;
+#define MPII_RAID_ACTION_CHANGE_VOL_WRITE_CACHE    (0x17)
+   u_int8_t    reserved1;
+   u_int8_t    chain_offset;
+   u_int8_t    function;
+
+   u_int16_t   vol_dev_handle;
+   u_int8_t    phys_disk_num;
+   u_int8_t    msg_flags;
+
+   u_int8_t    vp_id;
+   u_int8_t    vf_if;
+   u_int16_t   reserved2;
+
+   u_int32_t   reserved3;
+
+   u_int32_t   action_data;
+#define MPII_RAID_VOL_WRITE_CACHE_DISABLE  (0x01)
+#define MPII_RAID_VOL_WRITE_CACHE_ENABLE   (0x02)
+
+   struct mpii_sge action_sge;
+} __packed;
+
+struct mpii_msg_raid_action_reply {
+   u_int8_t    action;
+   u_int8_t    reserved1;
+   u_int8_t    chain_offset;
+   u_int8_t    function;
+
+   u_int16_t   vol_dev_handle;
+   u_int8_t    phys_disk_num;
+   u_int8_t    msg_flags;
+
+   u_int8_t    vp_id;
+   u_int8_t    vf_if;
+   u_int16_t   reserved2;
+
+   u_int16_t   reserved3;
+   u_int16_t   ioc_status;
+
+   u_int32_t   action_data[5];
+
+   struct mpii_sge action_sge;
+} __packed;
+
 struct mpii_cfg_hdr {
u_int8_t    page_version;
u_int8_t    page_length;
@@ -1256,6 +1303,11 @@ struct mpii_cfg_raid_vol_pg0 {
 #define MPII_CFG_RAID_VOL_0_STATUS_RESYNC  (1<<16)
 
u_int16_t   volume_settings;
+#define MPII_CFG_RAID_VOL_0_SETTINGS_CACHE_MASK    (0x30)
+#define MPII_CFG_RAID_VOL_0_SETTINGS_CACHE_UNCHANGED   (0x00)
+#define MPII_CFG_RAID_VOL_0_SETTINGS_CACHE_DISABLED    (0x10)
+#define MPII_CFG_RAID_VOL_0_SETTINGS_CACHE_ENABLED (0x20)
+
u_int8_t    hot_spare_pool;
u_int8_t    reserved1;
 
@@ -1972,6 +2024,8 @@ int   mpii_req_cfg_page(struct mpii_softc
 
 int    mpii_get_ioc_pg8(struct mpii_softc *);
 
+int    mpii_ioctl_cache(struct scsi_link *, u_long, struct dk_cache *);
+
 #if NBIO > 0
 int    mpii_ioctl(struct device *, u_long, caddr_t);
 int    mpii_ioctl_inq(struct mpii_softc *, struct bioc_inq *);
@@ -4650,19 +4704,113 @@ mpii_scsi_cmd_done(struct mpii_ccb *ccb)
 
mpii_push_reply(sc, ccb->ccb_rcb);
scsi_done(xs);
-}
+}
 
 int
 mpii_scsi_ioctl(struct scsi_link *link, u_long cmd, caddr_t addr, int flag)
 {
struct mpii_softc   *sc = (struct mpii_softc *)link->adapter_softc;
+   struct mpii_device  *dev = sc->sc_devs[link->target];
 
DNPRINTF(MPII_D_IOCTL, "%s: mpii_scsi_ioctl\n", DEVNAME(sc));
 
-   if (sc->sc_ioctl)
-   return (sc->sc_ioctl(link->adapter_softc, cmd, addr));
-   else
-   return (ENOTTY);
+   switch (cmd) {
+   case DIOCGCACHE:
+   case DIOCSCACHE:
+   if (dev != NULL && ISSET(dev->flags, MPII_DF_VOLUME)) {
+   return (mpii_ioctl_cache(link, cmd,
+   (struct dk_cache *)addr));
+   }
+   break;
+
+   default:
+   if (sc->sc_ioctl)
+   return (sc->sc_ioctl(link->adapter_softc, cmd, addr));
+
+   break;
+   }
+
+   return (ENOTTY);
+}
+
+int
+mpii_ioctl_cache(struct scsi_link *link, u_long cmd, struct dk_cache *dc)
+{
+   struct mpii_softc *sc = (struct mpii_softc *)link->adapter_softc;
+   struct mpii_device *dev = sc->sc_devs[link->target];
+   struct mpii_cfg_raid_vol_pg0 *vpg;
+   struct mpii_msg_raid_action_request *req;
+   struct mpii_cfg_hdr hdr;
+   struct mpii_ccb *ccb;
+   u_int32_t addr = MPII_CFG_RAID_VOL_ADDR_HANDLE | dev->dev_handle;
+   size_t pagelen;
+   int rv = 0;
+   int enabled;
+
+   if (mpii_req_cfg_header(sc, MPII_CONFIG_REQ_PAGE_TYPE_RAID_VOL, 0,
+   addr, 0, &hdr) != 0)
+   return (EINVAL);
+
+   pagelen = hdr.page_length * 4;
+   vpg = malloc(pagelen, M_TEMP, M_WAITOK | M_CANFAIL | M_ZERO);
+   if (vpg == NULL)
+   return (ENOMEM);
+
+   if (mpii_req_cfg_page(sc, addr, 0, &hdr, 1,