Author: mav
Date: Thu May 24 02:34:03 2012
New Revision: 235874
URL: http://svn.freebsd.org/changeset/base/235874

Log:
  MFC r234458, r234603, r234610, r234727, r234816, r234848, r234868,
  r234869, r234899, r234940, r234993, r234994, r235071 -c r235076, r235080,
  r235096:
   - Add support for the DDF metadata format, as defined by the SNIA Common
  RAID Disk Data Format Specification v2.0;
   - Add support for reading non-degraded RAID4/5/5E/5EE/5R/6/MDF volumes.
  
  Sponsored by: iXsystems, Inc.

Added:
  stable/9/sys/geom/raid/md_ddf.c
     - copied, changed from r234848, head/sys/geom/raid/md_ddf.c
  stable/9/sys/geom/raid/md_ddf.h
     - copied unchanged from r234848, head/sys/geom/raid/md_ddf.h
  stable/9/sys/geom/raid/tr_raid5.c
     - copied, changed from r234458, head/sys/geom/raid/tr_raid5.c
Modified:
  stable/9/sbin/geom/class/raid/geom_raid.c
  stable/9/sbin/geom/class/raid/graid.8
  stable/9/sys/conf/files
  stable/9/sys/geom/raid/g_raid.c
  stable/9/sys/geom/raid/g_raid.h
  stable/9/sys/geom/raid/g_raid_ctl.c
  stable/9/sys/geom/raid/g_raid_md_if.m
  stable/9/sys/geom/raid/md_intel.c
  stable/9/sys/geom/raid/md_jmicron.c
  stable/9/sys/geom/raid/md_nvidia.c
  stable/9/sys/geom/raid/md_promise.c
  stable/9/sys/geom/raid/md_sii.c
  stable/9/sys/geom/raid/tr_raid1.c
  stable/9/sys/geom/raid/tr_raid1e.c
  stable/9/sys/modules/geom/geom_raid/Makefile
Directory Properties:
  stable/9/sbin/geom/   (props changed)
  stable/9/sys/   (props changed)
  stable/9/sys/conf/   (props changed)
  stable/9/sys/modules/   (props changed)

Modified: stable/9/sbin/geom/class/raid/geom_raid.c
==============================================================================
--- stable/9/sbin/geom/class/raid/geom_raid.c   Thu May 24 02:24:03 2012        (r235873)
+++ stable/9/sbin/geom/class/raid/geom_raid.c   Thu May 24 02:34:03 2012        (r235874)
@@ -48,11 +48,12 @@ struct g_command class_commands[] = {
        { "label", G_FLAG_VERBOSE, NULL,
            {
                { 'f', "force", NULL, G_TYPE_BOOL },
+               { 'o', "fmtopt", G_VAL_OPTIONAL, G_TYPE_STRING },
                { 'S', "size", G_VAL_OPTIONAL, G_TYPE_NUMBER },
                { 's', "strip", G_VAL_OPTIONAL, G_TYPE_NUMBER },
                G_OPT_SENTINEL
            },
-           "[-fv] [-S size] [-s stripsize] format label level prov ..."
+           "[-fv] [-o fmtopt] [-S size] [-s stripsize] format label level prov ..."
        },
        { "add", G_FLAG_VERBOSE, NULL,
            {

Modified: stable/9/sbin/geom/class/raid/graid.8
==============================================================================
--- stable/9/sbin/geom/class/raid/graid.8       Thu May 24 02:24:03 2012        (r235873)
+++ stable/9/sbin/geom/class/raid/graid.8       Thu May 24 02:34:03 2012        (r235874)
@@ -24,7 +24,7 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd October 26, 2011
+.Dd May 6, 2012
 .Dt GRAID 8
 .Os
 .Sh NAME
@@ -34,6 +34,7 @@
 .Nm
 .Cm label
 .Op Fl f
+.Op Fl o Ar fmtopt
 .Op Fl S Ar size
 .Op Fl s Ar strip
 .Ar format
@@ -119,6 +120,8 @@ Additional options include:
 .It Fl f
 Enforce specified configuration creation if it is officially unsupported,
 but technically can be created.
+.It Fl o Ar fmtopt
+Specifies metadata format options.
 .It Fl S Ar size
 Use
 .Ar size
@@ -200,6 +203,23 @@ The GEOM RAID class follows a modular de
 formats to be used.
 Support is currently implemented for the following formats:
 .Bl -tag -width "Intel"
+.It DDF
+The format defined by the SNIA Common RAID Disk Data Format v2.0 specification.
+Used by some Adaptec RAID BIOSes and some hardware RAID controllers.
+Because of high format flexibility, different implementations support
+different sets of features and have different on-disk metadata layouts.
+To provide compatibility, the GEOM RAID class mimics capabilities
+of the first detected DDF array.
+Respecting that, it may support different number of disks per volume,
+volumes per array, partitions per disk, etc.
+The following configurations are supported: RAID0 (2+ disks), RAID1 (2+ disks),
+RAID1E (3+ disks), RAID3 (3+ disks), RAID4 (3+ disks), RAID5 (3+ disks),
+RAID5E (4+ disks), RAID5EE (4+ disks), RAID5R (3+ disks), RAID6 (4+ disks),
+RAIDMDF (4+ disks), RAID10 (4+ disks), SINGLE (1 disk), CONCAT (2+ disks).
+.Pp
+The format supports two options, "BE" and "LE", which select the big-endian
+byte order defined by the specification (default) and the little-endian byte
+order used by some Adaptec controllers.
 .It Intel
 The format used by Intel RAID BIOS.
 Supports up to two volumes per array.
@@ -241,8 +261,11 @@ own risk: RAID1 (3+ disks), RAID10 (6+ d
 .Sh SUPPORTED RAID LEVELS
 The GEOM RAID class follows a modular design, allowing different RAID levels
 to be used.
-Support for the following RAID levels is currently implemented: RAID0, RAID1,
-RAID1E, RAID10, SINGLE, CONCAT.
+Full support for the following RAID levels is currently implemented:
+RAID0, RAID1, RAID1E, RAID10, SINGLE, CONCAT.
+The following RAID levels are supported as read-only for volumes in optimal
+state (without using redundancy): RAID4, RAID5, RAID5E, RAID5EE, RAID5R,
+RAID6, RAIDMDF.
 .Sh RAID LEVEL MIGRATION
 The GEOM RAID class has no support for RAID level migration, allowed by some
 metadata formats.
@@ -253,6 +276,33 @@ corruption!
 .Sh 2TiB BARRIERS
 Promise metadata format does not support disks above 2TiB.
 NVIDIA metadata format does not support volumes above 2TiB.
+.Sh SYSCTL VARIABLES
+The following
+.Xr sysctl 8
+variable can be used to control the behavior of the
+.Nm RAID
+GEOM class.
+.Bl -tag -width indent
+.It Va kern.geom.raid.aggressive_spare : No 0
+Use any disks without metadata connected to controllers of the vendor
+matching to volume metadata format as spare.
+Use it with much care to not lose data if connecting unrelated disk!
+.It Va kern.geom.raid.clean_time : No 5
+Mark volume as clean when idle for the specified number of seconds.
+.It Va kern.geom.raid.debug : No 0
+Debug level of the
+.Nm RAID
+GEOM class.
+.It Va kern.geom.raid.idle_threshold : No 1000000
+Time in microseconds to consider a volume idle for rebuild purposes.
+.It Va kern.geom.raid.name_format : No 0
+Providers name format: 0 -- raid/r{num}, 1 -- raid/{label}.
+.It Va kern.geom.raid.read_err_thresh : No 10
+Number of read errors equated to disk failure.
+Write errors are always considered as disk failures.
+.It Va kern.geom.raid.start_timeout : No 30
+Time to wait for missing array components on startup.
+.El
 .Sh EXIT STATUS
 Exit status is 0 on success, and non-zero if the command fails.
 .Sh SEE ALSO

Modified: stable/9/sys/conf/files
==============================================================================
--- stable/9/sys/conf/files     Thu May 24 02:24:03 2012        (r235873)
+++ stable/9/sys/conf/files     Thu May 24 02:34:03 2012        (r235874)
@@ -2253,6 +2253,7 @@ geom/raid/g_raid.c                optional geom_raid
 geom/raid/g_raid_ctl.c         optional geom_raid
 geom/raid/g_raid_md_if.m       optional geom_raid
 geom/raid/g_raid_tr_if.m       optional geom_raid
+geom/raid/md_ddf.c             optional geom_raid
 geom/raid/md_intel.c           optional geom_raid
 geom/raid/md_jmicron.c         optional geom_raid
 geom/raid/md_nvidia.c          optional geom_raid
@@ -2262,6 +2263,7 @@ geom/raid/tr_concat.c             optional geom_rai
 geom/raid/tr_raid0.c           optional geom_raid
 geom/raid/tr_raid1.c           optional geom_raid
 geom/raid/tr_raid1e.c          optional geom_raid
+geom/raid/tr_raid5.c           optional geom_raid
 geom/raid3/g_raid3.c           optional geom_raid3
 geom/raid3/g_raid3_ctl.c       optional geom_raid3
 geom/shsec/g_shsec.c           optional geom_shsec

Modified: stable/9/sys/geom/raid/g_raid.c
==============================================================================
--- stable/9/sys/geom/raid/g_raid.c     Thu May 24 02:24:03 2012        (r235873)
+++ stable/9/sys/geom/raid/g_raid.c     Thu May 24 02:34:03 2012        (r235874)
@@ -277,23 +277,87 @@ g_raid_volume_level2str(int level, int q
        case G_RAID_VOLUME_RL_RAID1:
                return ("RAID1");
        case G_RAID_VOLUME_RL_RAID3:
+               if (qual == G_RAID_VOLUME_RLQ_R3P0)
+                       return ("RAID3-P0");
+               if (qual == G_RAID_VOLUME_RLQ_R3PN)
+                       return ("RAID3-PN");
                return ("RAID3");
        case G_RAID_VOLUME_RL_RAID4:
+               if (qual == G_RAID_VOLUME_RLQ_R4P0)
+                       return ("RAID4-P0");
+               if (qual == G_RAID_VOLUME_RLQ_R4PN)
+                       return ("RAID4-PN");
                return ("RAID4");
        case G_RAID_VOLUME_RL_RAID5:
+               if (qual == G_RAID_VOLUME_RLQ_R5RA)
+                       return ("RAID5-RA");
+               if (qual == G_RAID_VOLUME_RLQ_R5RS)
+                       return ("RAID5-RS");
+               if (qual == G_RAID_VOLUME_RLQ_R5LA)
+                       return ("RAID5-LA");
+               if (qual == G_RAID_VOLUME_RLQ_R5LS)
+                       return ("RAID5-LS");
                return ("RAID5");
        case G_RAID_VOLUME_RL_RAID6:
+               if (qual == G_RAID_VOLUME_RLQ_R6RA)
+                       return ("RAID6-RA");
+               if (qual == G_RAID_VOLUME_RLQ_R6RS)
+                       return ("RAID6-RS");
+               if (qual == G_RAID_VOLUME_RLQ_R6LA)
+                       return ("RAID6-LA");
+               if (qual == G_RAID_VOLUME_RLQ_R6LS)
+                       return ("RAID6-LS");
                return ("RAID6");
+       case G_RAID_VOLUME_RL_RAIDMDF:
+               if (qual == G_RAID_VOLUME_RLQ_RMDFRA)
+                       return ("RAIDMDF-RA");
+               if (qual == G_RAID_VOLUME_RLQ_RMDFRS)
+                       return ("RAIDMDF-RS");
+               if (qual == G_RAID_VOLUME_RLQ_RMDFLA)
+                       return ("RAIDMDF-LA");
+               if (qual == G_RAID_VOLUME_RLQ_RMDFLS)
+                       return ("RAIDMDF-LS");
+               return ("RAIDMDF");
        case G_RAID_VOLUME_RL_RAID1E:
+               if (qual == G_RAID_VOLUME_RLQ_R1EA)
+                       return ("RAID1E-A");
+               if (qual == G_RAID_VOLUME_RLQ_R1EO)
+                       return ("RAID1E-O");
                return ("RAID1E");
        case G_RAID_VOLUME_RL_SINGLE:
                return ("SINGLE");
        case G_RAID_VOLUME_RL_CONCAT:
                return ("CONCAT");
        case G_RAID_VOLUME_RL_RAID5E:
+               if (qual == G_RAID_VOLUME_RLQ_R5ERA)
+                       return ("RAID5E-RA");
+               if (qual == G_RAID_VOLUME_RLQ_R5ERS)
+                       return ("RAID5E-RS");
+               if (qual == G_RAID_VOLUME_RLQ_R5ELA)
+                       return ("RAID5E-LA");
+               if (qual == G_RAID_VOLUME_RLQ_R5ELS)
+                       return ("RAID5E-LS");
                return ("RAID5E");
        case G_RAID_VOLUME_RL_RAID5EE:
+               if (qual == G_RAID_VOLUME_RLQ_R5EERA)
+                       return ("RAID5EE-RA");
+               if (qual == G_RAID_VOLUME_RLQ_R5EERS)
+                       return ("RAID5EE-RS");
+               if (qual == G_RAID_VOLUME_RLQ_R5EELA)
+                       return ("RAID5EE-LA");
+               if (qual == G_RAID_VOLUME_RLQ_R5EELS)
+                       return ("RAID5EE-LS");
                return ("RAID5EE");
+       case G_RAID_VOLUME_RL_RAID5R:
+               if (qual == G_RAID_VOLUME_RLQ_R5RRA)
+                       return ("RAID5R-RA");
+               if (qual == G_RAID_VOLUME_RLQ_R5RRS)
+                       return ("RAID5R-RS");
+               if (qual == G_RAID_VOLUME_RLQ_R5RLA)
+                       return ("RAID5R-LA");
+               if (qual == G_RAID_VOLUME_RLQ_R5RLS)
+                       return ("RAID5R-LS");
+               return ("RAID5R");
        default:
                return ("UNKNOWN");
        }
@@ -309,26 +373,111 @@ g_raid_volume_str2level(const char *str,
                *level = G_RAID_VOLUME_RL_RAID0;
        else if (strcasecmp(str, "RAID1") == 0)
                *level = G_RAID_VOLUME_RL_RAID1;
-       else if (strcasecmp(str, "RAID3") == 0)
+       else if (strcasecmp(str, "RAID3-P0") == 0) {
+               *level = G_RAID_VOLUME_RL_RAID3;
+               *qual = G_RAID_VOLUME_RLQ_R3P0;
+       } else if (strcasecmp(str, "RAID3-PN") == 0 ||
+                  strcasecmp(str, "RAID3") == 0) {
                *level = G_RAID_VOLUME_RL_RAID3;
-       else if (strcasecmp(str, "RAID4") == 0)
+               *qual = G_RAID_VOLUME_RLQ_R3PN;
+       } else if (strcasecmp(str, "RAID4-P0") == 0) {
+               *level = G_RAID_VOLUME_RL_RAID4;
+               *qual = G_RAID_VOLUME_RLQ_R4P0;
+       } else if (strcasecmp(str, "RAID4-PN") == 0 ||
+                  strcasecmp(str, "RAID4") == 0) {
                *level = G_RAID_VOLUME_RL_RAID4;
-       else if (strcasecmp(str, "RAID5") == 0)
+               *qual = G_RAID_VOLUME_RLQ_R4PN;
+       } else if (strcasecmp(str, "RAID5-RA") == 0) {
+               *level = G_RAID_VOLUME_RL_RAID5;
+               *qual = G_RAID_VOLUME_RLQ_R5RA;
+       } else if (strcasecmp(str, "RAID5-RS") == 0) {
+               *level = G_RAID_VOLUME_RL_RAID5;
+               *qual = G_RAID_VOLUME_RLQ_R5RS;
+       } else if (strcasecmp(str, "RAID5") == 0 ||
+                  strcasecmp(str, "RAID5-LA") == 0) {
                *level = G_RAID_VOLUME_RL_RAID5;
-       else if (strcasecmp(str, "RAID6") == 0)
+               *qual = G_RAID_VOLUME_RLQ_R5LA;
+       } else if (strcasecmp(str, "RAID5-LS") == 0) {
+               *level = G_RAID_VOLUME_RL_RAID5;
+               *qual = G_RAID_VOLUME_RLQ_R5LS;
+       } else if (strcasecmp(str, "RAID6-RA") == 0) {
+               *level = G_RAID_VOLUME_RL_RAID6;
+               *qual = G_RAID_VOLUME_RLQ_R6RA;
+       } else if (strcasecmp(str, "RAID6-RS") == 0) {
+               *level = G_RAID_VOLUME_RL_RAID6;
+               *qual = G_RAID_VOLUME_RLQ_R6RS;
+       } else if (strcasecmp(str, "RAID6") == 0 ||
+                  strcasecmp(str, "RAID6-LA") == 0) {
+               *level = G_RAID_VOLUME_RL_RAID6;
+               *qual = G_RAID_VOLUME_RLQ_R6LA;
+       } else if (strcasecmp(str, "RAID6-LS") == 0) {
                *level = G_RAID_VOLUME_RL_RAID6;
-       else if (strcasecmp(str, "RAID10") == 0 ||
-                strcasecmp(str, "RAID1E") == 0)
+               *qual = G_RAID_VOLUME_RLQ_R6LS;
+       } else if (strcasecmp(str, "RAIDMDF-RA") == 0) {
+               *level = G_RAID_VOLUME_RL_RAIDMDF;
+               *qual = G_RAID_VOLUME_RLQ_RMDFRA;
+       } else if (strcasecmp(str, "RAIDMDF-RS") == 0) {
+               *level = G_RAID_VOLUME_RL_RAIDMDF;
+               *qual = G_RAID_VOLUME_RLQ_RMDFRS;
+       } else if (strcasecmp(str, "RAIDMDF") == 0 ||
+                  strcasecmp(str, "RAIDMDF-LA") == 0) {
+               *level = G_RAID_VOLUME_RL_RAIDMDF;
+               *qual = G_RAID_VOLUME_RLQ_RMDFLA;
+       } else if (strcasecmp(str, "RAIDMDF-LS") == 0) {
+               *level = G_RAID_VOLUME_RL_RAIDMDF;
+               *qual = G_RAID_VOLUME_RLQ_RMDFLS;
+       } else if (strcasecmp(str, "RAID10") == 0 ||
+                  strcasecmp(str, "RAID1E") == 0 ||
+                  strcasecmp(str, "RAID1E-A") == 0) {
                *level = G_RAID_VOLUME_RL_RAID1E;
-       else if (strcasecmp(str, "SINGLE") == 0)
+               *qual = G_RAID_VOLUME_RLQ_R1EA;
+       } else if (strcasecmp(str, "RAID1E-O") == 0) {
+               *level = G_RAID_VOLUME_RL_RAID1E;
+               *qual = G_RAID_VOLUME_RLQ_R1EO;
+       } else if (strcasecmp(str, "SINGLE") == 0)
                *level = G_RAID_VOLUME_RL_SINGLE;
        else if (strcasecmp(str, "CONCAT") == 0)
                *level = G_RAID_VOLUME_RL_CONCAT;
-       else if (strcasecmp(str, "RAID5E") == 0)
+       else if (strcasecmp(str, "RAID5E-RA") == 0) {
+               *level = G_RAID_VOLUME_RL_RAID5E;
+               *qual = G_RAID_VOLUME_RLQ_R5ERA;
+       } else if (strcasecmp(str, "RAID5E-RS") == 0) {
+               *level = G_RAID_VOLUME_RL_RAID5E;
+               *qual = G_RAID_VOLUME_RLQ_R5ERS;
+       } else if (strcasecmp(str, "RAID5E") == 0 ||
+                  strcasecmp(str, "RAID5E-LA") == 0) {
                *level = G_RAID_VOLUME_RL_RAID5E;
-       else if (strcasecmp(str, "RAID5EE") == 0)
+               *qual = G_RAID_VOLUME_RLQ_R5ELA;
+       } else if (strcasecmp(str, "RAID5E-LS") == 0) {
+               *level = G_RAID_VOLUME_RL_RAID5E;
+               *qual = G_RAID_VOLUME_RLQ_R5ELS;
+       } else if (strcasecmp(str, "RAID5EE-RA") == 0) {
                *level = G_RAID_VOLUME_RL_RAID5EE;
-       else
+               *qual = G_RAID_VOLUME_RLQ_R5EERA;
+       } else if (strcasecmp(str, "RAID5EE-RS") == 0) {
+               *level = G_RAID_VOLUME_RL_RAID5EE;
+               *qual = G_RAID_VOLUME_RLQ_R5EERS;
+       } else if (strcasecmp(str, "RAID5EE") == 0 ||
+                  strcasecmp(str, "RAID5EE-LA") == 0) {
+               *level = G_RAID_VOLUME_RL_RAID5EE;
+               *qual = G_RAID_VOLUME_RLQ_R5EELA;
+       } else if (strcasecmp(str, "RAID5EE-LS") == 0) {
+               *level = G_RAID_VOLUME_RL_RAID5EE;
+               *qual = G_RAID_VOLUME_RLQ_R5EELS;
+       } else if (strcasecmp(str, "RAID5R-RA") == 0) {
+               *level = G_RAID_VOLUME_RL_RAID5R;
+               *qual = G_RAID_VOLUME_RLQ_R5RRA;
+       } else if (strcasecmp(str, "RAID5R-RS") == 0) {
+               *level = G_RAID_VOLUME_RL_RAID5R;
+               *qual = G_RAID_VOLUME_RLQ_R5RRS;
+       } else if (strcasecmp(str, "RAID5R") == 0 ||
+                  strcasecmp(str, "RAID5R-LA") == 0) {
+               *level = G_RAID_VOLUME_RL_RAID5R;
+               *qual = G_RAID_VOLUME_RLQ_R5RLA;
+       } else if (strcasecmp(str, "RAID5R-LS") == 0) {
+               *level = G_RAID_VOLUME_RL_RAID5R;
+               *qual = G_RAID_VOLUME_RLQ_R5RLS;
+       } else
                return (-1);
        return (0);
 }
@@ -1674,8 +1823,8 @@ g_raid_create_node(struct g_class *mp,
        sc->sc_flags = 0;
        TAILQ_INIT(&sc->sc_volumes);
        TAILQ_INIT(&sc->sc_disks);
-       sx_init(&sc->sc_lock, "gmirror:lock");
-       mtx_init(&sc->sc_queue_mtx, "gmirror:queue", NULL, MTX_DEF);
+       sx_init(&sc->sc_lock, "graid:lock");
+       mtx_init(&sc->sc_queue_mtx, "graid:queue", NULL, MTX_DEF);
        TAILQ_INIT(&sc->sc_events);
        bioq_init(&sc->sc_queue);
        gp->softc = sc;
@@ -1707,6 +1856,7 @@ g_raid_create_volume(struct g_raid_softc
        vol->v_state = G_RAID_VOLUME_S_STARTING;
        vol->v_raid_level = G_RAID_VOLUME_RL_UNKNOWN;
        vol->v_raid_level_qualifier = G_RAID_VOLUME_RLQ_UNKNOWN;
+       vol->v_rotate_parity = 1;
        bioq_init(&vol->v_inflight);
        bioq_init(&vol->v_locked);
        LIST_INIT(&vol->v_locks);
@@ -1994,7 +2144,7 @@ g_raid_taste(struct g_class *mp, struct 
        g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name);
        G_RAID_DEBUG(2, "Tasting provider %s.", pp->name);
 
-       gp = g_new_geomf(mp, "mirror:taste");
+       gp = g_new_geomf(mp, "raid:taste");
        /*
         * This orphan function should be never called.
         */
@@ -2024,7 +2174,8 @@ g_raid_taste(struct g_class *mp, struct 
 }
 
 int
-g_raid_create_node_format(const char *format, struct g_geom **gp)
+g_raid_create_node_format(const char *format, struct gctl_req *req,
+    struct g_geom **gp)
 {
        struct g_raid_md_class *class;
        struct g_raid_md_object *obj;
@@ -2042,7 +2193,7 @@ g_raid_create_node_format(const char *fo
        obj = (void *)kobj_create((kobj_class_t)class, M_RAID,
            M_WAITOK);
        obj->mdo_class = class;
-       status = G_RAID_MD_CREATE(obj, &g_raid_class, gp);
+       status = G_RAID_MD_CREATE_REQ(obj, &g_raid_class, req, gp);
        if (status != G_RAID_MD_TASTE_NEW)
                kobj_delete((kobj_t)obj, M_RAID);
        return (status);

Modified: stable/9/sys/geom/raid/g_raid.h
==============================================================================
--- stable/9/sys/geom/raid/g_raid.h     Thu May 24 02:24:03 2012        (r235873)
+++ stable/9/sys/geom/raid/g_raid.h     Thu May 24 02:34:03 2012        (r235874)
@@ -219,14 +219,48 @@ struct g_raid_subdisk {
 #define G_RAID_VOLUME_RL_RAID4         0x04
 #define G_RAID_VOLUME_RL_RAID5         0x05
 #define G_RAID_VOLUME_RL_RAID6         0x06
+#define G_RAID_VOLUME_RL_RAIDMDF       0x07
 #define G_RAID_VOLUME_RL_RAID1E                0x11
 #define G_RAID_VOLUME_RL_SINGLE                0x0f
 #define G_RAID_VOLUME_RL_CONCAT                0x1f
 #define G_RAID_VOLUME_RL_RAID5E                0x15
 #define G_RAID_VOLUME_RL_RAID5EE       0x25
+#define G_RAID_VOLUME_RL_RAID5R                0x35
 #define G_RAID_VOLUME_RL_UNKNOWN       0xff
 
 #define G_RAID_VOLUME_RLQ_NONE         0x00
+#define G_RAID_VOLUME_RLQ_R1SM         0x00
+#define G_RAID_VOLUME_RLQ_R1MM         0x01
+#define G_RAID_VOLUME_RLQ_R3P0         0x00
+#define G_RAID_VOLUME_RLQ_R3PN         0x01
+#define G_RAID_VOLUME_RLQ_R4P0         0x00
+#define G_RAID_VOLUME_RLQ_R4PN         0x01
+#define G_RAID_VOLUME_RLQ_R5RA         0x00
+#define G_RAID_VOLUME_RLQ_R5RS         0x01
+#define G_RAID_VOLUME_RLQ_R5LA         0x02
+#define G_RAID_VOLUME_RLQ_R5LS         0x03
+#define G_RAID_VOLUME_RLQ_R6RA         0x00
+#define G_RAID_VOLUME_RLQ_R6RS         0x01
+#define G_RAID_VOLUME_RLQ_R6LA         0x02
+#define G_RAID_VOLUME_RLQ_R6LS         0x03
+#define G_RAID_VOLUME_RLQ_RMDFRA       0x00
+#define G_RAID_VOLUME_RLQ_RMDFRS       0x01
+#define G_RAID_VOLUME_RLQ_RMDFLA       0x02
+#define G_RAID_VOLUME_RLQ_RMDFLS       0x03
+#define G_RAID_VOLUME_RLQ_R1EA         0x00
+#define G_RAID_VOLUME_RLQ_R1EO         0x01
+#define G_RAID_VOLUME_RLQ_R5ERA                0x00
+#define G_RAID_VOLUME_RLQ_R5ERS                0x01
+#define G_RAID_VOLUME_RLQ_R5ELA                0x02
+#define G_RAID_VOLUME_RLQ_R5ELS                0x03
+#define G_RAID_VOLUME_RLQ_R5EERA       0x00
+#define G_RAID_VOLUME_RLQ_R5EERS       0x01
+#define G_RAID_VOLUME_RLQ_R5EELA       0x02
+#define G_RAID_VOLUME_RLQ_R5EELS       0x03
+#define G_RAID_VOLUME_RLQ_R5RRA                0x00
+#define G_RAID_VOLUME_RLQ_R5RRS                0x01
+#define G_RAID_VOLUME_RLQ_R5RLA                0x02
+#define G_RAID_VOLUME_RLQ_R5RLS                0x03
 #define G_RAID_VOLUME_RLQ_UNKNOWN      0xff
 
 struct g_raid_volume;
@@ -244,7 +278,13 @@ struct g_raid_volume {
        u_int                    v_raid_level;  /* Array RAID level. */
        u_int                    v_raid_level_qualifier; /* RAID level det. */
        u_int                    v_disks_count; /* Number of disks in array. */
+       u_int                    v_mdf_pdisks;  /* Number of parity disks
+                                                  in RAIDMDF array. */
+       uint16_t                 v_mdf_polynomial; /* Polynomial for RAIDMDF. */
+       uint8_t                  v_mdf_method;  /* Generation method for RAIDMDF. */
        u_int                    v_strip_size;  /* Array strip size. */
+       u_int                    v_rotate_parity; /* Rotate RAID5R parity
+                                                  after number of stripes. */
        u_int                    v_sectorsize;  /* Volume sector size. */
        off_t                    v_mediasize;   /* Volume media size.  */
        struct bio_queue_head    v_inflight;    /* In-flight write requests. */
@@ -348,7 +388,8 @@ const char * g_raid_disk_state2str(int s
 
 struct g_raid_softc * g_raid_create_node(struct g_class *mp,
     const char *name, struct g_raid_md_object *md);
-int g_raid_create_node_format(const char *format, struct g_geom **gp);
+int g_raid_create_node_format(const char *format, struct gctl_req *req,
+    struct g_geom **gp);
 struct g_raid_volume * g_raid_create_volume(struct g_raid_softc *sc,
     const char *name, int id);
 struct g_raid_disk * g_raid_create_disk(struct g_raid_softc *sc);

Modified: stable/9/sys/geom/raid/g_raid_ctl.c
==============================================================================
--- stable/9/sys/geom/raid/g_raid_ctl.c Thu May 24 02:24:03 2012        (r235873)
+++ stable/9/sys/geom/raid/g_raid_ctl.c Thu May 24 02:34:03 2012        (r235874)
@@ -88,7 +88,7 @@ g_raid_ctl_label(struct gctl_req *req, s
                gctl_error(req, "No format recieved.");
                return;
        }
-       crstatus = g_raid_create_node_format(format, &geom);
+       crstatus = g_raid_create_node_format(format, req, &geom);
        if (crstatus == G_RAID_MD_TASTE_FAIL) {
                gctl_error(req, "Failed to create array with format '%s'.",
                    format);

Modified: stable/9/sys/geom/raid/g_raid_md_if.m
==============================================================================
--- stable/9/sys/geom/raid/g_raid_md_if.m       Thu May 24 02:24:03 2012        (r235873)
+++ stable/9/sys/geom/raid/g_raid_md_if.m       Thu May 24 02:34:03 2012        (r235874)
@@ -49,13 +49,22 @@ HEADER {
 # Default implementations of methods.
 CODE {
        static int
-       g_raid_md_create_default(struct g_raid_md_object *md)
+       g_raid_md_create_default(struct g_raid_md_object *md,
+           struct g_class *mp, struct g_geom **gp)
        {
 
                return (G_RAID_MD_TASTE_FAIL);
        }
 
        static int
+       g_raid_md_create_req_default(struct g_raid_md_object *md,
+           struct g_class *mp, struct gctl_req *req, struct g_geom **gp)
+       {
+
+               return (G_RAID_MD_CREATE(md, mp, gp));
+       }
+
+       static int
        g_raid_md_ctl_default(struct g_raid_md_object *md,
            struct gctl_req *req)
        {
@@ -95,6 +104,14 @@ METHOD int create {
        struct g_geom **gp;
 } DEFAULT g_raid_md_create_default;
 
+# create_req() - create new node from scratch, with request argument.
+METHOD int create_req {
+       struct g_raid_md_object *md;
+       struct g_class *mp;
+       struct gctl_req *req;
+       struct g_geom **gp;
+} DEFAULT g_raid_md_create_req_default;
+
 # taste() - taste disk and, if needed, create new node.
 METHOD int taste {
        struct g_raid_md_object *md;

Copied and modified: stable/9/sys/geom/raid/md_ddf.c (from r234848, head/sys/geom/raid/md_ddf.c)
==============================================================================
--- head/sys/geom/raid/md_ddf.c Mon Apr 30 17:53:02 2012        (r234848, copy source)
+++ stable/9/sys/geom/raid/md_ddf.c     Thu May 24 02:34:03 2012        (r235874)
@@ -88,14 +88,15 @@ struct g_raid_md_ddf_pervolume {
 
 struct g_raid_md_ddf_object {
        struct g_raid_md_object  mdio_base;
+       u_int                    mdio_bigendian;
        struct ddf_meta          mdio_meta;
+       int                      mdio_starting;
        struct callout           mdio_start_co; /* STARTING state timer. */
        int                      mdio_started;
-       int                      mdio_incomplete;
        struct root_hold_token  *mdio_rootmount; /* Root mount delay token. */
 };
 
-static g_raid_md_create_t g_raid_md_create_ddf;
+static g_raid_md_create_req_t g_raid_md_create_req_ddf;
 static g_raid_md_taste_t g_raid_md_taste_ddf;
 static g_raid_md_event_t g_raid_md_event_ddf;
 static g_raid_md_volume_event_t g_raid_md_volume_event_ddf;
@@ -107,7 +108,7 @@ static g_raid_md_free_volume_t g_raid_md
 static g_raid_md_free_t g_raid_md_free_ddf;
 
 static kobj_method_t g_raid_md_ddf_methods[] = {
-       KOBJMETHOD(g_raid_md_create,    g_raid_md_create_ddf),
+       KOBJMETHOD(g_raid_md_create_req,        g_raid_md_create_req_ddf),
        KOBJMETHOD(g_raid_md_taste,     g_raid_md_taste_ddf),
        KOBJMETHOD(g_raid_md_event,     g_raid_md_event_ddf),
        KOBJMETHOD(g_raid_md_volume_event,      g_raid_md_volume_event_ddf),
@@ -172,6 +173,17 @@ static struct g_raid_md_class g_raid_md_
 #define SET32D(m, f, v)        SET32P((m), &(f), (v))
 #define SET64D(m, f, v)        SET64P((m), &(f), (v))
 
+#define GETCRNUM(m)    (GET32((m), hdr->cr_length) /                   \
+       GET16((m), hdr->Configuration_Record_Length))
+
+#define GETVDCPTR(m, n)        ((struct ddf_vdc_record *)((uint8_t *)(m)->cr + \
+       (n) * GET16((m), hdr->Configuration_Record_Length) *            \
+       (m)->sectorsize))
+
+#define GETSAPTR(m, n) ((struct ddf_sa_record *)((uint8_t *)(m)->cr +  \
+       (n) * GET16((m), hdr->Configuration_Record_Length) *            \
+       (m)->sectorsize))
+
 static int
 isff(uint8_t *buf, int size)
 {
@@ -254,7 +266,7 @@ g_raid_md_ddf_print(struct ddf_meta *met
            GET16(meta, cdr->Controller_Type.SubVendor_ID),
            GET16(meta, cdr->Controller_Type.SubDevice_ID));
        printf("Product_ID           '%.16s'\n", (char *)&meta->cdr->Product_ID[0]);
-       printf("**** Physical Disk Data ****\n");
+       printf("**** Physical Disk Records ****\n");
        printf("Populated_PDEs       %u\n", GET16(meta, pdr->Populated_PDEs));
        printf("Max_PDE_Supported    %u\n", GET16(meta, pdr->Max_PDE_Supported));
        for (j = 0; j < GET16(meta, pdr->Populated_PDEs); j++) {
@@ -276,7 +288,7 @@ g_raid_md_ddf_print(struct ddf_meta *met
                printf("Block_Size           %u\n",
                    GET16(meta, pdr->entry[j].Block_Size));
        }
-       printf("**** Virtual Disk Data ****\n");
+       printf("**** Virtual Disk Records ****\n");
        printf("Populated_VDEs       %u\n", GET16(meta, vdr->Populated_VDEs));
        printf("Max_VDE_Supported    %u\n", GET16(meta, vdr->Max_VDE_Supported));
        for (j = 0; j < GET16(meta, vdr->Populated_VDEs); j++) {
@@ -287,8 +299,8 @@ g_raid_md_ddf_print(struct ddf_meta *met
                printf("\n");
                printf("VD_Number            0x%04x\n",
                    GET16(meta, vdr->entry[j].VD_Number));
-               printf("VD_Type              0x%02x\n",
-                   GET8(meta, vdr->entry[j].VD_Type));
+               printf("VD_Type              0x%04x\n",
+                   GET16(meta, vdr->entry[j].VD_Type));
                printf("VD_State             0x%02x\n",
                    GET8(meta, vdr->entry[j].VD_State));
                printf("Init_State           0x%02x\n",
@@ -299,11 +311,9 @@ g_raid_md_ddf_print(struct ddf_meta *met
                    (char *)&meta->vdr->entry[j].VD_Name);
        }
        printf("**** Configuration Records ****\n");
-       num = GET32(meta, hdr->cr_length) / GET16(meta, hdr->Configuration_Record_Length);
+       num = GETCRNUM(meta);
        for (j = 0; j < num; j++) {
-               vdc = (struct ddf_vdc_record *)((uint8_t *)meta->cr +
-                   j * GET16(meta, hdr->Configuration_Record_Length) *
-                   meta->sectorsize);
+               vdc = GETVDCPTR(meta, j);
                val = GET32D(meta, vdc->Signature);
                switch (val) {
                case DDF_VDCR_SIGNATURE:
@@ -391,6 +401,7 @@ g_raid_md_ddf_print(struct ddf_meta *met
                                    GET16D(meta, sa->entry[i].Secondary_Element));
                        }
                        break;
+               case 0x00000000:
                case 0xFFFFFFFF:
                        break;
                default:
@@ -463,17 +474,16 @@ ddf_meta_find_vdc(struct ddf_meta *meta,
        struct ddf_vdc_record *vdc;
        int i, num;
 
-       num = GET32(meta, hdr->cr_length) / GET16(meta, hdr->Configuration_Record_Length);
+       num = GETCRNUM(meta);
        for (i = 0; i < num; i++) {
-               vdc = (struct ddf_vdc_record *)((uint8_t *)meta->cr +
-                   i * GET16(meta, hdr->Configuration_Record_Length) *
-                   meta->sectorsize);
+               vdc = GETVDCPTR(meta, i);
                if (GUID != NULL) {
                        if (GET32D(meta, vdc->Signature) == DDF_VDCR_SIGNATURE &&
                            memcmp(vdc->VD_GUID, GUID, 24) == 0)
                                return (vdc);
                } else
-                       if (GET32D(meta, vdc->Signature) == 0xffffffff)
+                       if (GET32D(meta, vdc->Signature) == 0xffffffff ||
+                           GET32D(meta, vdc->Signature) == 0)
                                return (vdc);
        }
        return (NULL);
@@ -486,11 +496,9 @@ ddf_meta_count_vdc(struct ddf_meta *meta
        int i, num, cnt;
 
        cnt = 0;
-       num = GET32(meta, hdr->cr_length) / GET16(meta, hdr->Configuration_Record_Length);
+       num = GETCRNUM(meta);
        for (i = 0; i < num; i++) {
-               vdc = (struct ddf_vdc_record *)((uint8_t *)meta->cr +
-                   i * GET16(meta, hdr->Configuration_Record_Length) *
-                   meta->sectorsize);
+               vdc = GETVDCPTR(meta, i);
                if (GET32D(meta, vdc->Signature) != DDF_VDCR_SIGNATURE)
                        continue;
                if (GUID == NULL || memcmp(vdc->VD_GUID, GUID, 24) == 0)
@@ -526,12 +534,36 @@ ddf_meta_find_disk(struct ddf_vol_meta *
        return (-1);
 }
 
+static struct ddf_sa_record *
+ddf_meta_find_sa(struct ddf_meta *meta, int create)
+{
+       struct ddf_sa_record *sa;
+       int i, num;
+
+       num = GETCRNUM(meta);
+       for (i = 0; i < num; i++) {
+               sa = GETSAPTR(meta, i);
+               if (GET32D(meta, sa->Signature) == DDF_SA_SIGNATURE)
+                       return (sa);
+       }
+       if (create) {
+               for (i = 0; i < num; i++) {
+                       sa = GETSAPTR(meta, i);
+                       if (GET32D(meta, sa->Signature) == 0xffffffff ||
+                           GET32D(meta, sa->Signature) == 0)
+                               return (sa);
+               }
+       }
+       return (NULL);
+}
+
 static void
 ddf_meta_create(struct g_raid_disk *disk, struct ddf_meta *sample)
 {
        struct timespec ts;
        struct clocktime ct;
        struct g_raid_md_ddf_perdisk *pd;
+       struct g_raid_md_ddf_object *mdi;
        struct ddf_meta *meta;
        struct ddf_pd_entry *pde;
        off_t anchorlba;
@@ -542,13 +574,14 @@ ddf_meta_create(struct g_raid_disk *disk
        if (sample->hdr == NULL)
                sample = NULL;
 
+       mdi = (struct g_raid_md_ddf_object *)disk->d_softc->sc_md;
        pd = (struct g_raid_md_ddf_perdisk *)disk->d_md_data;
        meta = &pd->pd_meta;
        ss = disk->d_consumer->provider->sectorsize;
        anchorlba = disk->d_consumer->provider->mediasize / ss - 1;
 
        meta->sectorsize = ss;
-       meta->bigendian = sample ? sample->bigendian : 0;
+       meta->bigendian = sample ? sample->bigendian : mdi->mdio_bigendian;
        getnanotime(&ts);
        clock_ts_to_ct(&ts, &ct);
 
@@ -642,9 +675,9 @@ ddf_meta_create(struct g_raid_disk *disk
        pos += GET32(meta, hdr->Diagnostic_Space_Length);
        SET32(meta, hdr->Vendor_Specific_Logs,
            GET32(meta, hdr->Vendor_Specific_Logs_Length) != 0 ? pos : 0xffffffff);
-       pos += GET32(meta, hdr->Vendor_Specific_Logs_Length);
+       pos += min(GET32(meta, hdr->Vendor_Specific_Logs_Length), 1);
        SET64(meta, hdr->Primary_Header_LBA,
-           anchorlba - pos - 16);
+           anchorlba - pos);
        SET64(meta, hdr->Secondary_Header_LBA,
            0xffffffffffffffffULL);
        SET64(meta, hdr->WorkSpace_LBA,
@@ -756,7 +789,7 @@ ddf_meta_update(struct ddf_meta *meta, s
                if (isff(spde->PD_GUID, 24))
                        continue;
                j = ddf_meta_find_pd(meta, NULL,
-                   src->pdr->entry[i].PD_Reference);
+                   GET32(src, pdr->entry[i].PD_Reference));
                if (j < 0) {
                        j = ddf_meta_find_pd(meta, NULL, 0xffffffff);
                        pde = &meta->pdr->entry[j];
@@ -835,7 +868,8 @@ ddf_vol_meta_create(struct ddf_vol_meta 
 }
 
 static void
-ddf_vol_meta_update(struct ddf_vol_meta *dst, struct ddf_meta *src, uint8_t *GUID)
+ddf_vol_meta_update(struct ddf_vol_meta *dst, struct ddf_meta *src,
+    uint8_t *GUID, int started)
 {
        struct ddf_header *hdr;
        struct ddf_vd_entry *vde;
@@ -850,15 +884,15 @@ ddf_vol_meta_update(struct ddf_vol_meta 
        size = GET16(src, hdr->Configuration_Record_Length) * src->sectorsize;
 
        if (dst->vdc == NULL ||
-           ((int32_t)(GET32D(src, vdc->Sequence_Number) -
-           GET32(dst, vdc->Sequence_Number))) > 0)
+           (!started && ((int32_t)(GET32D(src, vdc->Sequence_Number) -
+           GET32(dst, vdc->Sequence_Number))) > 0))
                vnew = 1;
        else
                vnew = 0;
 
        if (dst->bvdc[bvd] == NULL ||
-           ((int32_t)(GET32D(src, vdc->Sequence_Number) -
-           GET32(dst, bvdc[bvd]->Sequence_Number))) > 0)
+           (!started && ((int32_t)(GET32D(src, vdc->Sequence_Number) -
+           GET32(dst, bvdc[bvd]->Sequence_Number))) > 0))
                bvnew = 1;
        else
                bvnew = 0;
@@ -936,12 +970,9 @@ ddf_meta_unused_range(struct ddf_meta *m
        beg[0] = 0;
        end[0] = GET64(meta, pdr->entry[pos].Configured_Size);
        n = 1;
-       num = GET32(meta, hdr->cr_length) /
-           GET16(meta, hdr->Configuration_Record_Length);
+       num = GETCRNUM(meta);
        for (i = 0; i < num; i++) {
-               vdc = (struct ddf_vdc_record *)((uint8_t *)meta->cr +
-                   i * GET16(meta, hdr->Configuration_Record_Length) *
-                   meta->sectorsize);
+               vdc = GETVDCPTR(meta, i);
                if (GET32D(meta, vdc->Signature) != DDF_VDCR_SIGNATURE)
                        continue;
                for (pos = 0; pos < GET16D(meta, vdc->Primary_Element_Count); pos++)
@@ -1197,7 +1228,7 @@ hdrerror:
        }
 
 done:
-       free(abuf, M_MD_DDF);
+       g_free(abuf);
        if (error != 0)
                ddf_meta_free(meta);
        return (error);
@@ -1260,11 +1291,10 @@ err:
        if (error != 0)
                goto err;
 
-       size = GET16(meta, hdr->Configuration_Record_Length);
-       num = GET32(meta, hdr->cr_length) / size;
-       size *= ss;
+       size = GET16(meta, hdr->Configuration_Record_Length) * ss;
+       num = GETCRNUM(meta);
        for (i = 0; i < num; i++) {
-               vdc = (struct ddf_vdc_record *)((uint8_t *)meta->cr + i * size);
+               vdc = GETVDCPTR(meta, i);
                SET32D(meta, vdc->CRC, 0xffffffff);
                SET32D(meta, vdc->CRC, crc32(vdc, size));
        }
@@ -1320,29 +1350,6 @@ ddf_meta_erase(struct g_consumer *cp)
        return (error);
 }
 
-#if 0
-static int
-ddf_meta_write_spare(struct g_consumer *cp)
-{
-       struct ddf_header *meta;
-       int error;
-
-       meta = malloc(sizeof(*meta), M_MD_DDF, M_WAITOK | M_ZERO);
-       memcpy(&meta->ddf_id[0], DDF_MAGIC, sizeof(DDF_MAGIC) - 1);
-       meta->dummy_0 = 0x00020000;
-       meta->integrity = DDF_I_VALID;
-       meta->disk.flags = DDF_F_SPARE | DDF_F_ONLINE | DDF_F_VALID;
-       meta->disk.number = 0xff;
-       arc4rand(&meta->disk.id, sizeof(meta->disk.id), 0);
-       meta->disk_sectors = cp->provider->mediasize / cp->provider->sectorsize;
-       meta->disk_sectors -= 131072;
-       meta->rebuild_lba = UINT32_MAX;
-       error = ddf_meta_write(cp, &meta, 1);
-       free(meta, M_MD_DDF);
-       return (error);
-}
-#endif
-
 static struct g_raid_volume *
 g_raid_md_ddf_get_volume(struct g_raid_softc *sc, uint8_t *GUID)
 {
@@ -1510,16 +1517,14 @@ g_raid_md_ddf_supported(int level, int q
                    qual != G_RAID_VOLUME_RLQ_RMDFLA &&
                    qual != G_RAID_VOLUME_RLQ_RMDFLS)
                        return (0);
-               if (disks < 5)
+               if (disks < 4)
                        return (0);
                break;
        case G_RAID_VOLUME_RL_RAID1E:
                if (qual != G_RAID_VOLUME_RLQ_R1EA &&
                    qual != G_RAID_VOLUME_RLQ_R1EO)
                        return (0);
-               if (disks < 2)
-                       return (0);
-               if (disks % 2 != 0)
+               if (disks < 3)
                        return (0);
                break;
        case G_RAID_VOLUME_RL_SINGLE:
@@ -1578,6 +1583,7 @@ g_raid_md_ddf_start_disk(struct g_raid_d
        struct ddf_vol_meta *vmeta;
        struct ddf_meta *pdmeta, *gmeta;
        struct ddf_vdc_record *vdc1;
+       struct ddf_sa_record *sa;
        off_t size, eoff = 0, esize = 0;
        uint64_t *val2;
        int disk_pos, md_disk_bvd = -1, md_disk_pos = -1, md_pde_pos;
@@ -1600,7 +1606,8 @@ g_raid_md_ddf_start_disk(struct g_raid_d
        md_pde_pos = ddf_meta_find_pd(gmeta, NULL, reference);
 
        if (disk_pos < 0) {
-               G_RAID_DEBUG1(1, sc, "Disk %s is not part of the volume %s",
+               G_RAID_DEBUG1(1, sc,
+                   "Disk %s is not a present part of the volume %s",
                    g_raid_get_diskname(disk), vol->v_name);
 
                /* Failed stale disk is useless for us. */
@@ -1610,10 +1617,8 @@ g_raid_md_ddf_start_disk(struct g_raid_d
                }
 
                /* If disk has some metadata for this volume - erase. */
-               if (pdmeta->cr != NULL &&
-                   (vdc1 = ddf_meta_find_vdc(pdmeta, vmeta->vdc->VD_GUID)) != NULL) {
+               if ((vdc1 = ddf_meta_find_vdc(pdmeta, vmeta->vdc->VD_GUID)) != NULL)
                        SET32D(pdmeta, vdc1->Signature, 0xffffffff);
-               }
 
                /* If we are in the start process, that's all for now. */
                if (!pv->pv_started)
@@ -1634,6 +1639,8 @@ g_raid_md_ddf_start_disk(struct g_raid_d
                            g_raid_get_diskname(disk));
                        goto nofit;
                }
+               eoff *= pd->pd_meta.sectorsize;
+               esize *= pd->pd_meta.sectorsize;
                size = INT64_MAX;
                for (i = 0; i < vol->v_disks_count; i++) {
                        sd = &vol->v_subdisks[i];
@@ -1646,26 +1653,41 @@ g_raid_md_ddf_start_disk(struct g_raid_d
                }
                if (disk_pos >= 0 &&
                    vol->v_raid_level != G_RAID_VOLUME_RL_CONCAT &&
-                   (off_t)esize * 512 < size) {
+                   esize < size) {
                        G_RAID_DEBUG1(1, sc, "Disk %s free space "
                            "is too small (%ju < %ju)",
-                           g_raid_get_diskname(disk),
-                           (off_t)esize * 512, size);
+                           g_raid_get_diskname(disk), esize, size);
                        disk_pos = -1;
                }
                if (disk_pos >= 0) {
                        if (vol->v_raid_level != G_RAID_VOLUME_RL_CONCAT)
-                               esize = size / 512;
+                               esize = size;
                        md_disk_bvd = disk_pos / GET16(vmeta, vdc->Primary_Element_Count); // XXX
                        md_disk_pos = disk_pos % GET16(vmeta, vdc->Primary_Element_Count); // XXX
                } else {
 nofit:
-                       if (ddf_meta_count_vdc(&pd->pd_meta, NULL) == 0) {
+                       if (disk->d_state == G_RAID_DISK_S_NONE)
                                g_raid_change_disk_state(disk,
-                                   G_RAID_DISK_S_SPARE);
-                       }
+                                   G_RAID_DISK_S_STALE);
                        return (0);
                }
+
+               /*
+                * If spare is committable, delete spare record.
+                * Otherwise, mark it active and leave there.
+                */
+               sa = ddf_meta_find_sa(&pd->pd_meta, 0);
+               if (sa != NULL) {
+                       if ((GET8D(&pd->pd_meta, sa->Spare_Type) &
+                           DDF_SAR_TYPE_REVERTIBLE) == 0) {
+                               SET32D(&pd->pd_meta, sa->Signature, 0xffffffff);
+                       } else {
+                               SET8D(&pd->pd_meta, sa->Spare_Type,
+                                   GET8D(&pd->pd_meta, sa->Spare_Type) |
+                                   DDF_SAR_TYPE_ACTIVE);
+                       }
+               }
+
                G_RAID_DEBUG1(1, sc, "Disk %s takes pos %d in the volume %s",
                    g_raid_get_diskname(disk), disk_pos, vol->v_name);
                resurrection = 1;
@@ -1691,8 +1713,8 @@ nofit:
                g_raid_change_disk_state(disk, G_RAID_DISK_S_ACTIVE);
 
        if (resurrection) {
-               sd->sd_offset = (off_t)eoff * 512;
-               sd->sd_size = (off_t)esize * 512;
+               sd->sd_offset = eoff;
+               sd->sd_size = esize;
        } else if (pdmeta->cr != NULL &&
            (vdc1 = ddf_meta_find_vdc(pdmeta, vmeta->vdc->VD_GUID)) != NULL) {
                val2 = (uint64_t *)&(vdc1->Physical_Disk_Sequence[GET16(vmeta, hdr->Max_Primary_Element_Entries)]);
@@ -1802,7 +1824,9 @@ g_raid_md_ddf_start(struct g_raid_volume
        struct g_raid_subdisk *sd;
        struct g_raid_disk *disk;
        struct g_raid_md_object *md;
+       struct g_raid_md_ddf_perdisk *pd;
        struct g_raid_md_ddf_pervolume *pv;
+       struct g_raid_md_ddf_object *mdi;
        struct ddf_vol_meta *vmeta;
        struct ddf_vdc_record *vdc;
        uint64_t *val2;
@@ -1810,6 +1834,7 @@ g_raid_md_ddf_start(struct g_raid_volume
 
        sc = vol->v_softc;
        md = sc->sc_md;
+       mdi = (struct g_raid_md_ddf_object *)md;
        pv = vol->v_md_data;
        vmeta = &pv->pv_meta;
        vdc = vmeta->vdc;
@@ -1826,6 +1851,13 @@ g_raid_md_ddf_start(struct g_raid_volume
        vol->v_strip_size = vol->v_sectorsize << GET8(vmeta, vdc->Stripe_Size);
        vol->v_disks_count = GET16(vmeta, vdc->Primary_Element_Count) *
            GET8(vmeta, vdc->Secondary_Element_Count);
+       vol->v_mdf_pdisks = GET8(vmeta, vdc->MDF_Parity_Disks);
+       vol->v_mdf_polynomial = GET16(vmeta, vdc->MDF_Parity_Generator_Polynomial);
+       vol->v_mdf_method = GET8(vmeta, vdc->MDF_Constant_Generation_Method);
+       if (GET8(vmeta, vdc->Rotate_Parity_count) > 31)
+               vol->v_rotate_parity = 1;
+       else
+               vol->v_rotate_parity = 1 << GET8(vmeta, vdc->Rotate_Parity_count);
        vol->v_mediasize = GET64(vmeta, vdc->VD_Size) * vol->v_sectorsize;
        for (i = 0, j = 0, bvd = 0; i < vol->v_disks_count; i++, j++) {
                if (j == GET16(vmeta, vdc->Primary_Element_Count)) {
@@ -1848,20 +1880,14 @@ g_raid_md_ddf_start(struct g_raid_volume
        g_raid_start_volume(vol);
 
        /* Make all disks found till the moment take their places. */
-       for (i = 0, j = 0, bvd = 0; i < vol->v_disks_count; i++, j++) {
-               if (j == GET16(vmeta, vdc->Primary_Element_Count)) {
-                       j = 0;
-                       bvd++;
-               }
-               if (vmeta->bvdc[bvd] == NULL)
-                       continue;
-               disk = g_raid_md_ddf_get_disk(sc, NULL,
-                   GET32(vmeta, bvdc[bvd]->Physical_Disk_Sequence[j]));
-               if (disk != NULL)
+       TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
+               pd = (struct g_raid_md_ddf_perdisk *)disk->d_md_data;
+               if (ddf_meta_find_vdc(&pd->pd_meta, vmeta->vdc->VD_GUID) != NULL)
                        g_raid_md_ddf_start_disk(disk, vol);
        }
 
        pv->pv_started = 1;
+       mdi->mdio_starting--;
        callout_stop(&pv->pv_start_co);

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
_______________________________________________
svn-src-stable-9@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-stable-9
To unsubscribe, send any mail to "svn-src-stable-9-unsubscribe@freebsd.org"

Reply via email to