Author: mav
Date: Thu Apr 19 12:30:12 2012
New Revision: 234458
URL: http://svn.freebsd.org/changeset/base/234458

Log:
  Add a module to the GEOM RAID class for reading non-degraded RAID5 volumes,
  plus infrastructure to differentiate the 4 possible RAID5 on-disk layouts.
  
  Tested with Intel and AMD RAID BIOSes.
  
  MFC after:    2 weeks

Added:
  head/sys/geom/raid/tr_raid5.c   (contents, props changed)
Modified:
  head/sbin/geom/class/raid/graid.8
  head/sys/conf/files
  head/sys/geom/raid/g_raid.c
  head/sys/geom/raid/g_raid.h
  head/sys/geom/raid/md_intel.c
  head/sys/geom/raid/md_jmicron.c
  head/sys/geom/raid/md_nvidia.c
  head/sys/geom/raid/md_promise.c
  head/sys/geom/raid/md_sii.c
  head/sys/modules/geom/geom_raid/Makefile

Modified: head/sbin/geom/class/raid/graid.8
==============================================================================
--- head/sbin/geom/class/raid/graid.8   Thu Apr 19 10:53:17 2012        (r234457)
+++ head/sbin/geom/class/raid/graid.8   Thu Apr 19 12:30:12 2012        (r234458)
@@ -24,7 +24,7 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd October 26, 2011
+.Dd April 19, 2012
 .Dt GRAID 8
 .Os
 .Sh NAME
@@ -242,7 +242,8 @@ own risk: RAID1 (3+ disks), RAID10 (6+ d
 The GEOM RAID class follows a modular design, allowing different RAID levels
 to be used.
 Support for the following RAID levels is currently implemented: RAID0, RAID1,
-RAID1E, RAID10, SINGLE, CONCAT.
+RAID1E, RAID5, RAID10, SINGLE, CONCAT.
+RAID5 support is read-only and only for volumes in optimal state.
 .Sh RAID LEVEL MIGRATION
 The GEOM RAID class has no support for RAID level migration, allowed by some
 metadata formats.

Modified: head/sys/conf/files
==============================================================================
--- head/sys/conf/files Thu Apr 19 10:53:17 2012        (r234457)
+++ head/sys/conf/files Thu Apr 19 12:30:12 2012        (r234458)
@@ -2405,6 +2405,7 @@ geom/raid/tr_concat.c             optional geom_rai
 geom/raid/tr_raid0.c           optional geom_raid
 geom/raid/tr_raid1.c           optional geom_raid
 geom/raid/tr_raid1e.c          optional geom_raid
+geom/raid/tr_raid5.c           optional geom_raid
 geom/raid3/g_raid3.c           optional geom_raid3
 geom/raid3/g_raid3_ctl.c       optional geom_raid3
 geom/shsec/g_shsec.c           optional geom_shsec

Modified: head/sys/geom/raid/g_raid.c
==============================================================================
--- head/sys/geom/raid/g_raid.c Thu Apr 19 10:53:17 2012        (r234457)
+++ head/sys/geom/raid/g_raid.c Thu Apr 19 12:30:12 2012        (r234458)
@@ -281,6 +281,14 @@ g_raid_volume_level2str(int level, int q
        case G_RAID_VOLUME_RL_RAID4:
                return ("RAID4");
        case G_RAID_VOLUME_RL_RAID5:
+               if (qual == G_RAID_VOLUME_RLQ_R5RA)
+                       return ("RAID5RA");
+               if (qual == G_RAID_VOLUME_RLQ_R5RS)
+                       return ("RAID5RS");
+               if (qual == G_RAID_VOLUME_RLQ_R5LA)
+                       return ("RAID5LA");
+               if (qual == G_RAID_VOLUME_RLQ_R5LS)
+                       return ("RAID5LS");
                return ("RAID5");
        case G_RAID_VOLUME_RL_RAID6:
                return ("RAID6");
@@ -313,9 +321,20 @@ g_raid_volume_str2level(const char *str,
                *level = G_RAID_VOLUME_RL_RAID3;
        else if (strcasecmp(str, "RAID4") == 0)
                *level = G_RAID_VOLUME_RL_RAID4;
-       else if (strcasecmp(str, "RAID5") == 0)
+       else if (strcasecmp(str, "RAID5RA") == 0) {
                *level = G_RAID_VOLUME_RL_RAID5;
-       else if (strcasecmp(str, "RAID6") == 0)
+               *qual = G_RAID_VOLUME_RLQ_R5RA;
+       } else if (strcasecmp(str, "RAID5RS") == 0) {
+               *level = G_RAID_VOLUME_RL_RAID5;
+               *qual = G_RAID_VOLUME_RLQ_R5RS;
+       } else if (strcasecmp(str, "RAID5") == 0 ||
+                  strcasecmp(str, "RAID5LA") == 0) {
+               *level = G_RAID_VOLUME_RL_RAID5;
+               *qual = G_RAID_VOLUME_RLQ_R5LA;
+       } else if (strcasecmp(str, "RAID5LS") == 0) {
+               *level = G_RAID_VOLUME_RL_RAID5;
+               *qual = G_RAID_VOLUME_RLQ_R5LS;
+       } else if (strcasecmp(str, "RAID6") == 0)
                *level = G_RAID_VOLUME_RL_RAID6;
        else if (strcasecmp(str, "RAID10") == 0 ||
                 strcasecmp(str, "RAID1E") == 0)

Modified: head/sys/geom/raid/g_raid.h
==============================================================================
--- head/sys/geom/raid/g_raid.h Thu Apr 19 10:53:17 2012        (r234457)
+++ head/sys/geom/raid/g_raid.h Thu Apr 19 12:30:12 2012        (r234458)
@@ -227,6 +227,10 @@ struct g_raid_subdisk {
 #define G_RAID_VOLUME_RL_UNKNOWN       0xff
 
 #define G_RAID_VOLUME_RLQ_NONE         0x00
+#define G_RAID_VOLUME_RLQ_R5RA         0x00
+#define G_RAID_VOLUME_RLQ_R5RS         0x01
+#define G_RAID_VOLUME_RLQ_R5LA         0x02
+#define G_RAID_VOLUME_RLQ_R5LS         0x03
 #define G_RAID_VOLUME_RLQ_UNKNOWN      0xff
 
 struct g_raid_volume;

Modified: head/sys/geom/raid/md_intel.c
==============================================================================
--- head/sys/geom/raid/md_intel.c       Thu Apr 19 10:53:17 2012        (r234457)
+++ head/sys/geom/raid/md_intel.c       Thu Apr 19 12:30:12 2012        (r234458)
@@ -682,11 +682,13 @@ g_raid_md_intel_supported(int level, int
                        return (0);
                if (!force && disks > 6)
                        return (0);
+               if (qual != G_RAID_VOLUME_RLQ_R5LA)
+                       return (0);
                break;
        default:
                return (0);
        }
-       if (qual != G_RAID_VOLUME_RLQ_NONE)
+       if (level != G_RAID_VOLUME_RL_RAID5 && qual != G_RAID_VOLUME_RLQ_NONE)
                return (0);
        return (1);
 }
@@ -1029,6 +1031,7 @@ g_raid_md_intel_start(struct g_raid_soft
                mmap = intel_get_map(mvol, 0);
                vol = g_raid_create_volume(sc, mvol->name, -1);
                vol->v_md_data = (void *)(intptr_t)i;
+               vol->v_raid_level_qualifier = G_RAID_VOLUME_RLQ_NONE;
                if (mmap->type == INTEL_T_RAID0)
                        vol->v_raid_level = G_RAID_VOLUME_RL_RAID0;
                else if (mmap->type == INTEL_T_RAID1 &&
@@ -1045,11 +1048,11 @@ g_raid_md_intel_start(struct g_raid_soft
                                vol->v_raid_level = G_RAID_VOLUME_RL_RAID1;
                        else
                                vol->v_raid_level = G_RAID_VOLUME_RL_RAID1E;
-               } else if (mmap->type == INTEL_T_RAID5)
+               } else if (mmap->type == INTEL_T_RAID5) {
                        vol->v_raid_level = G_RAID_VOLUME_RL_RAID5;
-               else
+                       vol->v_raid_level_qualifier = G_RAID_VOLUME_RLQ_R5LA;
+               } else
                        vol->v_raid_level = G_RAID_VOLUME_RL_UNKNOWN;
-               vol->v_raid_level_qualifier = G_RAID_VOLUME_RLQ_NONE;
                vol->v_strip_size = (u_int)mmap->strip_sectors * 512; //ZZZ
                vol->v_disks_count = mmap->total_disks;
                vol->v_mediasize = (off_t)mvol->total_sectors * 512; //ZZZ
@@ -1485,6 +1488,8 @@ g_raid_md_ctl_intel(struct g_raid_md_obj
                        gctl_error(req, "No RAID level.");
                        return (-3);
                }
+               if (strcasecmp(levelname, "RAID5") == 0)
+                       levelname = "RAID5LA";
                if (g_raid_volume_str2level(levelname, &level, &qual)) {
                        gctl_error(req, "Unknown RAID level '%s'.", levelname);
                        return (-4);
@@ -1631,7 +1636,7 @@ g_raid_md_ctl_intel(struct g_raid_md_obj
                vol = g_raid_create_volume(sc, volname, -1);
                vol->v_md_data = (void *)(intptr_t)0;
                vol->v_raid_level = level;
-               vol->v_raid_level_qualifier = G_RAID_VOLUME_RLQ_NONE;
+               vol->v_raid_level_qualifier = qual;
                vol->v_strip_size = strip;
                vol->v_disks_count = numdisks;
                if (level == G_RAID_VOLUME_RL_RAID0)
@@ -1658,8 +1663,12 @@ g_raid_md_ctl_intel(struct g_raid_md_obj
                        if (sd->sd_disk->d_consumer != NULL) {
                                g_raid_change_disk_state(disk,
                                    G_RAID_DISK_S_ACTIVE);
-                               g_raid_change_subdisk_state(sd,
-                                   G_RAID_SUBDISK_S_ACTIVE);
+                               if (level == G_RAID_VOLUME_RL_RAID5)
+                                       g_raid_change_subdisk_state(sd,
+                                           G_RAID_SUBDISK_S_UNINITIALIZED);
+                               else
+                                       g_raid_change_subdisk_state(sd,
+                                           G_RAID_SUBDISK_S_ACTIVE);
                                g_raid_event_send(sd, G_RAID_SUBDISK_E_NEW,
                                    G_RAID_EVENT_SUBDISK);
                        } else {
@@ -1694,6 +1703,8 @@ g_raid_md_ctl_intel(struct g_raid_md_obj
                        gctl_error(req, "No RAID level.");
                        return (-3);
                }
+               if (strcasecmp(levelname, "RAID5") == 0)
+                       levelname = "RAID5LA";
                if (g_raid_volume_str2level(levelname, &level, &qual)) {
                        gctl_error(req, "Unknown RAID level '%s'.", levelname);
                        return (-4);
@@ -1818,7 +1829,7 @@ g_raid_md_ctl_intel(struct g_raid_md_obj
                vol = g_raid_create_volume(sc, volname, -1);
                vol->v_md_data = (void *)(intptr_t)i;
                vol->v_raid_level = level;
-               vol->v_raid_level_qualifier = G_RAID_VOLUME_RLQ_NONE;
+               vol->v_raid_level_qualifier = qual;
                vol->v_strip_size = strip;
                vol->v_disks_count = numdisks;
                if (level == G_RAID_VOLUME_RL_RAID0)
@@ -1843,8 +1854,12 @@ g_raid_md_ctl_intel(struct g_raid_md_obj
                        sd->sd_size = size;
                        TAILQ_INSERT_TAIL(&disk->d_subdisks, sd, sd_next);
                        if (disk->d_state == G_RAID_DISK_S_ACTIVE) {
-                               g_raid_change_subdisk_state(sd,
-                                   G_RAID_SUBDISK_S_ACTIVE);
+                               if (level == G_RAID_VOLUME_RL_RAID5)
+                                       g_raid_change_subdisk_state(sd,
+                                           G_RAID_SUBDISK_S_UNINITIALIZED);
+                               else
+                                       g_raid_change_subdisk_state(sd,
+                                           G_RAID_SUBDISK_S_ACTIVE);
                                g_raid_event_send(sd, G_RAID_SUBDISK_E_NEW,
                                    G_RAID_EVENT_SUBDISK);
                        }
@@ -2245,6 +2260,9 @@ g_raid_md_write_intel(struct g_raid_md_o
                        mmap0->status = INTEL_S_FAILURE;
                else if (vol->v_state == G_RAID_VOLUME_S_DEGRADED)
                        mmap0->status = INTEL_S_DEGRADED;
+               else if (g_raid_nsubdisks(vol, G_RAID_SUBDISK_S_UNINITIALIZED)
+                   == g_raid_nsubdisks(vol, -1))
+                       mmap0->status = INTEL_S_UNINITIALIZED;
                else
                        mmap0->status = INTEL_S_READY;
                if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID0)
@@ -2288,7 +2306,8 @@ g_raid_md_write_intel(struct g_raid_md_o
                            sd->sd_state == G_RAID_SUBDISK_S_RESYNC) {
                                mmap1->disk_idx[sdi] |= INTEL_DI_RBLD;
                        } else if (sd->sd_state != G_RAID_SUBDISK_S_ACTIVE &&
-                           sd->sd_state != G_RAID_SUBDISK_S_STALE) {
+                           sd->sd_state != G_RAID_SUBDISK_S_STALE &&
+                           sd->sd_state != G_RAID_SUBDISK_S_UNINITIALIZED) {
                                mmap0->disk_idx[sdi] |= INTEL_DI_RBLD;
                                if (mvol->migr_state)
                                        mmap1->disk_idx[sdi] |= INTEL_DI_RBLD;

Modified: head/sys/geom/raid/md_jmicron.c
==============================================================================
--- head/sys/geom/raid/md_jmicron.c     Thu Apr 19 10:53:17 2012        (r234457)
+++ head/sys/geom/raid/md_jmicron.c     Thu Apr 19 12:30:12 2012        (r234458)
@@ -400,13 +400,15 @@ g_raid_md_jmicron_supported(int level, i
        case G_RAID_VOLUME_RL_RAID5:
                if (disks < 3)
                        return (0);
+               if (qual != G_RAID_VOLUME_RLQ_R5LA)
+                       return (0);
                if (!force)
                        return (0);
                break;
        default:
                return (0);
        }
-       if (qual != G_RAID_VOLUME_RLQ_NONE)
+       if (level != G_RAID_VOLUME_RL_RAID5 && qual != G_RAID_VOLUME_RLQ_NONE)
                return (0);
        return (1);
 }
@@ -657,6 +659,7 @@ g_raid_md_jmicron_start(struct g_raid_so
        vol = g_raid_create_volume(sc, buf, -1);
        size = ((off_t)meta->disk_sectors_high << 16) + meta->disk_sectors_low;
        size *= 512; //ZZZ
+       vol->v_raid_level_qualifier = G_RAID_VOLUME_RLQ_NONE;
        if (meta->type == JMICRON_T_RAID0) {
                vol->v_raid_level = G_RAID_VOLUME_RL_RAID0;
                vol->v_mediasize = size * mdi->mdio_total_disks;
@@ -674,12 +677,12 @@ g_raid_md_jmicron_start(struct g_raid_so
                vol->v_mediasize = 0;
        } else if (meta->type == JMICRON_T_RAID5) {
                vol->v_raid_level = G_RAID_VOLUME_RL_RAID5;
+               vol->v_raid_level_qualifier = G_RAID_VOLUME_RLQ_R5LA;
                vol->v_mediasize = size * (mdi->mdio_total_disks - 1);
        } else {
                vol->v_raid_level = G_RAID_VOLUME_RL_UNKNOWN;
                vol->v_mediasize = 0;
        }
-       vol->v_raid_level_qualifier = G_RAID_VOLUME_RLQ_NONE;
        vol->v_strip_size = 1024 << meta->stripe_shift; //ZZZ
        vol->v_disks_count = mdi->mdio_total_disks;
        vol->v_sectorsize = 512; //ZZZ
@@ -1057,6 +1060,8 @@ g_raid_md_ctl_jmicron(struct g_raid_md_o
                        gctl_error(req, "No RAID level.");
                        return (-3);
                }
+               if (strcasecmp(levelname, "RAID5") == 0)
+                       levelname = "RAID5LA";
                if (g_raid_volume_str2level(levelname, &level, &qual)) {
                        gctl_error(req, "Unknown RAID level '%s'.", levelname);
                        return (-4);
@@ -1192,7 +1197,7 @@ g_raid_md_ctl_jmicron(struct g_raid_md_o
                vol = g_raid_create_volume(sc, volname, -1);
                vol->v_md_data = (void *)(intptr_t)0;
                vol->v_raid_level = level;
-               vol->v_raid_level_qualifier = G_RAID_VOLUME_RLQ_NONE;
+               vol->v_raid_level_qualifier = qual;
                vol->v_strip_size = strip;
                vol->v_disks_count = numdisks;
                if (level == G_RAID_VOLUME_RL_RAID0 ||

Modified: head/sys/geom/raid/md_nvidia.c
==============================================================================
--- head/sys/geom/raid/md_nvidia.c      Thu Apr 19 10:53:17 2012        (r234457)
+++ head/sys/geom/raid/md_nvidia.c      Thu Apr 19 12:30:12 2012        (r234458)
@@ -407,11 +407,14 @@ g_raid_md_nvidia_supported(int level, in
        case G_RAID_VOLUME_RL_RAID5:
                if (disks < 3)
                        return (0);
+               if (qual != G_RAID_VOLUME_RLQ_R5LA &&
+                   qual != G_RAID_VOLUME_RLQ_R5LS)
+                       return (0);
                break;
        default:
                return (0);
        }
-       if (qual != G_RAID_VOLUME_RLQ_NONE)
+       if (level != G_RAID_VOLUME_RL_RAID5 && qual != G_RAID_VOLUME_RLQ_NONE)
                return (0);
        return (1);
 }
@@ -679,10 +682,11 @@ g_raid_md_nvidia_start(struct g_raid_sof
                size = 0;
        } else if (meta->type == NVIDIA_T_RAID5) {
                vol->v_raid_level = G_RAID_VOLUME_RL_RAID5;
+               vol->v_raid_level_qualifier = G_RAID_VOLUME_RLQ_R5LA;
                size = vol->v_mediasize / (mdi->mdio_total_disks - 1);
        } else if (meta->type == NVIDIA_T_RAID5_SYM) {
                vol->v_raid_level = G_RAID_VOLUME_RL_RAID5;
-//             vol->v_raid_level_qualifier = 0x03;
+               vol->v_raid_level_qualifier = G_RAID_VOLUME_RLQ_R5LS;
                size = vol->v_mediasize / (mdi->mdio_total_disks - 1);
        } else {
                vol->v_raid_level = G_RAID_VOLUME_RL_UNKNOWN;
@@ -1059,6 +1063,8 @@ g_raid_md_ctl_nvidia(struct g_raid_md_ob
                        gctl_error(req, "No RAID level.");
                        return (-3);
                }
+               if (strcasecmp(levelname, "RAID5") == 0)
+                       levelname = "RAID5LS";
                if (g_raid_volume_str2level(levelname, &level, &qual)) {
                        gctl_error(req, "Unknown RAID level '%s'.", levelname);
                        return (-4);
@@ -1206,7 +1212,7 @@ g_raid_md_ctl_nvidia(struct g_raid_md_ob
                vol = g_raid_create_volume(sc, volname, -1);
                vol->v_md_data = (void *)(intptr_t)0;
                vol->v_raid_level = level;
-               vol->v_raid_level_qualifier = G_RAID_VOLUME_RLQ_NONE;
+               vol->v_raid_level_qualifier = qual;
                vol->v_strip_size = strip;
                vol->v_disks_count = numdisks;
                vol->v_mediasize = volsize;
@@ -1454,8 +1460,8 @@ g_raid_md_write_nvidia(struct g_raid_md_
        else if (vol->v_raid_level == G_RAID_VOLUME_RL_CONCAT ||
            vol->v_raid_level == G_RAID_VOLUME_RL_SINGLE)
                meta->type = NVIDIA_T_CONCAT;
-//     else if (vol->v_raid_level_qualifier == 0)
-//             meta->type = NVIDIA_T_RAID5;
+       else if (vol->v_raid_level_qualifier == G_RAID_VOLUME_RLQ_R5LA)
+               meta->type = NVIDIA_T_RAID5;
        else
                meta->type = NVIDIA_T_RAID5_SYM;
        meta->strip_sectors = vol->v_strip_size / vol->v_sectorsize;

Modified: head/sys/geom/raid/md_promise.c
==============================================================================
--- head/sys/geom/raid/md_promise.c     Thu Apr 19 10:53:17 2012        (r234457)
+++ head/sys/geom/raid/md_promise.c     Thu Apr 19 12:30:12 2012        (r234458)
@@ -595,11 +595,13 @@ g_raid_md_promise_supported(int level, i
        case G_RAID_VOLUME_RL_RAID5:
                if (disks < 3)
                        return (0);
+               if (qual != G_RAID_VOLUME_RLQ_R5LA)
+                       return (0);
                break;
        default:
                return (0);
        }
-       if (qual != G_RAID_VOLUME_RLQ_NONE)
+       if (level != G_RAID_VOLUME_RL_RAID5 && qual != G_RAID_VOLUME_RLQ_NONE)
                return (0);
        return (1);
 }
@@ -848,6 +850,7 @@ g_raid_md_promise_start(struct g_raid_vo
        pv = vol->v_md_data;
        meta = pv->pv_meta;
 
+       vol->v_raid_level_qualifier = G_RAID_VOLUME_RLQ_NONE;
        if (meta->type == PROMISE_T_RAID0)
                vol->v_raid_level = G_RAID_VOLUME_RL_RAID0;
        else if (meta->type == PROMISE_T_RAID1) {
@@ -857,15 +860,15 @@ g_raid_md_promise_start(struct g_raid_vo
                        vol->v_raid_level = G_RAID_VOLUME_RL_RAID1E;
        } else if (meta->type == PROMISE_T_RAID3)
                vol->v_raid_level = G_RAID_VOLUME_RL_RAID3;
-       else if (meta->type == PROMISE_T_RAID5)
+       else if (meta->type == PROMISE_T_RAID5) {
                vol->v_raid_level = G_RAID_VOLUME_RL_RAID5;
-       else if (meta->type == PROMISE_T_SPAN)
+               vol->v_raid_level_qualifier = G_RAID_VOLUME_RLQ_R5LA;
+       } else if (meta->type == PROMISE_T_SPAN)
                vol->v_raid_level = G_RAID_VOLUME_RL_CONCAT;
        else if (meta->type == PROMISE_T_JBOD)
                vol->v_raid_level = G_RAID_VOLUME_RL_SINGLE;
        else
                vol->v_raid_level = G_RAID_VOLUME_RL_UNKNOWN;
-       vol->v_raid_level_qualifier = G_RAID_VOLUME_RLQ_NONE;
        vol->v_strip_size = 512 << meta->stripe_shift; //ZZZ
        vol->v_disks_count = meta->total_disks;
        vol->v_mediasize = (off_t)meta->total_sectors * 512; //ZZZ
@@ -1241,6 +1244,8 @@ g_raid_md_ctl_promise(struct g_raid_md_o
                        gctl_error(req, "No RAID level.");
                        return (-3);
                }
+               if (strcasecmp(levelname, "RAID5") == 0)
+                       levelname = "RAID5LA";
                if (g_raid_volume_str2level(levelname, &level, &qual)) {
                        gctl_error(req, "Unknown RAID level '%s'.", levelname);
                        return (-4);
@@ -1415,7 +1420,7 @@ g_raid_md_ctl_promise(struct g_raid_md_o
                vol = g_raid_create_volume(sc, volname, -1);
                vol->v_md_data = pv;
                vol->v_raid_level = level;
-               vol->v_raid_level_qualifier = G_RAID_VOLUME_RLQ_NONE;
+               vol->v_raid_level_qualifier = qual;
                vol->v_strip_size = strip;
                vol->v_disks_count = numdisks;
                if (level == G_RAID_VOLUME_RL_RAID0 ||

Modified: head/sys/geom/raid/md_sii.c
==============================================================================
--- head/sys/geom/raid/md_sii.c Thu Apr 19 10:53:17 2012        (r234457)
+++ head/sys/geom/raid/md_sii.c Thu Apr 19 12:30:12 2012        (r234458)
@@ -456,11 +456,13 @@ g_raid_md_sii_supported(int level, int q
        case G_RAID_VOLUME_RL_RAID5:
                if (disks < 3)
                        return (0);
+               if (qual != G_RAID_VOLUME_RLQ_R5LS)
+                       return (0);
                break;
        default:
                return (0);
        }
-       if (qual != G_RAID_VOLUME_RLQ_NONE)
+       if (level != G_RAID_VOLUME_RL_RAID5 && qual != G_RAID_VOLUME_RLQ_NONE)
                return (0);
        return (1);
 }
@@ -719,6 +721,7 @@ g_raid_md_sii_start(struct g_raid_softc 
        sii_meta_get_name(meta, buf);
        vol = g_raid_create_volume(sc, buf, -1);
        vol->v_mediasize = (off_t)meta->total_sectors * 512;
+       vol->v_raid_level_qualifier = G_RAID_VOLUME_RLQ_NONE;
        if (meta->type == SII_T_RAID0) {
                vol->v_raid_level = G_RAID_VOLUME_RL_RAID0;
                size = vol->v_mediasize / mdi->mdio_total_disks;
@@ -736,6 +739,7 @@ g_raid_md_sii_start(struct g_raid_softc 
                size = 0;
        } else if (meta->type == SII_T_RAID5) {
                vol->v_raid_level = G_RAID_VOLUME_RL_RAID5;
+               vol->v_raid_level_qualifier = G_RAID_VOLUME_RLQ_R5LS;
                size = vol->v_mediasize / (mdi->mdio_total_disks - 1);
        } else if (meta->type == SII_T_JBOD) {
                vol->v_raid_level = G_RAID_VOLUME_RL_SINGLE;
@@ -744,7 +748,6 @@ g_raid_md_sii_start(struct g_raid_softc 
                vol->v_raid_level = G_RAID_VOLUME_RL_UNKNOWN;
                size = 0;
        }
-       vol->v_raid_level_qualifier = G_RAID_VOLUME_RLQ_NONE;
        vol->v_strip_size = meta->strip_sectors * 512; //ZZZ
        vol->v_disks_count = mdi->mdio_total_disks;
        vol->v_sectorsize = 512; //ZZZ
@@ -1144,6 +1147,8 @@ g_raid_md_ctl_sii(struct g_raid_md_objec
                        gctl_error(req, "No RAID level.");
                        return (-3);
                }
+               if (strcasecmp(levelname, "RAID5") == 0)
+                       levelname = "RAID5LS";
                if (g_raid_volume_str2level(levelname, &level, &qual)) {
                        gctl_error(req, "Unknown RAID level '%s'.", levelname);
                        return (-4);
@@ -1278,7 +1283,7 @@ g_raid_md_ctl_sii(struct g_raid_md_objec
                vol = g_raid_create_volume(sc, volname, -1);
                vol->v_md_data = (void *)(intptr_t)0;
                vol->v_raid_level = level;
-               vol->v_raid_level_qualifier = G_RAID_VOLUME_RLQ_NONE;
+               vol->v_raid_level_qualifier = qual;
                vol->v_strip_size = strip;
                vol->v_disks_count = numdisks;
                if (level == G_RAID_VOLUME_RL_RAID0 ||

Added: head/sys/geom/raid/tr_raid5.c
==============================================================================
--- /dev/null   00:00:00 1970   (empty, because file is newly added)
+++ head/sys/geom/raid/tr_raid5.c       Thu Apr 19 12:30:12 2012        (r234458)
@@ -0,0 +1,376 @@
+/*-
+ * Copyright (c) 2012 Alexander Motin <[email protected]>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/bio.h>
+#include <sys/endian.h>
+#include <sys/kernel.h>
+#include <sys/kobj.h>
+#include <sys/limits.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mutex.h>
+#include <sys/sysctl.h>
+#include <sys/systm.h>
+#include <geom/geom.h>
+#include "geom/raid/g_raid.h"
+#include "g_raid_tr_if.h"
+
+SYSCTL_DECL(_kern_geom_raid);
+
+static MALLOC_DEFINE(M_TR_RAID5, "tr_raid5_data", "GEOM_RAID RAID5 data");
+
+#define TR_RAID5_NONE 0
+#define TR_RAID5_REBUILD 1
+#define TR_RAID5_RESYNC 2
+
+#define TR_RAID5_F_DOING_SOME  0x1
+#define TR_RAID5_F_LOCKED      0x2
+#define TR_RAID5_F_ABORT       0x4
+
+struct g_raid_tr_raid5_object {
+       struct g_raid_tr_object  trso_base;
+       int                      trso_starting;
+       int                      trso_stopping;
+       int                      trso_type;
+       int                      trso_recover_slabs; /* slabs before rest */
+       int                      trso_fair_io;
+       int                      trso_meta_update;
+       int                      trso_flags;
+       struct g_raid_subdisk   *trso_failed_sd; /* like per volume */
+       void                    *trso_buffer;    /* Buffer space */
+       struct bio               trso_bio;
+};
+
+static g_raid_tr_taste_t g_raid_tr_taste_raid5;
+static g_raid_tr_event_t g_raid_tr_event_raid5;
+static g_raid_tr_start_t g_raid_tr_start_raid5;
+static g_raid_tr_stop_t g_raid_tr_stop_raid5;
+static g_raid_tr_iostart_t g_raid_tr_iostart_raid5;
+static g_raid_tr_iodone_t g_raid_tr_iodone_raid5;
+static g_raid_tr_kerneldump_t g_raid_tr_kerneldump_raid5;
+static g_raid_tr_locked_t g_raid_tr_locked_raid5;
+static g_raid_tr_free_t g_raid_tr_free_raid5;
+
+static kobj_method_t g_raid_tr_raid5_methods[] = {
+       KOBJMETHOD(g_raid_tr_taste,     g_raid_tr_taste_raid5),
+       KOBJMETHOD(g_raid_tr_event,     g_raid_tr_event_raid5),
+       KOBJMETHOD(g_raid_tr_start,     g_raid_tr_start_raid5),
+       KOBJMETHOD(g_raid_tr_stop,      g_raid_tr_stop_raid5),
+       KOBJMETHOD(g_raid_tr_iostart,   g_raid_tr_iostart_raid5),
+       KOBJMETHOD(g_raid_tr_iodone,    g_raid_tr_iodone_raid5),
+       KOBJMETHOD(g_raid_tr_kerneldump, g_raid_tr_kerneldump_raid5),
+       KOBJMETHOD(g_raid_tr_locked,    g_raid_tr_locked_raid5),
+       KOBJMETHOD(g_raid_tr_free,      g_raid_tr_free_raid5),
+       { 0, 0 }
+};
+
+static struct g_raid_tr_class g_raid_tr_raid5_class = {
+       "RAID5",
+       g_raid_tr_raid5_methods,
+       sizeof(struct g_raid_tr_raid5_object),
+       .trc_priority = 100
+};
+
+static int
+g_raid_tr_taste_raid5(struct g_raid_tr_object *tr, struct g_raid_volume *vol)
+{
+       struct g_raid_tr_raid5_object *trs;
+       u_int qual;
+
+       trs = (struct g_raid_tr_raid5_object *)tr;
+       qual = tr->tro_volume->v_raid_level_qualifier;
+       if (tr->tro_volume->v_raid_level == G_RAID_VOLUME_RL_RAID5 &&
+           qual >= 0 && qual <= 3) {
+               /* RAID5 */
+       } else
+               return (G_RAID_TR_TASTE_FAIL);
+       trs->trso_starting = 1;
+       return (G_RAID_TR_TASTE_SUCCEED);
+}
+
+static int
+g_raid_tr_update_state_raid5(struct g_raid_volume *vol,
+    struct g_raid_subdisk *sd)
+{
+       struct g_raid_tr_raid5_object *trs;
+       struct g_raid_softc *sc;
+       u_int s;
+       int na, ns, nu;
+
+       sc = vol->v_softc;
+       trs = (struct g_raid_tr_raid5_object *)vol->v_tr;
+       if (trs->trso_stopping &&
+           (trs->trso_flags & TR_RAID5_F_DOING_SOME) == 0)
+               s = G_RAID_VOLUME_S_STOPPED;
+       else if (trs->trso_starting)
+               s = G_RAID_VOLUME_S_STARTING;
+       else {
+               na = g_raid_nsubdisks(vol, G_RAID_SUBDISK_S_ACTIVE);
+               ns = g_raid_nsubdisks(vol, G_RAID_SUBDISK_S_STALE) +
+                    g_raid_nsubdisks(vol, G_RAID_SUBDISK_S_RESYNC);
+               nu = g_raid_nsubdisks(vol, G_RAID_SUBDISK_S_UNINITIALIZED);
+               if (na == vol->v_disks_count)
+                       s = G_RAID_VOLUME_S_OPTIMAL;
+               else if (na + ns == vol->v_disks_count ||
+                   na + ns + nu == vol->v_disks_count /* XXX: Temporary. */)
+                       s = G_RAID_VOLUME_S_SUBOPTIMAL;
+               else if (na == vol->v_disks_count - 1 ||
+                   na + ns + nu == vol->v_disks_count)
+                       s = G_RAID_VOLUME_S_DEGRADED;
+               else
+                       s = G_RAID_VOLUME_S_BROKEN;
+       }
+       if (s != vol->v_state) {
+               g_raid_event_send(vol, G_RAID_VOLUME_S_ALIVE(s) ?
+                   G_RAID_VOLUME_E_UP : G_RAID_VOLUME_E_DOWN,
+                   G_RAID_EVENT_VOLUME);
+               g_raid_change_volume_state(vol, s);
+               if (!trs->trso_starting && !trs->trso_stopping)
+                       g_raid_write_metadata(sc, vol, NULL, NULL);
+       }
+       return (0);
+}
+
+static int
+g_raid_tr_event_raid5(struct g_raid_tr_object *tr,
+    struct g_raid_subdisk *sd, u_int event)
+{
+
+       g_raid_tr_update_state_raid5(tr->tro_volume, sd);
+       return (0);
+}
+
+static int
+g_raid_tr_start_raid5(struct g_raid_tr_object *tr)
+{
+       struct g_raid_tr_raid5_object *trs;
+       struct g_raid_volume *vol;
+
+       trs = (struct g_raid_tr_raid5_object *)tr;
+       vol = tr->tro_volume;
+       trs->trso_starting = 0;
+       g_raid_tr_update_state_raid5(vol, NULL);
+       return (0);
+}
+
+/*
+ * Stop hook: flag the transformation as stopping (and not starting), then
+ * recompute the volume state.  Always returns 0.
+ */
+static int
+g_raid_tr_stop_raid5(struct g_raid_tr_object *tr)
+{
+       struct g_raid_tr_raid5_object *trs;
+
+       trs = (struct g_raid_tr_raid5_object *)tr;
+       /* Both flags are set before the state update looks at them. */
+       trs->trso_stopping = 1;
+       trs->trso_starting = 0;
+       g_raid_tr_update_state_raid5(tr->tro_volume, NULL);
+       return (0);
+}
+
+/*
+ * Start a BIO_READ on a RAID5 volume by splitting it into per-strip child
+ * requests aimed at the member disks that hold the data.  Parity strips are
+ * stepped over and never read.
+ *
+ * The volume's RAID level qualifier selects the on-disk layout:
+ *  - values >= 2 make parity start on the last disk and move towards disk 0
+ *    row by row; otherwise it starts on disk 0 and moves upwards;
+ *  - bit 0 set makes the data strips of a row begin on the disk right after
+ *    the parity disk and wrap around; otherwise data goes in ascending disk
+ *    order, stepping over the parity disk.
+ *
+ * All children are cloned and queued before any of them is issued, so that a
+ * failed g_clone_bio() can release the queued clones and complete the parent
+ * with an error (ENOMEM unless bio_error is already set) with no I/O started.
+ */
+static void
+g_raid_tr_iostart_raid5_read(struct g_raid_tr_object *tr, struct bio *bp)
+{
+       struct bio_queue_head bioq;
+       struct g_raid_volume *vol;
+       struct g_raid_subdisk *sd;
+       struct bio *child;
+       off_t doff, left, len, soff, strip;
+       u_int rlq, ssize;
+       char *data;
+       int dno, pno;
+
+       vol = tr->tro_volume;
+       rlq = vol->v_raid_level_qualifier;
+       ssize = vol->v_strip_size;
+       data = bp->bio_data;
+       /* Bytes still to be mapped onto child requests. */
+       left = bp->bio_length;
+
+       /* Index of the first data strip and the offset inside that strip. */
+       strip = bp->bio_offset / ssize;
+       soff = bp->bio_offset % ssize;
+       /* Byte offset, on every member disk, of the row holding that strip. */
+       doff = (strip / (vol->v_disks_count - 1)) * ssize;
+
+       /* Disk holding the parity strip of the starting row. */
+       pno = strip / (vol->v_disks_count - 1) % vol->v_disks_count;
+       if (rlq >= 2)
+               pno = (vol->v_disks_count - 1) - pno;
+
+       /* Disk holding the first data strip to read. */
+       dno = strip % (vol->v_disks_count - 1);
+       if ((rlq & 1) != 0)
+               dno = (pno + dno + 1) % vol->v_disks_count;
+       else if (dno >= pno)
+               dno++;
+
+       bioq_init(&bioq);
+       do {
+               /* Never cross a strip boundary within one child request. */
+               len = MIN(ssize - soff, left);
+               child = g_clone_bio(bp);
+               if (child == NULL)
+                       goto failure;
+               child->bio_offset = doff + soff;
+               child->bio_data = data;
+               child->bio_length = len;
+               /* Remember the target subdisk until the dispatch loop. */
+               child->bio_caller1 = &vol->v_subdisks[dno];
+               bioq_insert_tail(&bioq, child);
+
+               /* Only the first chunk may start in the middle of a strip. */
+               left -= len;
+               data += len;
+               soff = 0;
+
+               /* Advance to the disk holding the next data strip. */
+               dno++;
+               if ((rlq & 1) != 0) {
+                       dno %= vol->v_disks_count;
+                       if (dno == pno) {
+                               /* Row exhausted: rotate parity, next row. */
+                               if (rlq >= 2) {
+                                       if (pno != 0)
+                                               pno--;
+                                       else
+                                               pno = vol->v_disks_count - 1;
+                               } else {
+                                       pno = (pno + 1) % vol->v_disks_count;
+                                       dno = (dno + 2) % vol->v_disks_count;
+                               }
+                               doff += ssize;
+                       }
+               } else {
+                       if (dno == pno)
+                               dno++;
+                       if (dno >= vol->v_disks_count) {
+                               /* Row exhausted: rotate parity, next row. */
+                               dno %= vol->v_disks_count;
+                               if (rlq >= 2) {
+                                       if (pno != 0)
+                                               pno--;
+                                       else
+                                               pno = vol->v_disks_count - 1;
+                               } else
+                                       pno = (pno + 1) % vol->v_disks_count;
+                               doff += ssize;
+                       }
+                       /* The new row's parity may sit on the first disk. */
+                       if (dno == pno)
+                               dno++;
+               }
+       } while (left > 0);
+
+       /* Every clone succeeded: hand the children to their subdisks. */
+       while ((child = bioq_first(&bioq)) != NULL) {
+               bioq_remove(&bioq, child);
+               sd = child->bio_caller1;
+               child->bio_caller1 = NULL;
+               g_raid_subdisk_iostart(sd, child);
+       }
+       return;
+
+failure:
+       /* Nothing was issued yet; drop the clones and fail the parent. */
+       while ((child = bioq_first(&bioq)) != NULL) {
+               bioq_remove(&bioq, child);
+               g_destroy_bio(child);
+       }
+       if (bp->bio_error == 0)
+               bp->bio_error = ENOMEM;
+       g_raid_iodone(bp, bp->bio_error);
+}
+
+/*
+ * I/O entry point of the RAID5 transformation.
+ *
+ * Requests are refused with EIO while the volume state is below
+ * G_RAID_VOLUME_S_SUBOPTIMAL.  Only BIO_READ is serviced; BIO_WRITE,
+ * BIO_DELETE and BIO_FLUSH are completed with ENODEV.  Every path completes
+ * the request or hands it on, so a bio is never left pending.
+ */
+static void
+g_raid_tr_iostart_raid5(struct g_raid_tr_object *tr, struct bio *bp)
+{
+       struct g_raid_volume *vol;
+
+       vol = tr->tro_volume;
+       if (vol->v_state < G_RAID_VOLUME_S_SUBOPTIMAL) {
+               g_raid_iodone(bp, EIO);
+               return;
+       }
+       switch (bp->bio_cmd) {
+       case BIO_READ:
+               g_raid_tr_iostart_raid5_read(tr, bp);
+               break;
+       case BIO_WRITE:
+       case BIO_DELETE:
+       case BIO_FLUSH:
+               g_raid_iodone(bp, ENODEV);
+               break;
+       default:
+               KASSERT(1 == 0, ("Invalid command here: %u (volume=%s)",
+                   bp->bio_cmd, vol->v_name));
+               /*
+                * With assertions compiled out the KASSERT above is a no-op;
+                * complete the request anyway so that it cannot hang forever.
+                */
+               g_raid_iodone(bp, EOPNOTSUPP);
+               break;
+       }
+}
+
+/*
+ * Completion handler for one child request of a split read.
+ *
+ * Counts the child as finished on its parent and destroys it.  When the last
+ * child has come back, the parent is marked fully completed and finished via
+ * g_raid_iodone().
+ *
+ * The first non-zero child error is latched in the parent's bio_error and
+ * that value is what the parent completes with.  Reporting only the error of
+ * whichever child happened to finish last would let an earlier failure be
+ * masked by a later success.
+ */
+static void
+g_raid_tr_iodone_raid5(struct g_raid_tr_object *tr,
+    struct g_raid_subdisk *sd, struct bio *bp)
+{
+       struct bio *pbp;
+
+       pbp = bp->bio_parent;
+       /* Keep the first failure; later children must not overwrite it. */
+       if (pbp->bio_error == 0)
+               pbp->bio_error = bp->bio_error;
+       pbp->bio_inbed++;
+       g_destroy_bio(bp);
+       if (pbp->bio_children == pbp->bio_inbed) {
+               pbp->bio_completed = pbp->bio_length;
+               g_raid_iodone(pbp, pbp->bio_error);
+       }
+}
+
+/*
+ * Kernel dump hook.  Dumping through the RAID5 transformation is not
+ * implemented: all arguments are ignored and ENODEV is always returned.
+ */
+static int
+g_raid_tr_kerneldump_raid5(struct g_raid_tr_object *tr,
+    void *virtual, vm_offset_t physical, off_t offset, size_t length)
+{
+
+       return (ENODEV);
+}
+
+/*
+ * "Locked" callback.  argp is the bio to issue; its bio_caller1 field holds
+ * the subdisk the bio is meant for.  The bio is started on that subdisk.
+ * Always returns 0.
+ *
+ * NOTE(review): presumably invoked once a requested range lock has been
+ * granted -- confirm against the caller.
+ */
+static int
+g_raid_tr_locked_raid5(struct g_raid_tr_object *tr, void *argp)
+{
+       struct bio *bp;
+
+       bp = argp;
+       g_raid_subdisk_iostart(bp->bio_caller1, bp);
+       return (0);
+}
+
+/*
+ * Release the resources owned by the RAID5 transformation object: the
+ * trso_buffer allocation, if there is one.  The pointer is reset so that a
+ * repeated call is harmless.  Always returns 0.
+ */
+static int
+g_raid_tr_free_raid5(struct g_raid_tr_object *tr)
+{
+       struct g_raid_tr_raid5_object *trs;
+
+       trs = (struct g_raid_tr_raid5_object *)tr;
+       if (trs->trso_buffer == NULL)
+               return (0);
+
+       free(trs->trso_buffer, M_TR_RAID5);
+       trs->trso_buffer = NULL;
+       return (0);
+}
+
+/*
+ * NOTE(review): presumably registers the RAID5 transformation class with the
+ * GEOM RAID core; G_RAID_TR_DECLARE is defined outside this file -- confirm.
+ */
+G_RAID_TR_DECLARE(g_raid_tr_raid5);

Modified: head/sys/modules/geom/geom_raid/Makefile
==============================================================================
--- head/sys/modules/geom/geom_raid/Makefile    Thu Apr 19 10:53:17 2012        
(r234457)
+++ head/sys/modules/geom/geom_raid/Makefile    Thu Apr 19 12:30:12 2012        
(r234458)
@@ -11,7 +11,7 @@ SRCS+=        g_raid_tr_if.h g_raid_tr_if.c
 
 SRCS+= md_intel.c md_jmicron.c md_nvidia.c md_promise.c md_sii.c
 
-SRCS+= tr_concat.c tr_raid0.c tr_raid1.c tr_raid1e.c
+SRCS+= tr_concat.c tr_raid0.c tr_raid1.c tr_raid1e.c tr_raid5.c
 
 MFILES=        kern/bus_if.m kern/device_if.m
 MFILES+= geom/raid/g_raid_md_if.m geom/raid/g_raid_tr_if.m
_______________________________________________
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscribe@freebsd.org"

Reply via email to