Author: mav
Date: Tue May  1 08:19:29 2012
New Revision: 234868
URL: http://svn.freebsd.org/changeset/base/234868

Log:
  Improve DDF metadata writing.

Modified:
  head/sys/geom/raid/md_ddf.c

Modified: head/sys/geom/raid/md_ddf.c
==============================================================================
--- head/sys/geom/raid/md_ddf.c Tue May  1 07:46:30 2012        (r234867)
+++ head/sys/geom/raid/md_ddf.c Tue May  1 08:19:29 2012        (r234868)
@@ -89,9 +89,9 @@ struct g_raid_md_ddf_pervolume {
 struct g_raid_md_ddf_object {
        struct g_raid_md_object  mdio_base;
        struct ddf_meta          mdio_meta;
+       int                      mdio_starting;
        struct callout           mdio_start_co; /* STARTING state timer. */
        int                      mdio_started;
-       int                      mdio_incomplete;
        struct root_hold_token  *mdio_rootmount; /* Root mount delay token. */
 };
 
@@ -835,7 +835,8 @@ ddf_vol_meta_create(struct ddf_vol_meta 
 }
 
 static void
-ddf_vol_meta_update(struct ddf_vol_meta *dst, struct ddf_meta *src, uint8_t 
*GUID)
+ddf_vol_meta_update(struct ddf_vol_meta *dst, struct ddf_meta *src,
+    uint8_t *GUID, int started)
 {
        struct ddf_header *hdr;
        struct ddf_vd_entry *vde;
@@ -850,15 +851,15 @@ ddf_vol_meta_update(struct ddf_vol_meta 
        size = GET16(src, hdr->Configuration_Record_Length) * src->sectorsize;
 
        if (dst->vdc == NULL ||
-           ((int32_t)(GET32D(src, vdc->Sequence_Number) -
-           GET32(dst, vdc->Sequence_Number))) > 0)
+           (!started && ((int32_t)(GET32D(src, vdc->Sequence_Number) -
+           GET32(dst, vdc->Sequence_Number))) > 0))
                vnew = 1;
        else
                vnew = 0;
 
        if (dst->bvdc[bvd] == NULL ||
-           ((int32_t)(GET32D(src, vdc->Sequence_Number) -
-           GET32(dst, bvdc[bvd]->Sequence_Number))) > 0)
+           (!started && ((int32_t)(GET32D(src, vdc->Sequence_Number) -
+           GET32(dst, bvdc[bvd]->Sequence_Number))) > 0))
                bvnew = 1;
        else
                bvnew = 0;
@@ -1803,6 +1804,7 @@ g_raid_md_ddf_start(struct g_raid_volume
        struct g_raid_disk *disk;
        struct g_raid_md_object *md;
        struct g_raid_md_ddf_pervolume *pv;
+       struct g_raid_md_ddf_object *mdi;
        struct ddf_vol_meta *vmeta;
        struct ddf_vdc_record *vdc;
        uint64_t *val2;
@@ -1810,6 +1812,7 @@ g_raid_md_ddf_start(struct g_raid_volume
 
        sc = vol->v_softc;
        md = sc->sc_md;
+       mdi = (struct g_raid_md_ddf_object *)md;
        pv = vol->v_md_data;
        vmeta = &pv->pv_meta;
        vdc = vmeta->vdc;
@@ -1862,6 +1865,7 @@ g_raid_md_ddf_start(struct g_raid_volume
        }
 
        pv->pv_started = 1;
+       mdi->mdio_starting--;
        callout_stop(&pv->pv_start_co);
        G_RAID_DEBUG1(0, sc, "Volume started.");
        g_raid_md_write_ddf(md, vol, NULL, NULL);
@@ -1948,13 +1952,13 @@ g_raid_md_ddf_new_disk(struct g_raid_dis
                        callout_reset(&pv->pv_start_co,
                            g_raid_start_timeout * hz,
                            g_raid_ddf_go, vol);
+                       mdi->mdio_starting++;
                } else
                        pv = vol->v_md_data;
 
                /* If we haven't started yet - check metadata freshness. */
                vmeta = &pv->pv_meta;
-               if (vmeta->hdr == NULL || !pv->pv_started)
-                       ddf_vol_meta_update(vmeta, pdmeta, vdc->VD_GUID);
+               ddf_vol_meta_update(vmeta, pdmeta, vdc->VD_GUID, 
pv->pv_started);
        }
 
        if (spare == 1) {
@@ -2649,26 +2653,29 @@ g_raid_md_write_ddf(struct g_raid_md_obj
        if (sc->sc_stopping == G_RAID_DESTROY_HARD)
                return (0);
 
-       /* Generate new per-volume metadata for affected volumes. */
-       TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) {
-               if (vol->v_stopping)
-                       continue;
-
-               /* Skip volumes not related to specified targets. */
-               if (tvol != NULL && vol != tvol)
-                       continue;
-               if (tsd != NULL && vol != tsd->sd_volume)
-                       continue;
-               if (tdisk != NULL) {
-                       for (i = 0; i < vol->v_disks_count; i++) {
-                               if (vol->v_subdisks[i].sd_disk == tdisk)
-                                       break;
-                       }
-                       if (i >= vol->v_disks_count)
+       /*
+        * Clear disk flags to let only really needed ones to be reset.
+        * Do it only if there are no volumes in starting state now,
+        * as they can update disk statuses yet and we may kill innocent.
+        */
+       if (mdi->mdio_starting == 0) {
+               for (i = 0; i < GET16(gmeta, pdr->Populated_PDEs); i++) {
+                       if (isff(gmeta->pdr->entry[i].PD_GUID, 24))
                                continue;
+                       SET16(gmeta, pdr->entry[i].PD_Type,
+                           GET16(gmeta, pdr->entry[i].PD_Type) &
+                           ~DDF_PDE_PARTICIPATING);
+                       if ((GET16(gmeta, pdr->entry[i].PD_State) &
+                           DDF_PDE_PFA) == 0)
+                               SET16(gmeta, pdr->entry[i].PD_State, 0);
                }
+       }
 
+       /* Generate/update new per-volume metadata. */
+       TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) {
                pv = (struct g_raid_md_ddf_pervolume *)vol->v_md_data;
+               if (vol->v_stopping || !pv->pv_started)
+                       continue;
                vmeta = &pv->pv_meta;
 
                SET32(vmeta, vdc->Sequence_Number,
@@ -2711,7 +2718,9 @@ g_raid_md_write_ddf(struct g_raid_md_obj
                        SET8(vmeta, vde->VD_State, DDF_VDE_PARTIAL);
                else
                        SET8(vmeta, vde->VD_State, DDF_VDE_OPTIMAL);
-               if (vol->v_dirty)
+               if (vol->v_dirty ||
+                   g_raid_nsubdisks(vol, G_RAID_SUBDISK_S_STALE) > 0 ||
+                   g_raid_nsubdisks(vol, G_RAID_SUBDISK_S_RESYNC) > 0)
                        SET8(vmeta, vde->VD_State,
                            GET8(vmeta, vde->VD_State) | DDF_VDE_DIRTY);
                SET8(vmeta, vde->Init_State, DDF_VDE_INIT_FULL); // XXX
@@ -2719,45 +2728,50 @@ g_raid_md_write_ddf(struct g_raid_md_obj
 
                for (i = 0; i < vol->v_disks_count; i++) {
                        sd = &vol->v_subdisks[i];
-                       disk = sd->sd_disk;
-                       if (disk == NULL)
-                               continue;
-                       pd = (struct g_raid_md_ddf_perdisk *)disk->d_md_data;
                        bvd = i / GET16(vmeta, vdc->Primary_Element_Count);
                        pos = i % GET16(vmeta, vdc->Primary_Element_Count);
-                       if (vmeta->bvdc[bvd] == NULL) {
-                               size = GET16(vmeta,
-                                   hdr->Configuration_Record_Length) *
-                                   vmeta->sectorsize;
-                               vmeta->bvdc[bvd] = malloc(size, M_MD_DDF, 
M_WAITOK);
-                               memcpy(vmeta->bvdc[bvd], vmeta->vdc, size);
+                       disk = sd->sd_disk;
+                       if (disk != NULL) {
+                               pd = (struct g_raid_md_ddf_perdisk 
*)disk->d_md_data;
+                               if (vmeta->bvdc[bvd] == NULL) {
+                                       size = GET16(vmeta,
+                                           hdr->Configuration_Record_Length) *
+                                           vmeta->sectorsize;
+                                       vmeta->bvdc[bvd] = malloc(size,
+                                           M_MD_DDF, M_WAITOK);
+                                       memset(vmeta->bvdc[bvd], 0xff, size);
+                               }
+                               memcpy(vmeta->bvdc[bvd], vmeta->vdc,
+                                   sizeof(struct ddf_vdc_record));
                                SET8(vmeta, bvdc[bvd]->Secondary_Element_Seq, 
bvd);
+                               SET64(vmeta, bvdc[bvd]->Block_Count,
+                                   sd->sd_size / vol->v_sectorsize);
+                               SET32(vmeta, 
bvdc[bvd]->Physical_Disk_Sequence[pos],
+                                   GET32(&pd->pd_meta, pdd->PD_Reference));
+                               val2 = (uint64_t 
*)&(vmeta->bvdc[bvd]->Physical_Disk_Sequence[
+                                   GET16(vmeta, 
hdr->Max_Primary_Element_Entries)]);
+                               SET64P(vmeta, val2 + pos,
+                                   sd->sd_offset / vol->v_sectorsize);
                        }
-                       SET64(vmeta, bvdc[bvd]->Block_Count,
-                           sd->sd_size / vol->v_sectorsize);
-                       SET32(vmeta, bvdc[bvd]->Physical_Disk_Sequence[pos],
-                           GET32(&pd->pd_meta, pdd->PD_Reference));
-                       val2 = (uint64_t 
*)&(vmeta->bvdc[bvd]->Physical_Disk_Sequence[
-                           GET16(vmeta, hdr->Max_Primary_Element_Entries)]);
-                       SET64P(vmeta, val2 + pos,
-                           sd->sd_offset / vol->v_sectorsize);
+                       if (vmeta->bvdc[bvd] == NULL)
+                               continue;
 
                        j = ddf_meta_find_pd(gmeta, NULL,
-                           GET32(&pd->pd_meta, pdd->PD_Reference));
+                           GET32(vmeta, 
bvdc[bvd]->Physical_Disk_Sequence[pos]));
                        if (j < 0)
                                continue;
                        SET32(gmeta, pdr->entry[j].PD_Type,
                            GET32(gmeta, pdr->entry[j].PD_Type) |
                            DDF_PDE_PARTICIPATING);
-                       if (sd->sd_state == G_RAID_SUBDISK_S_FAILED)
+                       if (sd->sd_state == G_RAID_SUBDISK_S_NONE)
                                SET32(gmeta, pdr->entry[j].PD_State,
                                    GET32(gmeta, pdr->entry[j].PD_State) |
-                                   DDF_PDE_FAILED | DDF_PDE_PFA);
-                       else if (sd->sd_state <= G_RAID_SUBDISK_S_UNINITIALIZED)
+                                   DDF_PDE_FAILED | DDF_PDE_MISSING);
+                       else if (sd->sd_state == G_RAID_SUBDISK_S_FAILED)
                                SET32(gmeta, pdr->entry[j].PD_State,
                                    GET32(gmeta, pdr->entry[j].PD_State) |
-                                   DDF_PDE_FAILED);
-                       else if (sd->sd_state < G_RAID_SUBDISK_S_ACTIVE)
+                                   DDF_PDE_FAILED | DDF_PDE_PFA);
+                       else if (sd->sd_state <= G_RAID_SUBDISK_S_REBUILD)
                                SET32(gmeta, pdr->entry[j].PD_State,
                                    GET32(gmeta, pdr->entry[j].PD_State) |
                                    DDF_PDE_FAILED);
@@ -2768,17 +2782,33 @@ g_raid_md_write_ddf(struct g_raid_md_obj
                }
        }
 
+       /* Remove disks without "participating" flag (unused). */
+       for (i = 0, j = -1; i < GET16(gmeta, pdr->Populated_PDEs); i++) {
+               if (isff(gmeta->pdr->entry[i].PD_GUID, 24))
+                       continue;
+               if (GET16(gmeta, pdr->entry[i].PD_Type) & DDF_PDE_PARTICIPATING)
+                       j = i;
+               else
+                       memset(&gmeta->pdr->entry[i], 0xff,
+                           sizeof(struct ddf_pd_entry));
+       }
+       SET16(gmeta, pdr->Populated_PDEs, j + 1);
+
+       /* Update per-disk metadata and write them. */
        TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
                pd = (struct g_raid_md_ddf_perdisk *)disk->d_md_data;
                if (disk->d_state != G_RAID_DISK_S_ACTIVE)
                        continue;
+               /* Update PDR. */
                memcpy(pd->pd_meta.pdr, gmeta->pdr,
                    GET32(&pd->pd_meta, hdr->pdr_length) *
                    pd->pd_meta.sectorsize);
-               TAILQ_FOREACH(sd, &disk->d_subdisks, sd_next) {
-                       vol = sd->sd_volume;
+               /* Update VDR. */
+               SET16(&pd->pd_meta, vdr->Populated_VDEs, 0);
+               TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) {
+                       if (vol->v_stopping)
+                               continue;
                        pv = (struct g_raid_md_ddf_pervolume *)vol->v_md_data;
-                       vmeta = &pv->pv_meta;
                        i = ddf_meta_find_vd(&pd->pd_meta,
                            pv->pv_meta.vde->VD_GUID);
                        if (i < 0)
@@ -2787,14 +2817,22 @@ g_raid_md_write_ddf(struct g_raid_md_obj
                                memcpy(&pd->pd_meta.vdr->entry[i],
                                    pv->pv_meta.vde,
                                    sizeof(struct ddf_vd_entry));
+               }
+               /* Update VDC. */
+               TAILQ_FOREACH(sd, &disk->d_subdisks, sd_next) {
+                       vol = sd->sd_volume;
+                       if (vol->v_stopping)
+                               continue;
+                       pv = (struct g_raid_md_ddf_pervolume *)vol->v_md_data;
+                       vmeta = &pv->pv_meta;
                        vdc = ddf_meta_find_vdc(&pd->pd_meta,
-                           pv->pv_meta.vde->VD_GUID);
+                           vmeta->vde->VD_GUID);
                        if (vdc == NULL)
                                vdc = ddf_meta_find_vdc(&pd->pd_meta, NULL);
                        if (vdc != NULL) {
                                bvd = sd->sd_pos / GET16(vmeta,
                                    vdc->Primary_Element_Count);
-                               memcpy(vdc, pv->pv_meta.bvdc[bvd],
+                               memcpy(vdc, vmeta->bvdc[bvd],
                                    GET16(&pd->pd_meta,
                                    hdr->Configuration_Record_Length) *
                                    pd->pd_meta.sectorsize);
_______________________________________________
svn-src-all@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"

Reply via email to