Author: ae
Date: Mon Mar 19 13:14:44 2012
New Revision: 233176
URL: http://svn.freebsd.org/changeset/base/233176

Log:
  Add a new GEOM_PART_LDM module that implements the Logical Disk Manager
  scheme. LDM is the logical volume manager for MS Windows NT; its volumes
  are also known as dynamic volumes. It supports about 2000 partitions
  and also provides the capability for software RAID implementations.
  
  This version implements only the partitioning scheme capability and is
  based on the linux-ntfs project documentation and several publications
  across the Web. NOTE: JBOD, RAID0 and RAID5 volumes aren't supported.
  
  Access to the LDM metadata is read-only. When LDM is on a disk
  partitioned with MBR, the metadata can also be destroyed. For
  GPT-partitioned disks the destroy action is not supported.
  
  Reviewed by:  ivoras (previous version)
  MFC after:    1 month

Added:
  head/sys/geom/part/g_part_ldm.c   (contents, props changed)

Added: head/sys/geom/part/g_part_ldm.c
==============================================================================
--- /dev/null   00:00:00 1970   (empty, because file is newly added)
+++ head/sys/geom/part/g_part_ldm.c     Mon Mar 19 13:14:44 2012        (r233176)
@@ -0,0 +1,1504 @@
+/*-
+ * Copyright (c) 2012 Andrey V. Elsukov <[email protected]>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/bio.h>
+#include <sys/diskmbr.h>
+#include <sys/endian.h>
+#include <sys/gpt.h>
+#include <sys/kernel.h>
+#include <sys/kobj.h>
+#include <sys/limits.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mutex.h>
+#include <sys/queue.h>
+#include <sys/sbuf.h>
+#include <sys/systm.h>
+#include <sys/sysctl.h>
+#include <sys/uuid.h>
+#include <geom/geom.h>
+#include <geom/part/g_part.h>
+
+#include "g_part_if.h"
+
+FEATURE(geom_part_ldm, "GEOM partitioning class for LDM support");
+
+SYSCTL_DECL(_kern_geom_part);
+static SYSCTL_NODE(_kern_geom_part, OID_AUTO, ldm, CTLFLAG_RW, 0,
+    "GEOM_PART_LDM Logical Disk Manager");
+
+static u_int ldm_debug = 0;    /* gates LDM_DEBUG (>= lvl) and LDM_DUMP (> 1) */
+TUNABLE_INT("kern.geom.part.ldm.debug", &ldm_debug);
+SYSCTL_UINT(_kern_geom_part_ldm, OID_AUTO, debug, CTLFLAG_RW,
+    &ldm_debug, 0, "Debug level");
+
+/*
+ * This allows access to mirrored LDM volumes. Since we are not
+ * doing mirroring here, it is not enabled by default.
+ */
+static u_int show_mirrors = 0;
+TUNABLE_INT("kern.geom.part.ldm.show_mirrors", &show_mirrors);
+SYSCTL_UINT(_kern_geom_part_ldm, OID_AUTO, show_mirrors, CTLFLAG_RW,
+    &show_mirrors, 0, "Show mirrored volumes");
+
+/*
+ * Print a "GEOM_PART: "-prefixed message followed by a newline when the
+ * ldm_debug level is at least lvl.  At least one argument must follow fmt.
+ */
+#define        LDM_DEBUG(lvl, fmt, ...)        do {                    \
+       if (ldm_debug >= (lvl)) {                                       \
+               printf("GEOM_PART: " fmt "\n", __VA_ARGS__);            \
+       }                                                               \
+} while (0)
+/* Hex-dump size bytes starting at buf when ldm_debug is greater than 1. */
+#define        LDM_DUMP(buf, size)     do {                            \
+       if (ldm_debug > 1) {                                            \
+               hexdump(buf, size, NULL, 0);                            \
+       }                                                               \
+} while (0)
+
+/*
+ * These are internal representations of the LDM structures.
+ *
+ * We do not keep all fields of the on-disk structures, only the most useful.
+ * All numbers in the on-disk structures are in big-endian format.
+ */
+
+/*
+ * Private header is 512 bytes long. There are three copies on each disk.
+ * Offset and sizes are in sectors. Location of each copy:
+ * - the first offset is relative to the disk start;
+ * - the second and third offset are relative to the LDM database start.
+ *
+ * On a disk partitioned with GPT, there is no first private header.
+ */
+#define        LDM_PH_MBRINDEX         0
+#define        LDM_PH_GPTINDEX         2
+static const uint64_t  ldm_ph_off[] = {6, 1856, 2047}; /* in sectors */
+#define        LDM_VERSION_2K          0x2000b
+#define        LDM_VERSION_VISTA       0x2000c
+#define        LDM_PH_VERSION_OFF      0x00c
+#define        LDM_PH_DISKGUID_OFF     0x030
+#define        LDM_PH_DGGUID_OFF       0x0b0
+#define        LDM_PH_DGNAME_OFF       0x0f0
+#define        LDM_PH_START_OFF        0x11b
+#define        LDM_PH_SIZE_OFF         0x123
+#define        LDM_PH_DB_OFF           0x12b
+#define        LDM_PH_DBSIZE_OFF       0x133
+#define        LDM_PH_TH1_OFF          0x13b
+#define        LDM_PH_TH2_OFF          0x143
+#define        LDM_PH_CONFSIZE_OFF     0x153
+#define        LDM_PH_LOGSIZE_OFF      0x15b
+#define        LDM_PH_SIGN             "PRIVHEAD"
+struct ldm_privhdr {
+       struct uuid     disk_guid;      /* disk GUID */
+       struct uuid     dg_guid;        /* disk group GUID */
+       u_char          dg_name[32];    /* disk group name */
+       uint64_t        start;          /* logical disk start */
+       uint64_t        size;           /* logical disk size */
+       uint64_t        db_offset;      /* LDM database start */
+#define        LDM_DB_SIZE             2048    /* the only accepted db_size */
+       uint64_t        db_size;        /* LDM database size */
+#define        LDM_TH_COUNT            2
+       uint64_t        th_offset[LDM_TH_COUNT]; /* TOC header offsets */
+       uint64_t        conf_size;      /* configuration size */
+       uint64_t        log_size;       /* size of log */
+};
+
+/*
+ * Table of contents header is 512 bytes long.
+ * There are two identical copies at offsets from the private header.
+ * Offsets are relative to the LDM database start.
+ */
+#define        LDM_TH_SIGN             "TOCBLOCK"
+#define        LDM_TH_NAME1            "config"        /* at LDM_TH_NAME1_OFF */
+#define        LDM_TH_NAME2            "log"           /* at LDM_TH_NAME2_OFF */
+#define        LDM_TH_NAME1_OFF        0x024
+#define        LDM_TH_CONF_OFF         0x02e
+#define        LDM_TH_CONFSIZE_OFF     0x036
+#define        LDM_TH_NAME2_OFF        0x046
+#define        LDM_TH_LOG_OFF          0x050
+#define        LDM_TH_LOGSIZE_OFF      0x058
+struct ldm_tochdr {
+       uint64_t        conf_offset;    /* configuration offset */
+       uint64_t        log_offset;     /* log offset */
+};
+
+/*
+ * LDM database header is 512 bytes long.
+ */
+#define        LDM_VMDB_SIGN           "VMDB"
+#define        LDM_DB_LASTSEQ_OFF      0x004
+#define        LDM_DB_SIZE_OFF         0x008
+#define        LDM_DB_STATUS_OFF       0x010   /* 1 means consistent state */
+#define        LDM_DB_VERSION_OFF      0x012
+#define        LDM_DB_DGNAME_OFF       0x016
+#define        LDM_DB_DGGUID_OFF       0x035
+struct ldm_vmdbhdr {
+       uint32_t        last_seq;       /* sequence number of last VBLK */
+       uint32_t        size;           /* size of VBLK, in bytes */
+};
+
+/*
+ * The LDM database configuration section contains the VMDB header and
+ * many VBLKs. Each VBLK represents a disk group, disk partition,
+ * component or volume.
+ *
+ * The most interesting for us are volumes; they represent
+ * partitions in the GEOM_PART meaning. But a volume VBLK does not
+ * contain all information needed to create a GEOM provider, so we
+ * should get this information from the related VBLKs. This is how
+ * VBLKs are related:
+ *     Volumes <- Components <- Partitions -> Disks
+ *
+ * One volume can contain several components. In this case LDM
+ * does mirroring of volume data to each component.
+ *
+ * Also each component can contain several partitions (spanned or
+ * striped volumes).
+ */
+
+struct ldm_component {                 /* parsed component VBLK */
+       uint64_t        id;             /* object id */
+       uint64_t        vol_id;         /* parent volume object id */
+
+       int             count;          /* presumably # of partitions; confirm */
+       LIST_HEAD(, ldm_partition) partitions; /* partitions of this component */
+       LIST_ENTRY(ldm_component) entry; /* linkage, cf. ldm_volume.components */
+};
+
+struct ldm_volume {                    /* parsed volume VBLK */
+       uint64_t        id;             /* object id */
+       uint64_t        size;           /* volume size */
+       uint8_t         number;         /* used for ordering */
+       uint8_t         part_type;      /* partition type */
+
+       int             count;          /* presumably # of components; confirm */
+       LIST_HEAD(, ldm_component) components; /* components of this volume */
+       LIST_ENTRY(ldm_volume)  entry;  /* linkage, cf. ldm_db.volumes */
+};
+
+struct ldm_disk {                      /* parsed disk VBLK */
+       uint64_t        id;             /* object id */
+       struct uuid     guid;           /* disk guid */
+
+       LIST_ENTRY(ldm_disk) entry;     /* linkage, cf. ldm_db.disks */
+};
+
+#if 0  /* compiled out: no live code in this file uses disk group objects */
+struct ldm_disk_group {
+       uint64_t        id;             /* object id */
+       struct uuid     guid;           /* disk group guid */
+       u_char          name[32];       /* disk group name */
+
+       LIST_ENTRY(ldm_disk_group) entry; /* list linkage */
+};
+#endif
+
+struct ldm_partition {                 /* parsed partition VBLK */
+       uint64_t        id;             /* object id */
+       uint64_t        disk_id;        /* disk object id */
+       uint64_t        comp_id;        /* parent component object id */
+       uint64_t        start;          /* offset relative to disk start */
+       uint64_t        offset;         /* offset for spanned volumes */
+       uint64_t        size;           /* partition size */
+
+       LIST_ENTRY(ldm_partition) entry; /* linkage, cf. ldm_component.partitions */
+};
+
+/*
+ * Each VBLK is 128 bytes long and has a standard 16-byte header.
+ * Some of the VBLK's fields are fixed size, but others have variable size.
+ * Fields with variable size are prefixed with a one-byte length marker.
+ * Some fields are strings and can also be of fixed or variable size.
+ * Strings with fixed size are NULL-terminated, others are not.
+ * All VBLKs have the same several first fields:
+ *     Offset          Size            Description
+ *     ---------------+---------------+--------------------------
+ *     0x00            16              standard VBLK header
+ *     0x10            2               update status
+ *     0x13            1               VBLK type
+ *     0x18            PS              object id
+ *     0x18+           PN              object name
+ *
+ *  o Offset 0x18+ means '0x18 + length of all variable-width fields'
+ *  o 'P' in size column means 'prefixed' (variable-width),
+ *    'S' - string, 'N' - number.
+ */
+#define        LDM_VBLK_SIGN           "VBLK"
+#define        LDM_VBLK_SEQ_OFF        0x04
+#define        LDM_VBLK_GROUP_OFF      0x08
+#define        LDM_VBLK_INDEX_OFF      0x0c
+#define        LDM_VBLK_COUNT_OFF      0x0e
+#define        LDM_VBLK_TYPE_OFF       0x13
+#define        LDM_VBLK_OID_OFF        0x18
+struct ldm_vblkhdr {
+       uint32_t        seq;            /* sequence number */
+       uint32_t        group;          /* group number */
+       uint16_t        index;          /* index in the group */
+       uint16_t        count;          /* number of entries in the group */
+};
+
+#define        LDM_VBLK_T_COMPONENT    0x32
+#define        LDM_VBLK_T_PARTITION    0x33
+#define        LDM_VBLK_T_DISK         0x34
+#define        LDM_VBLK_T_DISKGROUP    0x35
+#define        LDM_VBLK_T_DISK4        0x44
+#define        LDM_VBLK_T_DISKGROUP4   0x45
+#define        LDM_VBLK_T_VOLUME       0x51
+struct ldm_vblk {
+       uint8_t         type;           /* VBLK type */
+       union {
+               uint64_t                id;     /* overlays 'id' of each member */
+               struct ldm_volume       vol;
+               struct ldm_component    comp;
+               struct ldm_disk         disk;
+               struct ldm_partition    part;
+#if 0
+               struct ldm_disk_group   disk_group;
+#endif
+       } u;
+       LIST_ENTRY(ldm_vblk) entry;     /* linkage, cf. ldm_db.vblks */
+};
+
+/*
+ * Some VBLKs contain a bit more data than can fit into 128 bytes. These
+ * VBLKs are called eXtended VBLKs. Before parsing, the data from these VBLKs
+ * should be placed into a contiguous memory buffer. We can determine an xVBLK
+ * by the count field in the standard VBLK header (count > 1).
+ */
+struct ldm_xvblk {
+       uint32_t        group;          /* xVBLK group number */
+       uint32_t        size;           /* the total size of xVBLK */
+       uint8_t         map;            /* bitmask of currently saved VBLKs */
+       u_char          *data;          /* xVBLK data */
+
+       LIST_ENTRY(ldm_xvblk)   entry;  /* linkage, cf. ldm_db.xvblks */
+};
+
+/* The internal representation of LDM database. */
+struct ldm_db {
+       struct ldm_privhdr              ph;     /* private header */
+       struct ldm_tochdr               th;     /* TOC header */
+       struct ldm_vmdbhdr              dh;     /* VMDB header */
+
+       LIST_HEAD(, ldm_volume)         volumes;        /* list of volumes */
+       LIST_HEAD(, ldm_disk)           disks;          /* list of disks */
+       LIST_HEAD(, ldm_vblk)           vblks;          /* list of VBLKs */
+       LIST_HEAD(, ldm_xvblk)          xvblks;         /* xVBLK groups */
+};
+
+static struct uuid gpt_uuid_ms_ldm_metadata = GPT_ENT_TYPE_MS_LDM_METADATA;
+
+struct g_part_ldm_table {              /* LDM-specific partition table state */
+       struct g_part_table     base;   /* generic g_part table, kept first */
+       uint64_t                db_offset; /* presumably LDM database start; confirm */
+       int                     is_gpt; /* presumably set for LDM on GPT; confirm */
+};
+struct g_part_ldm_entry {              /* LDM-specific partition entry state */
+       struct g_part_entry     base;   /* generic g_part entry, kept first */
+       uint8_t                 type;   /* presumably partition type byte; confirm */
+};
+
+/*
+ * Forward declarations of the scheme methods that are registered in
+ * g_part_ldm_methods[] below.
+ */
+static int g_part_ldm_add(struct g_part_table *, struct g_part_entry *,
+    struct g_part_parms *);
+static int g_part_ldm_bootcode(struct g_part_table *, struct g_part_parms *);
+static int g_part_ldm_create(struct g_part_table *, struct g_part_parms *);
+static int g_part_ldm_destroy(struct g_part_table *, struct g_part_parms *);
+static void g_part_ldm_dumpconf(struct g_part_table *, struct g_part_entry *,
+    struct sbuf *, const char *);
+static int g_part_ldm_dumpto(struct g_part_table *, struct g_part_entry *);
+static int g_part_ldm_modify(struct g_part_table *, struct g_part_entry *,
+    struct g_part_parms *);
+static const char *g_part_ldm_name(struct g_part_table *,
+    struct g_part_entry *, char *, size_t);
+static int g_part_ldm_probe(struct g_part_table *, struct g_consumer *);
+static int g_part_ldm_read(struct g_part_table *, struct g_consumer *);
+static int g_part_ldm_setunset(struct g_part_table *, struct g_part_entry *,
+    const char *, unsigned int);
+static const char *g_part_ldm_type(struct g_part_table *,
+    struct g_part_entry *, char *, size_t);
+static int g_part_ldm_write(struct g_part_table *, struct g_consumer *);
+static int g_part_ldm_resize(struct g_part_table *, struct g_part_entry *,
+    struct g_part_parms *);
+
+static kobj_method_t g_part_ldm_methods[] = {  /* scheme method table */
+       KOBJMETHOD(g_part_add,          g_part_ldm_add),
+       KOBJMETHOD(g_part_bootcode,     g_part_ldm_bootcode),
+       KOBJMETHOD(g_part_create,       g_part_ldm_create),
+       KOBJMETHOD(g_part_destroy,      g_part_ldm_destroy),
+       KOBJMETHOD(g_part_dumpconf,     g_part_ldm_dumpconf),
+       KOBJMETHOD(g_part_dumpto,       g_part_ldm_dumpto),
+       KOBJMETHOD(g_part_modify,       g_part_ldm_modify),
+       KOBJMETHOD(g_part_resize,       g_part_ldm_resize),
+       KOBJMETHOD(g_part_name,         g_part_ldm_name),
+       KOBJMETHOD(g_part_probe,        g_part_ldm_probe),
+       KOBJMETHOD(g_part_read,         g_part_ldm_read),
+       KOBJMETHOD(g_part_setunset,     g_part_ldm_setunset),
+       KOBJMETHOD(g_part_type,         g_part_ldm_type),
+       KOBJMETHOD(g_part_write,        g_part_ldm_write),
+       { 0, 0 }                        /* terminating entry */
+};
+
+static struct g_part_scheme g_part_ldm_scheme = {
+       "LDM",                          /* presumably the scheme name; confirm */
+       g_part_ldm_methods,             /* method table defined above */
+       sizeof(struct g_part_ldm_table), /* size of the LDM table structure */
+       .gps_entrysz = sizeof(struct g_part_ldm_entry) /* size of one entry */
+};
+G_PART_SCHEME_DECLARE(g_part_ldm);
+
+static struct g_part_ldm_alias {       /* DOSPTYP_* <-> G_PART_ALIAS_* pairs */
+       u_char          typ;            /* DOSPTYP_* partition type value */
+       int             alias;          /* matching G_PART_ALIAS_* value */
+} ldm_alias_match[] = {
+       { DOSPTYP_NTFS,         G_PART_ALIAS_MS_NTFS },
+       { DOSPTYP_FAT32,        G_PART_ALIAS_MS_FAT32 },
+       { DOSPTYP_386BSD,       G_PART_ALIAS_FREEBSD },
+       { DOSPTYP_LDM,          G_PART_ALIAS_MS_LDM_DATA },
+       { DOSPTYP_LINSWP,       G_PART_ALIAS_LINUX_SWAP },
+       { DOSPTYP_LINUX,        G_PART_ALIAS_LINUX_DATA },
+       { DOSPTYP_LINLVM,       G_PART_ALIAS_LINUX_LVM },
+       { DOSPTYP_LINRAID,      G_PART_ALIAS_LINUX_RAID },
+};
+
+/*
+ * Read one sector at byte offset off and verify that it starts with the
+ * private header signature.
+ *
+ * Returns the buffer obtained from g_read_data() (to be released with
+ * g_free() by the caller) or NULL.  On a read failure *error is whatever
+ * g_read_data() stored there; on a signature mismatch it is EINVAL.
+ */
+static u_char*
+ldm_privhdr_read(struct g_consumer *cp, uint64_t off, int *error)
+{
+       struct g_provider *prov;
+       u_char *sector;
+
+       prov = cp->provider;
+       sector = g_read_data(cp, off, prov->sectorsize, error);
+       if (sector == NULL)
+               return (NULL);
+       if (memcmp(sector, LDM_PH_SIGN, strlen(LDM_PH_SIGN)) == 0)
+               return (sector);
+
+       /* Not a private header: drop the sector and report the mismatch. */
+       LDM_DEBUG(1, "%s: invalid LDM private header signature",
+           prov->name);
+       g_free(sector);
+       *error = EINVAL;
+       return (NULL);
+}
+
+/*
+ * Decode the on-disk private header found in buf into *hdr.
+ *
+ * Returns 0 on success, ENXIO when the LDM version is not supported, or
+ * the error returned by parse_uuid() for either GUID field.
+ */
+static int
+ldm_privhdr_parse(struct g_consumer *cp, struct ldm_privhdr *hdr,
+    const u_char *buf)
+{
+       uint32_t version;
+       int error;
+
+       memset(hdr, 0, sizeof(*hdr));
+       version = be32dec(buf + LDM_PH_VERSION_OFF);
+       if (version != LDM_VERSION_2K &&
+           version != LDM_VERSION_VISTA) {
+               LDM_DEBUG(0, "%s: unsupported LDM version %u.%u",
+                   cp->provider->name, version >> 16,
+                   version & 0xFFFF);
+               return (ENXIO);
+       }
+       error = parse_uuid(buf + LDM_PH_DISKGUID_OFF, &hdr->disk_guid);
+       if (error != 0)
+               return (error);
+       error = parse_uuid(buf + LDM_PH_DGGUID_OFF, &hdr->dg_guid);
+       if (error != 0)
+               return (error);
+       /*
+        * Copy at most sizeof(dg_name) - 1 bytes: strncpy() does not
+        * terminate the destination when the source fills it, and the
+        * memset() above has already zeroed the final byte.
+        */
+       strncpy(hdr->dg_name, buf + LDM_PH_DGNAME_OFF,
+           sizeof(hdr->dg_name) - 1);
+       hdr->start = be64dec(buf + LDM_PH_START_OFF);
+       hdr->size = be64dec(buf + LDM_PH_SIZE_OFF);
+       hdr->db_offset = be64dec(buf + LDM_PH_DB_OFF);
+       hdr->db_size = be64dec(buf + LDM_PH_DBSIZE_OFF);
+       hdr->th_offset[0] = be64dec(buf + LDM_PH_TH1_OFF);
+       hdr->th_offset[1] = be64dec(buf + LDM_PH_TH2_OFF);
+       hdr->conf_size = be64dec(buf + LDM_PH_CONFSIZE_OFF);
+       hdr->log_size = be64dec(buf + LDM_PH_LOGSIZE_OFF);
+       return (0);
+}
+
+/*
+ * Locate, parse and sanity-check the copies of the LDM private header and
+ * store the accepted one in db->ph.
+ *
+ * Copies are tried starting from index is_gpt of ldm_ph_off[].  When the
+ * primary copy and the first backup disagree, the backup wins; any later
+ * disagreement makes the metadata untrusted and EINVAL is returned.
+ * Returns 0 on success and ENXIO when no valid copy was found.
+ */
+static int
+ldm_privhdr_check(struct ldm_db *db, struct g_consumer *cp, int is_gpt)
+{
+       struct g_consumer *cp2;
+       struct g_provider *pp;
+       struct ldm_privhdr hdr;
+       uint64_t offset, last;
+       int error, found, i;
+       u_char *buf;
+
+       pp = cp->provider;
+       if (is_gpt) {
+               /*
+                * The last LBA is used in several checks below, for the
+                * GPT case it should be calculated relative to the whole
+                * disk.
+                */
+               cp2 = LIST_FIRST(&pp->geom->consumer);
+               last =
+                   cp2->provider->mediasize / cp2->provider->sectorsize - 1;
+       } else
+               last = pp->mediasize / pp->sectorsize - 1;
+       for (found = 0, i = is_gpt;
+           i < sizeof(ldm_ph_off) / sizeof(ldm_ph_off[0]); i++) {
+               offset = ldm_ph_off[i];
+               /*
+                * In the GPT case consumer is attached to the LDM metadata
+                * partition and we don't need add db_offset.
+                */
+               if (!is_gpt)
+                       offset += db->ph.db_offset;
+               if (i == LDM_PH_MBRINDEX) {
+                       /*
+                        * Prepare for errors and set up a new base offset
+                        * to read backup private headers. Assume that LDM
+                        * database is in the last 1Mbyte area.
+                        */
+                       db->ph.db_offset = last - LDM_DB_SIZE;
+               }
+               buf = ldm_privhdr_read(cp, offset * pp->sectorsize, &error);
+               if (buf == NULL) {
+                       LDM_DEBUG(1, "%s: failed to read private header "
+                           "%d at LBA %ju", pp->name, i, (uintmax_t)offset);
+                       continue;
+               }
+               error = ldm_privhdr_parse(cp, &hdr, buf);
+               if (error != 0) {
+                       LDM_DEBUG(1, "%s: failed to parse private "
+                           "header %d", pp->name, i);
+                       LDM_DUMP(buf, pp->sectorsize);
+                       g_free(buf);
+                       continue;
+               }
+               g_free(buf);
+               /*
+                * The individual range checks placed before each sum keep
+                * the 64-bit additions from wrapping around.  The data area
+                * must end before the database only on MBR disks, where
+                * the database lives at the end of the disk; on GPT the
+                * metadata partition is separate from the data partition.
+                */
+               if (hdr.start > last ||
+                   hdr.size > last + 1 ||
+                   hdr.start + hdr.size - 1 > last ||
+                   (hdr.start + hdr.size - 1 > hdr.db_offset && !is_gpt) ||
+                   hdr.db_size != LDM_DB_SIZE ||
+                   hdr.db_offset > last ||
+                   hdr.db_offset + LDM_DB_SIZE - 1 > last ||
+                   hdr.th_offset[0] >= LDM_DB_SIZE ||
+                   hdr.th_offset[1] >= LDM_DB_SIZE ||
+                   hdr.conf_size >= LDM_DB_SIZE ||
+                   hdr.log_size >= LDM_DB_SIZE ||
+                   hdr.conf_size + hdr.log_size >= LDM_DB_SIZE) {
+                       LDM_DEBUG(1, "%s: invalid values in the "
+                           "private header %d", pp->name, i);
+                       LDM_DEBUG(2, "%s: start: %ju, size: %ju, "
+                           "db_offset: %ju, db_size: %ju, th_offset0: %ju, "
+                           "th_offset1: %ju, conf_size: %ju, log_size: %ju, "
+                           "last: %ju", pp->name, (uintmax_t)hdr.start,
+                           (uintmax_t)hdr.size, (uintmax_t)hdr.db_offset,
+                           (uintmax_t)hdr.db_size,
+                           (uintmax_t)hdr.th_offset[0],
+                           (uintmax_t)hdr.th_offset[1],
+                           (uintmax_t)hdr.conf_size,
+                           (uintmax_t)hdr.log_size, (uintmax_t)last);
+                       continue;
+               }
+               if (found != 0 && memcmp(&db->ph, &hdr, sizeof(hdr)) != 0) {
+                       LDM_DEBUG(0, "%s: private headers are not equal",
+                           pp->name);
+                       if (i > 1) {
+                               /*
+                                * We have different headers in the LDM.
+                                * We can not trust this metadata.
+                                */
+                               LDM_DEBUG(0, "%s: refuse LDM metadata",
+                                   pp->name);
+                               return (EINVAL);
+                       }
+                       /*
+                        * We already have read primary private header
+                        * and it differs from this backup one.
+                        * Prefer the backup header and save it.
+                        */
+                       found = 0;
+               }
+               if (found == 0)
+                       memcpy(&db->ph, &hdr, sizeof(hdr));
+               found = 1;
+       }
+       if (found == 0) {
+               LDM_DEBUG(1, "%s: valid LDM private header not found",
+                   pp->name);
+               return (ENXIO);
+       }
+       return (0);
+}
+
+/*
+ * Compare the boundaries of the GPT entries that back cp (LDM metadata)
+ * and the consumer following cp in its list (LDM data) with the values
+ * recorded in the private header.
+ *
+ * Returns 0 when they agree and ENXIO on the first mismatch.
+ */
+static int
+ldm_gpt_check(struct ldm_db *db, struct g_consumer *cp)
+{
+       struct g_consumer *data_cp;
+       struct g_part_table *table;
+       struct g_part_entry *entry;
+       uint64_t first, final;
+       int rc;
+
+       data_cp = LIST_NEXT(cp, consumer);
+       g_topology_lock();
+       table = cp->provider->geom->softc;
+       rc = 0;
+       LIST_FOREACH(entry, &table->gpt_entry, gpe_entry) {
+               /* Pick the expected range for the entries we care about. */
+               if (entry->gpe_pp == cp->provider) {
+                       /* ms-ldm-metadata partition */
+                       first = db->ph.db_offset;
+                       final = first + LDM_DB_SIZE - 1;
+               } else if (entry->gpe_pp == data_cp->provider) {
+                       /* ms-ldm-data partition */
+                       first = db->ph.start;
+                       final = first + db->ph.size - 1;
+               } else
+                       continue;
+               if (entry->gpe_start == first && entry->gpe_end == final)
+                       continue;
+               LDM_DEBUG(0, "%s: GPT partition %d boundaries "
+                   "do not match with the LDM metadata",
+                   entry->gpe_pp->name, entry->gpe_index);
+               rc = ENXIO;
+               break;
+       }
+       g_topology_unlock();
+       return (rc);
+}
+
+/*
+ * Read both copies of the TOC header, validate them against the private
+ * header and store the first valid copy in db->th.
+ *
+ * Returns 0 when at least one copy is valid, ENXIO otherwise.
+ */
+static int
+ldm_tochdr_check(struct ldm_db *db, struct g_consumer *cp)
+{
+       struct g_provider *pp;
+       struct ldm_tochdr hdr;
+       uint64_t offset, conf_size, log_size;
+       int error, found, i;
+       u_char *buf;
+
+       pp = cp->provider;
+       for (i = 0, found = 0; i < LDM_TH_COUNT; i++) {
+               offset = db->ph.db_offset + db->ph.th_offset[i];
+               buf = g_read_data(cp,
+                   offset * pp->sectorsize, pp->sectorsize, &error);
+               if (buf == NULL) {
+                       LDM_DEBUG(1, "%s: failed to read TOC header "
+                           "at LBA %ju", pp->name, (uintmax_t)offset);
+                       continue;
+               }
+               if (memcmp(buf, LDM_TH_SIGN, strlen(LDM_TH_SIGN)) != 0 ||
+                   memcmp(buf + LDM_TH_NAME1_OFF, LDM_TH_NAME1,
+                   strlen(LDM_TH_NAME1)) != 0 ||
+                   memcmp(buf + LDM_TH_NAME2_OFF, LDM_TH_NAME2,
+                   strlen(LDM_TH_NAME2)) != 0) {
+                       LDM_DEBUG(1, "%s: failed to parse TOC header "
+                           "at LBA %ju", pp->name, (uintmax_t)offset);
+                       LDM_DUMP(buf, pp->sectorsize);
+                       g_free(buf);
+                       continue;
+               }
+               hdr.conf_offset = be64dec(buf + LDM_TH_CONF_OFF);
+               hdr.log_offset = be64dec(buf + LDM_TH_LOG_OFF);
+               conf_size = be64dec(buf + LDM_TH_CONFSIZE_OFF);
+               log_size = be64dec(buf + LDM_TH_LOGSIZE_OFF);
+               /*
+                * Check every term on its own before adding them, so that
+                * a huge on-disk value cannot wrap the 64-bit sum around
+                * and slip through the range check.
+                */
+               if (conf_size != db->ph.conf_size ||
+                   conf_size >= LDM_DB_SIZE ||
+                   hdr.conf_offset >= LDM_DB_SIZE ||
+                   hdr.conf_offset + conf_size >= LDM_DB_SIZE ||
+                   log_size != db->ph.log_size ||
+                   log_size >= LDM_DB_SIZE ||
+                   hdr.log_offset >= LDM_DB_SIZE ||
+                   hdr.log_offset + log_size >= LDM_DB_SIZE) {
+                       LDM_DEBUG(1, "%s: invalid values in the "
+                           "TOC header at LBA %ju", pp->name,
+                           (uintmax_t)offset);
+                       LDM_DUMP(buf, pp->sectorsize);
+                       g_free(buf);
+                       continue;
+               }
+               g_free(buf);
+               if (found == 0)
+                       memcpy(&db->th, &hdr, sizeof(hdr));
+               found = 1;
+       }
+       if (found == 0) {
+               LDM_DEBUG(0, "%s: valid LDM TOC header not found.",
+                   pp->name);
+               return (ENXIO);
+       }
+       return (0);
+}
+
+/*
+ * Read the VMDB header located at the start of the configuration area,
+ * validate it and fill in db->dh.
+ *
+ * Returns 0 on success, the g_read_data() error when the sector cannot be
+ * read, ENXIO for a bad signature, unsupported version or inconsistent
+ * state, and EINVAL when the header values do not make sense.
+ */
+static int
+ldm_vmdbhdr_check(struct ldm_db *db, struct g_consumer *cp)
+{
+       struct g_provider *pp;
+       struct uuid dg_guid;
+       uint64_t offset;
+       uint32_t version;
+       int error;
+       u_char *buf;
+
+       pp = cp->provider;
+       offset = db->ph.db_offset + db->th.conf_offset;
+       buf = g_read_data(cp, offset * pp->sectorsize, pp->sectorsize,
+           &error);
+       if (buf == NULL) {
+               LDM_DEBUG(0, "%s: failed to read VMDB header at "
+                   "LBA %ju", pp->name, (uintmax_t)offset);
+               return (error);
+       }
+       if (memcmp(buf, LDM_VMDB_SIGN, strlen(LDM_VMDB_SIGN)) != 0) {
+               g_free(buf);
+               LDM_DEBUG(0, "%s: failed to parse VMDB header at "
+                   "LBA %ju", pp->name, (uintmax_t)offset);
+               return (ENXIO);
+       }
+       /* Check version. */
+       version = be32dec(buf + LDM_DB_VERSION_OFF);
+       if (version != 0x4000A) {
+               g_free(buf);
+               LDM_DEBUG(0, "%s: unsupported VMDB version %u.%u",
+                   pp->name, version >> 16, version & 0xFFFF);
+               return (ENXIO);
+       }
+       /*
+        * Check VMDB update status:
+        *      1 - in a consistent state;
+        *      2 - in a creation phase;
+        *      3 - in a deletion phase;
+        */
+       if (be16dec(buf + LDM_DB_STATUS_OFF) != 1) {
+               g_free(buf);
+               LDM_DEBUG(0, "%s: VMDB is not in a consistent state",
+                   pp->name);
+               return (ENXIO);
+       }
+       db->dh.last_seq = be32dec(buf + LDM_DB_LASTSEQ_OFF);
+       db->dh.size = be32dec(buf + LDM_DB_SIZE_OFF);
+       error = parse_uuid(buf + LDM_DB_DGGUID_OFF, &dg_guid);
+       /*
+        * Compare disk group name and guid from VMDB and private headers.
+        * A VBLK must at least reach past the fixed fields that end at
+        * LDM_VBLK_OID_OFF (this also rejects a zero size), and the
+        * size * last_seq product is computed in 64 bits so that it
+        * cannot wrap around in 32-bit arithmetic.
+        */
+       if (error != 0 || db->dh.size <= LDM_VBLK_OID_OFF ||
+           pp->sectorsize % db->dh.size != 0 ||
+           strncmp(buf + LDM_DB_DGNAME_OFF, db->ph.dg_name, 31) != 0 ||
+           memcmp(&dg_guid, &db->ph.dg_guid, sizeof(dg_guid)) != 0 ||
+           (uint64_t)db->dh.size * db->dh.last_seq >
+           db->ph.conf_size * pp->sectorsize) {
+               LDM_DEBUG(0, "%s: invalid values in the VMDB header",
+                   pp->name);
+               LDM_DUMP(buf, pp->sectorsize);
+               g_free(buf);
+               return (EINVAL);
+       }
+       g_free(buf);
+       return (0);
+}
+
+/*
+ * Store one fragment of an extended VBLK.
+ *
+ * The fragments of a group are gathered in a buffer that is allocated when
+ * the first fragment of that group shows up; the payload that follows the
+ * 16-byte VBLK header of p is copied to the slot selected by vh->index.
+ *
+ * Returns 0 on success and EINVAL when the header values are out of range
+ * or a fragment with the same index has already been stored.
+ */
+static int
+ldm_xvblk_handle(struct ldm_db *db, struct ldm_vblkhdr *vh, const u_char *p)
+{
+       struct ldm_xvblk *blk;
+       size_t size;
+
+       /*
+        * These values come from the disk.  The saved-fragments map has
+        * only 8 bits, the index must address a slot inside the buffer
+        * sized from count, and a VBLK must be larger than its header.
+        */
+       if (db->dh.size <= 16 || vh->count == 0 ||
+           vh->count > 8 * sizeof(blk->map) || vh->index >= vh->count)
+               return (EINVAL);
+       size = db->dh.size - 16;
+       LIST_FOREACH(blk, &db->xvblks, entry)
+               if (blk->group == vh->group)
+                       break;
+       if (blk == NULL) {
+               blk = g_malloc(sizeof(*blk), M_WAITOK | M_ZERO);
+               blk->group = vh->group;
+               blk->size = size * vh->count + 16;
+               blk->data = g_malloc(blk->size, M_WAITOK | M_ZERO);
+               /* Bits at and above count are pre-set: no such slots. */
+               blk->map = 0xFF << vh->count;
+               LIST_INSERT_HEAD(&db->xvblks, blk, entry);
+       }
+       if ((blk->map & (1 << vh->index)) != 0) {
+               /* Block with given index has been already saved. */
+               return (EINVAL);
+       }
+       /* Copy the data block to the place related to index. */
+       memcpy(blk->data + size * vh->index + 16, p + 16, size);
+       blk->map |= 1 << vh->index;
+       return (0);
+}
+
+/*
+ * Read the variable-width numeric field and return new offset.
+ *
+ * The field at buf[offset] is a one-byte length followed by that many
+ * bytes, most significant first.  The value is stored in *result.
+ * Returns the offset just past the field, or -1 when the length exceeds
+ * 8 bytes or the field does not end before range.
+ */
+static int
+ldm_vnum_get(const u_char *buf, int offset, uint64_t *result, size_t range)
+{
+       uint64_t num;
+       uint8_t len;
+
+       /* The length byte itself has to be inside the buffer. */
+       if (offset < 0 || (size_t)offset >= range)
+               return (-1);
+       len = buf[offset++];
+       if (len > sizeof(uint64_t) || len + offset >= range)
+               return (-1);
+       for (num = 0; len > 0; len--)
+               num = (num << 8) | buf[offset++];
+       *result = num;
+       return (offset);
+}
+
+/*
+ * Read the variable-width string and return new offset.
+ *
+ * The field at buf[offset] is a one-byte length followed by that many
+ * characters.  They are copied to result and NUL-terminated.
+ * Returns the offset just past the field, or -1 when the string does not
+ * fit into maxlen bytes or the field does not end before range.
+ */
+static int
+ldm_vstr_get(const u_char *buf, int offset, u_char *result,
+    size_t maxlen, size_t range)
+{
+       uint8_t len;
+
+       /* The length byte itself has to be inside the buffer. */
+       if (offset < 0 || (size_t)offset >= range)
+               return (-1);
+       len = buf[offset++];
+       if (len >= maxlen || len + offset >= range)
+               return (-1);
+       memcpy(result, buf + offset, len);
+       result[len] = '\0';
+       return (offset + len);
+}
+
+/*
+ * Just skip the variable-width variable and return new offset.
+ *
+ * The field at buf[offset] is a one-byte length followed by that many
+ * bytes.  Returns the offset just past the field, or -1 when the field
+ * does not end before range.
+ */
+static int
+ldm_vparm_skip(const u_char *buf, int offset, size_t range)
+{
+       uint8_t len;
+
+       /* The length byte itself has to be inside the buffer. */
+       if (offset < 0 || (size_t)offset >= range)
+               return (-1);
+       len = buf[offset++];
+       if (offset + len >= range)
+               return (-1);
+
+       return (offset + len);
+}
+
+/*
+ * Parse one VBLK record and add it to the in-memory LDM database.
+ *
+ * p points at the record and size is its length in bytes.  The common
+ * object id and object name are decoded first, then the type-specific
+ * fields.  Disk and volume records are additionally linked into
+ * db->disks or db->volumes (volumes are kept ordered by volume number).
+ * Every successfully parsed record, including one of an unknown type,
+ * is linked into db->vblks.
+ *
+ * Returns 0 on success.  On a parse error the record is logged, dumped
+ * and freed, and EINVAL is returned.
+ */
+static int
+ldm_vblk_handle(struct ldm_db *db, const u_char *p, size_t size)
+{
+       struct ldm_vblk *blk;
+       struct ldm_volume *volume, *last;
+       const char *errstr;
+       u_char vstr[64];
+       int error, offset;
+
+       blk = g_malloc(sizeof(*blk), M_WAITOK | M_ZERO);
+       blk->type = p[LDM_VBLK_TYPE_OFF];
+       offset = ldm_vnum_get(p, LDM_VBLK_OID_OFF, &blk->u.id, size);
+       if (offset < 0) {
+               errstr = "object id";
+               goto fail;
+       }
+       /* The object name is decoded into vstr but not stored. */
+       offset = ldm_vstr_get(p, offset, vstr, sizeof(vstr), size);
+       if (offset < 0) {
+               errstr = "object name";
+               goto fail;
+       }
+       switch (blk->type) {
+       /*
+        * Component VBLK fields:
+        * Offset       Size    Description
+        * ------------+-------+------------------------
+        *  0x18+       PS      volume state
+        *  0x18+5      PN      component children count
+        *  0x1D+16     PN      parent's volume object id
+        *  0x2D+1      PN      stripe size
+        */
+       case LDM_VBLK_T_COMPONENT:
+               offset = ldm_vparm_skip(p, offset, size);
+               if (offset < 0) {
+                       errstr = "volume state";
+                       goto fail;
+               }
+               offset = ldm_vparm_skip(p, offset + 5, size);
+               if (offset < 0) {
+                       errstr = "children count";
+                       goto fail;
+               }
+               offset = ldm_vnum_get(p, offset + 16,
+                   &blk->u.comp.vol_id, size);
+               if (offset < 0) {
+                       errstr = "volume id";
+                       goto fail;
+               }
+               break;
+       /*
+        * Partition VBLK fields:
+        * Offset       Size    Description
+        * ------------+-------+------------------------
+        *  0x18+12     8       partition start offset
+        *  0x18+20     8       volume offset
+        *  0x18+28     PN      partition size
+        *  0x34+       PN      parent's component object id
+        *  0x34+       PN      disk's object id
+        */
+       case LDM_VBLK_T_PARTITION:
+               /* Covers both fixed 8-byte fields and the next length byte. */
+               if (offset + 28 >= size) {
+                       errstr = "too small buffer";
+                       goto fail;
+               }
+               blk->u.part.start = be64dec(p + offset + 12);
+               blk->u.part.offset = be64dec(p + offset + 20);
+               offset = ldm_vnum_get(p, offset + 28, &blk->u.part.size, size);
+               if (offset < 0) {
+                       errstr = "partition size";
+                       goto fail;
+               }
+               offset = ldm_vnum_get(p, offset, &blk->u.part.comp_id, size);
+               if (offset < 0) {
+                       errstr = "component id";
+                       goto fail;
+               }
+               offset = ldm_vnum_get(p, offset, &blk->u.part.disk_id, size);
+               if (offset < 0) {
+                       errstr = "disk id";
+                       goto fail;
+               }
+               break;
+       /*
+        * Disk VBLK fields:
+        * Offset       Size    Description
+        * ------------+-------+------------------------
+        *  0x18+       PS      disk GUID
+        */
+       case LDM_VBLK_T_DISK:
+               errstr = "disk guid";
+               offset = ldm_vstr_get(p, offset, vstr, sizeof(vstr), size);
+               if (offset < 0)
+                       goto fail;
+               error = parse_uuid(vstr, &blk->u.disk.guid);
+               if (error != 0)
+                       goto fail;
+               LIST_INSERT_HEAD(&db->disks, &blk->u.disk, entry);
+               break;
+       /*
+        * Disk group VBLK fields:
+        * Offset       Size    Description
+        * ------------+-------+------------------------
+        *  0x18+       PS      disk group GUID
+        */
+       case LDM_VBLK_T_DISKGROUP:
+               /* Disk group parsing is compiled out; only the type is kept. */
+#if 0
+               strncpy(blk->u.disk_group.name, vstr,
+                   sizeof(blk->u.disk_group.name));
+               offset = ldm_vstr_get(p, offset, vstr, sizeof(vstr), size);
+               if (offset < 0) {
+                       errstr = "disk group guid";
+                       goto fail;
+               }
+               error = parse_uuid(name, &blk->u.disk_group.guid);
+               if (error != 0) {
+                       errstr = "disk group guid";
+                       goto fail;
+               }
+               LIST_INSERT_HEAD(&db->groups, &blk->u.disk_group, entry);
+#endif
+               break;
+       /*
+        * Disk VBLK fields:
+        * Offset       Size    Description
+        * ------------+-------+------------------------
+        *  0x18+       16      disk GUID
+        */
+       case LDM_VBLK_T_DISK4:
+               /* The raw GUID must lie entirely inside the VBLK. */
+               if (offset + sizeof(blk->u.disk.guid) > size) {
+                       errstr = "too small buffer";
+                       goto fail;
+               }
+               be_uuid_dec(p + offset, &blk->u.disk.guid);
+               LIST_INSERT_HEAD(&db->disks, &blk->u.disk, entry);
+               break;
+       /*
+        * Disk group VBLK fields:
+        * Offset       Size    Description
+        * ------------+-------+------------------------
+        *  0x18+       16      disk group GUID
+        */
+       case LDM_VBLK_T_DISKGROUP4:
+               /* Disk group parsing is compiled out; only the type is kept. */
+#if 0
+               strncpy(blk->u.disk_group.name, vstr,
+                   sizeof(blk->u.disk_group.name));
+               be_uuid_dec(p + offset, &blk->u.disk.guid);
+               LIST_INSERT_HEAD(&db->groups, &blk->u.disk_group, entry);
+#endif
+               break;
+       /*
+        * Volume VBLK fields:
+        * Offset       Size    Description
+        * ------------+-------+------------------------
+        *  0x18+       PS      volume type
+        *  0x18+       PS      unknown
+        *  0x18+       14(S)   volume state
+        *  0x18+16     1       volume number
+        *  0x18+21     PN      volume children count
+        *  0x2D+16     PN      volume size
+        *  0x3D+4      1       partition type
+        */
+       case LDM_VBLK_T_VOLUME:
+               offset = ldm_vparm_skip(p, offset, size);
+               if (offset < 0) {
+                       errstr = "volume type";
+                       goto fail;
+               }
+               offset = ldm_vparm_skip(p, offset, size);
+               if (offset < 0) {
+                       errstr = "unknown param";
+                       goto fail;
+               }
+               /* Covers the volume number and the next length byte. */
+               if (offset + 21 >= size) {
+                       errstr = "too small buffer";
+                       goto fail;
+               }
+               blk->u.vol.number = p[offset + 16];
+               offset = ldm_vparm_skip(p, offset + 21, size);
+               if (offset < 0) {
+                       errstr = "children count";
+                       goto fail;
+               }
+               offset = ldm_vnum_get(p, offset + 16, &blk->u.vol.size, size);
+               if (offset < 0) {
+                       errstr = "volume size";
+                       goto fail;
+               }
+               if (offset + 4 >= size) {
+                       errstr = "too small buffer";
+                       goto fail;
+               }
+               blk->u.vol.part_type = p[offset + 4];
+               /* keep volumes ordered by volume number */
+               last = NULL;
+               LIST_FOREACH(volume, &db->volumes, entry) {
+                       if (volume->number > blk->u.vol.number)
+                               break;
+                       last = volume;
+               }
+               if (last != NULL)
+                       LIST_INSERT_AFTER(last, &blk->u.vol, entry);
+               else
+                       LIST_INSERT_HEAD(&db->volumes, &blk->u.vol, entry);
+               break;
+       default:
+               LDM_DEBUG(1, "unknown VBLK type 0x%02x\n", blk->type);
+               LDM_DUMP(p, size);
+       }
+       LIST_INSERT_HEAD(&db->vblks, blk, entry);
+       return (0);
+fail:
+       /*
+        * blk is linked into the database lists only after all parsing
+        * for its type has succeeded, so it can simply be freed here.
+        */
+       LDM_DEBUG(0, "failed to parse '%s' in VBLK of type 0x%02x\n",
+           errstr, blk->type);
+       LDM_DUMP(p, size);
+       g_free(blk);
+       return (EINVAL);
+}
+
+static void
+ldm_vmdb_free(struct ldm_db *db)
+{
+       struct ldm_vblk *vblk;
+       struct ldm_xvblk *xvblk;
+
+       while (!LIST_EMPTY(&db->xvblks)) {

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
_______________________________________________
[email protected] mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "[email protected]"

Reply via email to