Re: [PATCH 004 of 7] md: Allow devices to be shared between md arrays.

2007-12-25 Thread Andrew Morton
On Fri, 14 Dec 2007 17:26:28 +1100 NeilBrown [EMAIL PROTECTED] wrote:

 + mddev_unlock(rdev->mddev);
 + ITERATE_MDDEV(mddev, tmp) {
 + mdk_rdev_t *rdev2;
 +
 + mddev_lock(mddev);
 + ITERATE_RDEV(mddev, rdev2, tmp2)
 + if (test_bit(AllReserved, &rdev2->flags) ||
 + (rdev->bdev == rdev2->bdev &&
 +  rdev != rdev2 &&
 +  overlaps(rdev->data_offset, rdev->size,
 + rdev2->data_offset, rdev2->size))) {
 + overlap = 1;
 + break;
 + }
 + mddev_unlock(mddev);
 + if (overlap) {
 + mddev_put(mddev);
 + break;
 + }
 + }

eww, ITERATE_MDDEV() and ITERATE_RDEV() are an eyesore.

for_each_mddev() and for_each_rdev() would at least mean the reader doesn't
need to check the implementation when wondering what that `break' is
breaking from.

  #define  In_sync 2   /* device is in_sync with rest of array */
  #define  WriteMostly 4   /* Avoid reading if at all possible */
  #define  BarriersNotsupp 5   /* BIO_RW_BARRIER is not supported */
 +#define  AllReserved 6   /* If whole device is reserved for

The naming style here is inconsistent.

A task for the keen would be to convert these to an enum and add some
namespacing prefix to them.  
-
To unsubscribe from this list: send the line "unsubscribe linux-raid" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 004 of 7] md: Allow devices to be shared between md arrays.

2007-12-13 Thread NeilBrown

Currently, a given device is claimed by a particular array so
that it cannot be used by other arrays.

This is not ideal for DDF and other metadata schemes which have
their own partitioning concept.

So for externally managed metadata, just claim the device for
md in general, require that offset and size are set
properly for each device, and make sure that if a device is
included in different arrays then the active sections do
not overlap.

This involves adding another flag to the rdev which makes it awkward
to set "->flags = 0" to clear certain flags.  So now clear flags
explicitly by name when we want to clear things.

Signed-off-by: Neil Brown [EMAIL PROTECTED]

### Diffstat output
 ./drivers/md/md.c   |   93 ++--
 ./include/linux/raid/md_k.h |2 
 2 files changed, 84 insertions(+), 11 deletions(-)

diff .prev/drivers/md/md.c ./drivers/md/md.c
--- .prev/drivers/md/md.c   2007-12-14 16:08:57.0 +1100
+++ ./drivers/md/md.c   2007-12-14 16:09:01.0 +1100
@@ -774,7 +774,11 @@ static int super_90_validate(mddev_t *md
__u64 ev1 = md_event(sb);
 
rdev->raid_disk = -1;
-   rdev->flags = 0;
+   clear_bit(Faulty, &rdev->flags);
+   clear_bit(In_sync, &rdev->flags);
+   clear_bit(WriteMostly, &rdev->flags);
+   clear_bit(BarriersNotsupp, &rdev->flags);
+
if (mddev->raid_disks == 0) {
mddev->major_version = 0;
mddev->minor_version = sb->minor_version;
@@ -1155,7 +1159,11 @@ static int super_1_validate(mddev_t *mdd
__u64 ev1 = le64_to_cpu(sb->events);
 
rdev->raid_disk = -1;
-   rdev->flags = 0;
+   clear_bit(Faulty, &rdev->flags);
+   clear_bit(In_sync, &rdev->flags);
+   clear_bit(WriteMostly, &rdev->flags);
+   clear_bit(BarriersNotsupp, &rdev->flags);
+
if (mddev->raid_disks == 0) {
mddev->major_version = 1;
mddev->patch_version = 0;
@@ -1407,7 +1415,7 @@ static int bind_rdev_to_array(mdk_rdev_t
goto fail;
}
list_add(&rdev->same_set, &mddev->disks);
-   bd_claim_by_disk(rdev->bdev, rdev, mddev->gendisk);
+   bd_claim_by_disk(rdev->bdev, rdev->bdev->bd_holder, mddev->gendisk);
return 0;
 
  fail:
@@ -1447,7 +1455,7 @@ static void unbind_rdev_from_array(mdk_r
  * otherwise reused by a RAID array (or any other kernel
  * subsystem), by bd_claiming the device.
  */
-static int lock_rdev(mdk_rdev_t *rdev, dev_t dev)
+static int lock_rdev(mdk_rdev_t *rdev, dev_t dev, int shared)
 {
int err = 0;
struct block_device *bdev;
@@ -1459,13 +1467,15 @@ static int lock_rdev(mdk_rdev_t *rdev, d
__bdevname(dev, b));
return PTR_ERR(bdev);
}
-   err = bd_claim(bdev, rdev);
+   err = bd_claim(bdev, shared ? (mdk_rdev_t *)lock_rdev : rdev);
if (err) {
printk(KERN_ERR "md: could not bd_claim %s.\n",
bdevname(bdev, b));
blkdev_put(bdev);
return err;
}
+   if (!shared)
+   set_bit(AllReserved, &rdev->flags);
rdev->bdev = bdev;
return err;
 }
@@ -1930,7 +1940,8 @@ slot_store(mdk_rdev_t *rdev, const char 
return -ENOSPC;
rdev->raid_disk = slot;
/* assume it is working */
-   rdev->flags = 0;
+   clear_bit(Faulty, &rdev->flags);
+   clear_bit(WriteMostly, &rdev->flags);
set_bit(In_sync, &rdev->flags);
}
return len;
@@ -1955,6 +1966,10 @@ offset_store(mdk_rdev_t *rdev, const cha
return -EINVAL;
if (rdev->mddev->pers)
return -EBUSY;
+   if (rdev->size && rdev->mddev->external)
+   /* Must set offset before size, so overlap checks
+* can be sane */
+   return -EBUSY;
rdev->data_offset = offset;
return len;
 }
@@ -1968,16 +1983,69 @@ rdev_size_show(mdk_rdev_t *rdev, char *p
return sprintf(page, "%llu\n", (unsigned long long)rdev->size);
 }
 
+static int overlaps(sector_t s1, sector_t l1, sector_t s2, sector_t l2)
+{
+   /* check if two start/length pairs overlap */
+   if (s1+l1 <= s2)
+   return 0;
+   if (s2+l2 <= s1)
+   return 0;
+   return 1;
+}
+
 static ssize_t
 rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len)
 {
char *e;
unsigned long long size = simple_strtoull(buf, &e, 10);
+   unsigned long long oldsize = rdev->size;
if (e==buf || (*e && *e != '\n'))
return -EINVAL;
if (rdev->mddev->pers)
return -EBUSY;
rdev->size = size;
+   if (size > oldsize && rdev->mddev->external) {
+   /* need to check that all other rdevs with the same ->bdev
+* do not overlap.  We need to unlock the mddev to avoid
+* a deadlock.  We have already changed rdev->size, and if
+