[PATCH 005 of 13] md: Allow stripes to be expanded in preparation for expanding an array.

2006-03-16 Thread NeilBrown

Before a RAID-5 can be expanded, we need to be able to expand the
stripe-cache data structure.  
This requires allocating new stripes in a new kmem_cache.
If this succeeds, we copy cache pages over and release the old
stripes and kmem_cache.
We then allocate new pages.  If that fails, we leave the stripe
cache at it's new size.  It isn't worth the effort to shrink 
it back again.

Unfortuanately this means we need two kmem_cache names as we, for a
short period of time, we have two kmem_caches.  So they are
raid5/%s and raid5/%s-alt


Signed-off-by: Neil Brown [EMAIL PROTECTED]

### Diffstat output
 ./drivers/md/raid5.c |  118 +--
 ./drivers/md/raid6main.c |4 -
 ./include/linux/raid/raid5.h |9 ++-
 3 files changed, 123 insertions(+), 8 deletions(-)

diff ./drivers/md/raid5.c~current~ ./drivers/md/raid5.c
--- ./drivers/md/raid5.c~current~   2006-03-17 11:48:55.0 +1100
+++ ./drivers/md/raid5.c2006-03-17 11:48:56.0 +1100
@@ -313,20 +313,130 @@ static int grow_stripes(raid5_conf_t *co
kmem_cache_t *sc;
int devs = conf-raid_disks;
 
-   sprintf(conf-cache_name, raid5/%s, mdname(conf-mddev));
-
-   sc = kmem_cache_create(conf-cache_name, 
+   sprintf(conf-cache_name[0], raid5/%s, mdname(conf-mddev));
+   sprintf(conf-cache_name[1], raid5/%s-alt, mdname(conf-mddev));
+   conf-active_name = 0;
+   sc = kmem_cache_create(conf-cache_name[conf-active_name],
   sizeof(struct 
stripe_head)+(devs-1)*sizeof(struct r5dev),
   0, 0, NULL, NULL);
if (!sc)
return 1;
conf-slab_cache = sc;
+   conf-pool_size = devs;
while (num--) {
if (!grow_one_stripe(conf))
return 1;
}
return 0;
 }
+static int resize_stripes(raid5_conf_t *conf, int newsize)
+{
+   /* make all the stripes able to hold 'newsize' devices.
+* New slots in each stripe get 'page' set to a new page.
+* We allocate all the new stripes first, then if that succeeds,
+* copy everything across.
+* Finally we add new pages.  This could fail, but we leave
+* the stripe cache at it's new size, just with some pages empty.
+*
+* We use GFP_NOIO allocations as IO to the raid5 is blocked
+* at some points in this operation.
+*/
+   struct stripe_head *osh, *nsh;
+   struct list_head newstripes, oldstripes;
+   struct disk_info *ndisks;
+   int err = 0;
+   kmem_cache_t *sc;
+   int i;
+
+   if (newsize = conf-pool_size)
+   return 0; /* never bother to shrink */
+
+   sc = kmem_cache_create(conf-cache_name[1-conf-active_name],
+  sizeof(struct 
stripe_head)+(newsize-1)*sizeof(struct r5dev),
+  0, 0, NULL, NULL);
+   if (!sc)
+   return -ENOMEM;
+   INIT_LIST_HEAD(newstripes);
+   for (i = conf-max_nr_stripes; i; i--) {
+   nsh = kmem_cache_alloc(sc, GFP_NOIO);
+   if (!nsh)
+   break;
+
+   memset(nsh, 0, sizeof(*nsh) + (newsize-1)*sizeof(struct r5dev));
+
+   nsh-raid_conf = conf;
+   spin_lock_init(nsh-lock);
+
+   list_add(nsh-lru, newstripes);
+   }
+   if (i) {
+   /* didn't get enough, give up */
+   while (!list_empty(newstripes)) {
+   nsh = list_entry(newstripes.next, struct stripe_head, 
lru);
+   list_del(nsh-lru);
+   kmem_cache_free(sc, nsh);
+   }
+   kmem_cache_destroy(sc);
+   return -ENOMEM;
+   }
+   /* OK, we have enough stripes, start collecting inactive
+* stripes and copying them over
+*/
+   INIT_LIST_HEAD(oldstripes);
+   list_for_each_entry(nsh, newstripes, lru) {
+   spin_lock_irq(conf-device_lock);
+   wait_event_lock_irq(conf-wait_for_stripe,
+   !list_empty(conf-inactive_list),
+   conf-device_lock,
+   unplug_slaves(conf-mddev);
+   );
+   osh = get_free_stripe(conf);
+   spin_unlock_irq(conf-device_lock);
+   atomic_set(nsh-count, 1);
+   for(i=0; iconf-pool_size; i++)
+   nsh-dev[i].page = osh-dev[i].page;
+   for( ; inewsize; i++)
+   nsh-dev[i].page = NULL;
+   list_add(osh-lru, oldstripes);
+   }
+   /* Got them all.
+* Return the new ones and free the old ones.
+* At this point, we are holding all the stripes so the array
+* is completely stalled, so now is a good time to resize
+* conf-disks.
+*/
+   ndisks = 

Re: [PATCH 005 of 13] md: Allow stripes to be expanded in preparation for expanding an array.

2006-03-16 Thread Andrew Morton
NeilBrown [EMAIL PROTECTED] wrote:

 +static int resize_stripes(raid5_conf_t *conf, int newsize)
  +{
  +/* make all the stripes able to hold 'newsize' devices.
  + * New slots in each stripe get 'page' set to a new page.
  + * We allocate all the new stripes first, then if that succeeds,
  + * copy everything across.
  + * Finally we add new pages.  This could fail, but we leave
  + * the stripe cache at it's new size, just with some pages empty.
  + *
  + * We use GFP_NOIO allocations as IO to the raid5 is blocked
  + * at some points in this operation.
  + */
  +struct stripe_head *osh, *nsh;
  +struct list_head newstripes, oldstripes;

You can use LIST_HEAD() here, avoid the separate INIT_LIST_HEAD().


  +struct disk_info *ndisks;
  +int err = 0;
  +kmem_cache_t *sc;
  +int i;
  +
  +if (newsize = conf-pool_size)
  +return 0; /* never bother to shrink */
  +
  +sc = kmem_cache_create(conf-cache_name[1-conf-active_name],
  +   sizeof(struct 
 stripe_head)+(newsize-1)*sizeof(struct r5dev),
  +   0, 0, NULL, NULL);

kmem_cache_create() internally does a GFP_KERNEL allocation.

  +if (!sc)
  +return -ENOMEM;
  +INIT_LIST_HEAD(newstripes);
  +for (i = conf-max_nr_stripes; i; i--) {
  +nsh = kmem_cache_alloc(sc, GFP_NOIO);

So either this can use GFP_KERNEL, or we have a problem.
-
To unsubscribe from this list: send the line unsubscribe linux-raid in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 005 of 13] md: Allow stripes to be expanded in preparation for expanding an array.

2006-03-16 Thread Andrew Morton
NeilBrown [EMAIL PROTECTED] wrote:

 + wait_event_lock_irq(conf-wait_for_stripe,
  +!list_empty(conf-inactive_list),
  +conf-device_lock,
  +unplug_slaves(conf-mddev);
  +);

Boy, that's an ugly-looking thing, isn't it?

__wait_event_lock_irq() already puts a semicolon after `cmd' so I think the
one here isn't needed, which would make it a bit less of a surprise to look
at.

-
To unsubscribe from this list: send the line unsubscribe linux-raid in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 005 of 13] md: Allow stripes to be expanded in preparation for expanding an array.

2006-03-16 Thread Andrew Morton
NeilBrown [EMAIL PROTECTED] wrote:

 + /* Got them all.
  + * Return the new ones and free the old ones.
  + * At this point, we are holding all the stripes so the array
  + * is completely stalled, so now is a good time to resize
  + * conf-disks.
  + */
  +ndisks = kzalloc(newsize * sizeof(struct disk_info), GFP_NOIO);
  +if (ndisks) {
  +for (i=0; iconf-raid_disks; i++)
  +ndisks[i] = conf-disks[i];
  +kfree(conf-disks);
  +conf-disks = ndisks;
  +} else
  +err = -ENOMEM;
  +while(!list_empty(newstripes)) {
  +nsh = list_entry(newstripes.next, struct stripe_head, lru);
  +list_del_init(nsh-lru);
  +for (i=conf-raid_disks; i  newsize; i++)
  +if (nsh-dev[i].page == NULL) {
  +struct page *p = alloc_page(GFP_NOIO);
  +nsh-dev[i].page = p;
  +if (!p)
  +err = -ENOMEM;
  +}
  +release_stripe(nsh);
  +}
  +while(!list_empty(oldstripes)) {
  +osh = list_entry(oldstripes.next, struct stripe_head, lru);
  +list_del(osh-lru);
  +kmem_cache_free(conf-slab_cache, osh);
  +}
  +kmem_cache_destroy(conf-slab_cache);
  +conf-slab_cache = sc;
  +conf-active_name = 1-conf-active_name;
  +conf-pool_size = newsize;
  +return err;
  +}

Are you sure the -ENOMEM handling here is solid?  It looks strange.

There are a few more GFP_NOIOs in this function, which can possibly become
GFP_KERNEL.
-
To unsubscribe from this list: send the line unsubscribe linux-raid in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 005 of 13] md: Allow stripes to be expanded in preparation for expanding an array.

2006-03-16 Thread Neil Brown
On Thursday March 16, [EMAIL PROTECTED] wrote:
 NeilBrown [EMAIL PROTECTED] wrote:
 
  +static int resize_stripes(raid5_conf_t *conf, int newsize)
   +{
   +  /* make all the stripes able to hold 'newsize' devices.
   +   * New slots in each stripe get 'page' set to a new page.
   +   * We allocate all the new stripes first, then if that succeeds,
   +   * copy everything across.
   +   * Finally we add new pages.  This could fail, but we leave
   +   * the stripe cache at it's new size, just with some pages empty.
   +   *
   +   * We use GFP_NOIO allocations as IO to the raid5 is blocked
   +   * at some points in this operation.
   +   */
   +  struct stripe_head *osh, *nsh;
   +  struct list_head newstripes, oldstripes;
 
 You can use LIST_HEAD() here, avoid the separate INIT_LIST_HEAD().
 

I guess.
I have to have the declaration miles from where I use the variable.
Do I have to have the initialisation equally far?  Ok, I'll do that..


 
   +  struct disk_info *ndisks;
   +  int err = 0;
   +  kmem_cache_t *sc;
   +  int i;
   +
   +  if (newsize = conf-pool_size)
   +  return 0; /* never bother to shrink */
   +
   +  sc = kmem_cache_create(conf-cache_name[1-conf-active_name],
   + sizeof(struct 
  stripe_head)+(newsize-1)*sizeof(struct r5dev),
   + 0, 0, NULL, NULL);
 
 kmem_cache_create() internally does a GFP_KERNEL allocation.
 
   +  if (!sc)
   +  return -ENOMEM;
   +  INIT_LIST_HEAD(newstripes);
   +  for (i = conf-max_nr_stripes; i; i--) {
   +  nsh = kmem_cache_alloc(sc, GFP_NOIO);
 
 So either this can use GFP_KERNEL, or we have a problem.

Good point  Maybe the comment about GFP_NOIO just needs to be
improved. 
We cannot risk waiting on IO after the 
/* OK, we have enough stripes, start collecting inactive
 * stripes and copying them over
 */

comment, up to the second last while loop, that starts
while(!list_empty(newstripes)) {

Before the comment, which where the kmem_cache_create is, is OK.

Thanks!

NeilBrown

-
To unsubscribe from this list: send the line unsubscribe linux-raid in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 005 of 13] md: Allow stripes to be expanded in preparation for expanding an array.

2006-03-16 Thread Neil Brown
On Thursday March 16, [EMAIL PROTECTED] wrote:
 NeilBrown [EMAIL PROTECTED] wrote:
 
  +   /* Got them all.
   +   * Return the new ones and free the old ones.
   +   * At this point, we are holding all the stripes so the array
   +   * is completely stalled, so now is a good time to resize
   +   * conf-disks.
   +   */
   +  ndisks = kzalloc(newsize * sizeof(struct disk_info), GFP_NOIO);
   +  if (ndisks) {
   +  for (i=0; iconf-raid_disks; i++)
   +  ndisks[i] = conf-disks[i];
   +  kfree(conf-disks);
   +  conf-disks = ndisks;
   +  } else
   +  err = -ENOMEM;
   +  while(!list_empty(newstripes)) {
   +  nsh = list_entry(newstripes.next, struct stripe_head, lru);
   +  list_del_init(nsh-lru);
   +  for (i=conf-raid_disks; i  newsize; i++)
   +  if (nsh-dev[i].page == NULL) {
   +  struct page *p = alloc_page(GFP_NOIO);
   +  nsh-dev[i].page = p;
   +  if (!p)
   +  err = -ENOMEM;
   +  }
   +  release_stripe(nsh);
   +  }
   +  while(!list_empty(oldstripes)) {
   +  osh = list_entry(oldstripes.next, struct stripe_head, lru);
   +  list_del(osh-lru);
   +  kmem_cache_free(conf-slab_cache, osh);
   +  }
   +  kmem_cache_destroy(conf-slab_cache);
   +  conf-slab_cache = sc;
   +  conf-active_name = 1-conf-active_name;
   +  conf-pool_size = newsize;
   +  return err;
   +}
 
 Are you sure the -ENOMEM handling here is solid?  It
 looks strange.

The philosophy of the -ENOMEM handling is (awkwardly?) embodied in the
comment
 * Finally we add new pages.  This could fail, but we leave
 * the stripe cache at it's new size, just with some pages empty.

at the top of the function.  The core function here is making some
data structures bigger.  In each case, having a bigger data structure
than required is no big deal.  So we try to increase the size of each
of them (the stripe_head cache, the 'disks' array, and the pages
allocated to each stripe.
If any of there fail we return -ENOMEM, but may allow others to
succeed.

Does that help?

NeilBrown
-
To unsubscribe from this list: send the line unsubscribe linux-raid in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html