Hi,

On Thu, Apr 07, 2011 at 03:57:50PM +0800, WuBo wrote:
> hi,all
> 
> I've been digging into the idea of chunk tree backups. Here is the 
> predesign: before finishing chunk alloc, the first block in the 
> chunk will be written with some information; this information will be 
> useful for rebuilding the chunk tree after a crash. Also, the first block will 
> be moved into fs_info->freed_extents[2], just like the super block.
> What we should do is make some changes in these functions:
> btrfs_make_block_group
> btrfs_read_block_groups
> btrfs_remove_block_group  
> what do you think about it?

while I don't quite understand what you are implementing, I can give you
a few comments on the code itself.

It would be good if you described your design in more detail. If you are
touching crash recovery, please add some description of how the chunk
copies are used etc., and possibly whether fsck can benefit from it.

> There's something tricky with backward compatibility. The mkfs.btrfs
> makes several chunks when creating the fs. It also needs to do 
> the same thing as above. But it will be confusing in some situations, 
> such as an old fs mounted on a new kernel. I think it's better to add an 
> incompat flag in the super block to mark whether the fs was formatted with
> the new mkfs.btrfs.
> 
> if that's OK, TODOLIST:
> -design the information in the chunk's first block to make it unique

(I hope this covers the questions stated above)

> -backward compatibility handling (for example: fix mkfs.btrfs)
> 
> Signed-off-by: Wu Bo <wu...@cn.fujitsu.com>
> ---
>  fs/btrfs/ctree.h       |   13 +++-
>  fs/btrfs/extent-tree.c |  135 +++++++++++++++++++++++++++++++++++++-
>  fs/btrfs/volumes.c     |  168 
> ++++++++++++++++++++++++++++++++++++++++++++----
>  fs/btrfs/volumes.h     |   25 +++++++
>  4 files changed, 322 insertions(+), 19 deletions(-)
> 
> diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
> index 8b4b9d1..580dd1c 100644
> --- a/fs/btrfs/ctree.h
> +++ b/fs/btrfs/ctree.h
> @@ -41,6 +41,7 @@ extern struct kmem_cache *btrfs_transaction_cachep;
>  extern struct kmem_cache *btrfs_bit_radix_cachep;
>  extern struct kmem_cache *btrfs_path_cachep;
>  struct btrfs_ordered_sum;
> +struct map_lookup;
>  
>  #define BTRFS_MAGIC "_BHRfS_M"
>  
> @@ -408,6 +409,7 @@ struct btrfs_super_block {
>  #define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL        (1ULL << 1)
>  #define BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS  (1ULL << 2)
>  #define BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO  (1ULL << 3)
> +#define BTRFS_FEATURE_INCOMPAT_CHUNK_TREE_BACKUP (1ULL << 4)
>  
>  #define BTRFS_FEATURE_COMPAT_SUPP            0ULL
>  #define BTRFS_FEATURE_COMPAT_RO_SUPP         0ULL
> @@ -415,7 +417,8 @@ struct btrfs_super_block {
>       (BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF |         \
>        BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL |        \
>        BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS |          \
> -      BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO)
> +      BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO |          \
> +      BTRFS_FEATURE_INCOMPAT_CHUNK_TREE_BACKUP)
>  
>  /*
>   * A leaf is full of items. offset and size tell us where to find
> @@ -2172,10 +2175,12 @@ int btrfs_extent_readonly(struct btrfs_root *root, 
> u64 bytenr);
>  int btrfs_free_block_groups(struct btrfs_fs_info *info);
>  int btrfs_read_block_groups(struct btrfs_root *root);
>  int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr);
> +
>  int btrfs_make_block_group(struct btrfs_trans_handle *trans,
> -                        struct btrfs_root *root, u64 bytes_used,
> -                        u64 type, u64 chunk_objectid, u64 chunk_offset,
> -                        u64 size);
> +                        struct btrfs_root *root, struct map_lookup *map,
> +                        u64 bytes_used, u64 type, u64 chunk_objectid,
> +                        u64 chunk_offset, u64 size);
> +
>  int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
>                            struct btrfs_root *root, u64 group_start);
>  u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags);
> diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
> index f1db57d..27ea7d5 100644
> --- a/fs/btrfs/extent-tree.c
> +++ b/fs/btrfs/extent-tree.c
> @@ -23,6 +23,7 @@
>  #include <linux/rcupdate.h>
>  #include <linux/kthread.h>
>  #include <linux/slab.h>
> +#include <linux/buffer_head.h>
>  #include "compat.h"
>  #include "hash.h"
>  #include "ctree.h"
> @@ -231,6 +232,113 @@ static int exclude_super_stripes(struct btrfs_root 
> *root,
>       return 0;
>  }
>  
> +static int exclude_chunk_stripes_header_slow(struct btrfs_root *root,
> +                                     struct btrfs_block_group_cache *cache)
> +{
> +     int i;
> +     int nr;
> +     u64 devid;
> +     u64 physical;
> +     int stripe_len;
> +     u64 stripe_num;
> +     u64 *logical;
> +     struct btrfs_path *path;
> +     struct btrfs_key key;
> +     struct btrfs_chunk *chunk;
> +     struct btrfs_key found_key;
> +     struct extent_buffer *leaf;
> +     int ret;
> +
> +     ret = 0;
> +     path = btrfs_alloc_path();
> +     if (!path)
> +             return -1;
                return -ENOMEM;
> +
> +     root = root->fs_info->chunk_root;
> +
> +     key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
> +     key.offset = cache->key.objectid;
> +     key.type = BTRFS_CHUNK_ITEM_KEY;
> +
> +     ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
> +     if (ret != 0)
> +             goto error;
> +
> +     btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]);
> +
> +     if (found_key.objectid != BTRFS_FIRST_CHUNK_TREE_OBJECTID ||
> +             btrfs_key_type(&found_key) != BTRFS_CHUNK_ITEM_KEY ||
> +             found_key.offset != cache->key.objectid) {
> +             ret = -1;
                      -1 does not seem a good return code

> +             goto error;
> +     }
> +
> +     leaf = path->nodes[0];
> +     chunk = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_chunk);
> +     stripe_num = btrfs_chunk_num_stripes(leaf, chunk);
> +
> +     i = 0;
> +     while (i < stripe_num) {
        for (i = 0; i < stripe_num; i++) {

> +             devid = btrfs_stripe_devid_nr(leaf, chunk, i);
> +             physical = btrfs_stripe_offset_nr(leaf, chunk, i);
> +
> +             ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
> +                             cache->key.objectid, physical, devid, &logical,
> +                             &nr, &stripe_len);

though btrfs_rmap_block() does return only 0 by now, it can potentially
return -ENOMEM (a BUG_ON is there right now), so add a check please

> +             while (nr--) {
> +                     add_excluded_extent(root, logical[nr], stripe_len);
> +
> +                     /* FIXME.here just use the bytes_super */
> +                     cache->bytes_super += stripe_len;
> +             }
> +
> +             kfree(logical);
> +             i++;
> +     }
> +
> +error:
> +     btrfs_free_path(path);
> +     return ret;
> +}
> +
> +static int exclude_chunk_stripes_header(struct btrfs_root *root,
> +                                     struct btrfs_block_group_cache *cache,
> +                                     struct map_lookup *map)
> +{
> +     int i;
> +     int nr;
> +     u64 devid;
> +     u64 physical;
> +     int stripe_len;
> +     u64 *logical;
> +     int ret;
> +
> +     if (!map)
> +             goto slow;
                return exclude_chunk_stripes_header_slow(root, cache);

no need to scroll down to look what it means 'slow'

> +
> +     i = 0;
> +     while (i < map->num_stripes) {
> +             devid = map->stripes[i].dev->devid;
> +             physical = map->stripes[i].physical;
> +             ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
> +                             cache->key.objectid, physical, devid, &logical,
> +                             &nr, &stripe_len);

return value check

> +             while (nr--) {
> +                     add_excluded_extent(root, logical[nr], stripe_len);
> +
> +                     /* FIXME.here just use the bytes_super */
> +                     cache->bytes_super += stripe_len;
> +             }
> +
> +             kfree(logical);
> +             i++;
> +     }
> +
> +     return 0;
> +slow:
> +     return exclude_chunk_stripes_header_slow(root, cache);
> +}
> +
>  static struct btrfs_caching_control *
>  get_caching_control(struct btrfs_block_group_cache *cache)
>  {
> @@ -8402,6 +8510,7 @@ int btrfs_read_block_groups(struct btrfs_root *root)
>       struct extent_buffer *leaf;
>       int need_clear = 0;
>       u64 cache_gen;
> +     u64 feature;
>  
>       root = info->extent_root;
>       key.objectid = 0;
> @@ -8470,6 +8579,15 @@ int btrfs_read_block_groups(struct btrfs_root *root)
>               exclude_super_stripes(root, cache);
>  
>               /*
> +              * FIXME, it's should consider the backward compatibility
> +              * also with the user tools for example  mkfs.btrfs
> +              * Maybe we can judge sb flags to determine whether exclude it
> +              */
> +             feature = 
> btrfs_super_incompat_flags(&root->fs_info->super_copy);
> +             if (feature & BTRFS_FEATURE_INCOMPAT_CHUNK_TREE_BACKUP)
> +                     exclude_chunk_stripes_header(root, cache, NULL);
> +
> +             /*
>                * check for two cases, either we are full, and therefore
>                * don't need to bother with the caching work since we won't
>                * find any space, or we are empty, and we can just add all
> @@ -8533,13 +8651,14 @@ error:
>  }
>  
>  int btrfs_make_block_group(struct btrfs_trans_handle *trans,
> -                        struct btrfs_root *root, u64 bytes_used,
> -                        u64 type, u64 chunk_objectid, u64 chunk_offset,
> -                        u64 size)
> +                        struct btrfs_root *root, struct map_lookup *map,
> +                        u64 bytes_used, u64 type, u64 chunk_objectid,
> +                        u64 chunk_offset, u64 size)
>  {
>       int ret;
>       struct btrfs_root *extent_root;
>       struct btrfs_block_group_cache *cache;
> +     u64 feature;
>  
>       extent_root = root->fs_info->extent_root;
>  
> @@ -8577,6 +8696,10 @@ int btrfs_make_block_group(struct btrfs_trans_handle 
> *trans,
>       cache->cached = BTRFS_CACHE_FINISHED;
>       exclude_super_stripes(root, cache);
>  
> +     feature = btrfs_super_incompat_flags(&root->fs_info->super_copy);
> +     if (feature & BTRFS_FEATURE_INCOMPAT_CHUNK_TREE_BACKUP)
> +             exclude_chunk_stripes_header(root, cache, map);
> +
>       add_new_free_space(cache, root->fs_info, chunk_offset,
>                          chunk_offset + size);
>  
> @@ -8615,6 +8738,12 @@ int btrfs_remove_block_group(struct btrfs_trans_handle 
> *trans,
>       struct inode *inode;
>       int ret;
>       int factor;
> +     u64 feature;
> +
> +     /* erase the first block which record this chunk info */
> +     feature = btrfs_super_incompat_flags(&root->fs_info->super_copy);
> +     if (feature & BTRFS_FEATURE_INCOMPAT_CHUNK_TREE_BACKUP)
> +             erase_chunk_stripes_header(root, group_start);
>  
>       root = root->fs_info->extent_root;
>  
> diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
> index 94334d9..a9ac2b1 100644
> --- a/fs/btrfs/volumes.c
> +++ b/fs/btrfs/volumes.c
> @@ -33,17 +33,6 @@
>  #include "volumes.h"
>  #include "async-thread.h"
>  
> -struct map_lookup {
> -     u64 type;
> -     int io_align;
> -     int io_width;
> -     int stripe_len;
> -     int sector_size;
> -     int num_stripes;
> -     int sub_stripes;
> -     struct btrfs_bio_stripe stripes[];
> -};

already moved to .h

> -
>  static int init_first_rw_device(struct btrfs_trans_handle *trans,
>                               struct btrfs_root *root,
>                               struct btrfs_device *device);
> @@ -2667,7 +2656,7 @@ static int __btrfs_alloc_chunk(struct 
> btrfs_trans_handle *trans,
>       BUG_ON(ret);
>       free_extent_map(em);
>  
> -     ret = btrfs_make_block_group(trans, extent_root, 0, type,
> +     ret = btrfs_make_block_group(trans, extent_root, map, 0, type,
>                                    BTRFS_FIRST_CHUNK_TREE_OBJECTID,
>                                    start, *num_bytes);
>       BUG_ON(ret);
> @@ -2761,6 +2750,151 @@ static int __finish_chunk_alloc(struct 
> btrfs_trans_handle *trans,
>       return 0;
>  }
>  
> +static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate)
> +{
> +     char b[BDEVNAME_SIZE];
> +
> +     if (uptodate) {
> +             set_buffer_uptodate(bh);
> +     } else {
> +             if (!buffer_eopnotsupp(bh) && printk_ratelimit()) {
                                              ^^^^^^^^^^^^^^^^
should not be used in new code; it consults a global ratelimit state shared with
every printk_ratelimit caller. Use printk_ratelimited (which maintains its own
ratelimit counter) instead.

> +                     printk(KERN_WARNING "lost page write due to I/O error 
> on %s\n",
> +                                    bdevname(bh->b_bdev, b));
> +             }
> +             clear_buffer_uptodate(bh);
> +     }
> +     unlock_buffer(bh);
> +     put_bh(bh);
> +}
> +
> +static int mark_chunk_stripes_header(struct btrfs_root *root,
> +             struct map_lookup *map, u64 chunk_offset,
> +             u64 chunk_size, u64 stripe_size, u64 flag)
> +{
> +     struct buffer_head *bh;
> +     struct btrfs_device *device = NULL;
> +     struct btrfs_stripe_header header;
> +     u64 bytenr;
> +     u32 sectorsize;
> +     int index;
> +     int ret;
> +
> +     ret = 0;
> +     index = 0;
> +     while (index < map->num_stripes) {
        for (index = 0; index < map->num_stripes; index++) {

> +             device = map->stripes[index].dev;
> +             bytenr = map->stripes[index].physical;
> +             sectorsize = root->sectorsize;
> +             do_div(bytenr, sectorsize);
> +             bh = __getblk(device->bdev, bytenr, sectorsize);
> +             if (!bh)
> +                     return -1;
> +
> +             memset(&header, 0, sizeof(header));
> +             header.magic =  cpu_to_le64(BTRFS_STRIPE_HEADER_MAGIC);
> +             header.chunk_offset = cpu_to_le64(chunk_offset);
> +             header.chunk_size = cpu_to_le64(chunk_size);
> +             header.stripe_size = cpu_to_le64(stripe_size);
> +             header.stripe_index = cpu_to_le32(index);
> +             header.flag = cpu_to_le64(flag);
> +             memcpy(header.fsid, root->fs_info->fsid, BTRFS_FSID_SIZE);
> +
> +             memset(bh->b_data, 0, sectorsize);
> +             memcpy(bh->b_data, &header, sizeof(header));
> +
> +             get_bh(bh);
> +             set_buffer_uptodate(bh);
> +             lock_buffer(bh);
> +             bh->b_end_io = btrfs_end_buffer_write_sync;
> +
> +             ret = submit_bh(WRITE_SYNC, bh);
> +             wait_on_buffer(bh);
> +             brelse(bh);
> +
> +             index++;
> +     }
> +     return ret;
> +}
> +
> +int erase_chunk_stripes_header(struct btrfs_root *root,
> +                                     u64 chunk_offset)
> +{
> +     int i;
> +     int ret;
> +     u64 devid;
> +     u64 physical;
> +     u32 sectorsize;
> +     u64 stripe_num;
> +     u8 uuid[BTRFS_UUID_SIZE];
> +     struct btrfs_path *path;
> +     struct btrfs_key key;
> +     struct btrfs_chunk *chunk;
> +     struct btrfs_key found_key;
> +     struct extent_buffer *leaf;
> +     struct btrfs_device *device;
> +     struct buffer_head *bh;
> +
> +     ret = 0;
> +     path = btrfs_alloc_path();
> +     if (!path)
> +             return -1;
                return -ENOMEM;

> +
> +     root = root->fs_info->chunk_root;
> +
> +     key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
> +     key.offset = chunk_offset;
> +     key.type = BTRFS_CHUNK_ITEM_KEY;
> +
> +     ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
> +     if (ret != 0) {
> +             ret = -1;
something else than -1

> +             goto error;
> +     }
> +
> +     btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]);
> +
> +     if (found_key.objectid != BTRFS_FIRST_CHUNK_TREE_OBJECTID ||
> +             btrfs_key_type(&found_key) != BTRFS_CHUNK_ITEM_KEY ||
> +             found_key.offset != chunk_offset) {
> +             ret = -1;
something else than -1

> +             goto error;
> +     }
> +
> +     leaf = path->nodes[0];
> +     chunk = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_chunk);
> +     stripe_num = btrfs_chunk_num_stripes(leaf, chunk);
> +     i = 0;
> +     while (i < stripe_num) {
        for (...)

> +             devid = btrfs_stripe_devid_nr(leaf, chunk, i);
> +             physical = btrfs_stripe_offset_nr(leaf, chunk, i);
> +             read_extent_buffer(leaf, uuid, (unsigned long)
> +                             btrfs_stripe_dev_uuid_nr(chunk, i),
> +                             BTRFS_UUID_SIZE);
> +             device = btrfs_find_device(root, devid, uuid, NULL);
> +
> +             sectorsize = root->sectorsize;
> +             do_div(physical, sectorsize);
> +             bh = __getblk(device->bdev, physical, sectorsize);
> +             if (!bh) {
> +                     i++;
> +                     continue;
> +             }
> +             memset(bh->b_data, 0, sectorsize);
> +             get_bh(bh);
> +             set_buffer_uptodate(bh);
> +             lock_buffer(bh);
> +             bh->b_end_io = btrfs_end_buffer_write_sync;
> +
> +             submit_bh(WRITE_SYNC, bh);
> +             wait_on_buffer(bh);
> +             brelse(bh);
> +             i++;
> +     }
> +error:
> +     btrfs_free_path(path);
> +     return ret;
> +}
> +
>  /*
>   * Chunk allocation falls into two parts. The first part does works
>   * that make the new allocated chunk useable, but not do any operation
> @@ -2777,6 +2911,7 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
>       struct map_lookup *map;
>       struct btrfs_root *chunk_root = extent_root->fs_info->chunk_root;
>       int ret;
> +     u64 feature;
>  
>       ret = find_next_chunk(chunk_root, BTRFS_FIRST_CHUNK_TREE_OBJECTID,
>                             &chunk_offset);
> @@ -2791,6 +2926,15 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
>       ret = __finish_chunk_alloc(trans, extent_root, map, chunk_offset,
>                                  chunk_size, stripe_size);
>       BUG_ON(ret);
> +
> +     feature = btrfs_super_incompat_flags(&extent_root->fs_info->super_copy);
> +     if (feature & BTRFS_FEATURE_INCOMPAT_CHUNK_TREE_BACKUP) {
> +             ret = mark_chunk_stripes_header(extent_root, map, chunk_offset,
> +                                     chunk_size, stripe_size, type);
> +             if (ret)
> +                     return ret;
> +     }
> +
>       return 0;
>  }
>  
> diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
> index 7af6144..7166a3b 100644
> --- a/fs/btrfs/volumes.h
> +++ b/fs/btrfs/volumes.h
> @@ -146,6 +146,31 @@ struct btrfs_device_info {
>       u64 max_avail;
>  };
>  
> +struct map_lookup {
> +     u64 type;
> +     int io_align;
> +     int io_width;
> +     int stripe_len;
> +     int sector_size;
> +     int num_stripes;
> +     int sub_stripes;
> +     struct btrfs_bio_stripe stripes[];
> +};

already present in .h

> +
> +#define BTRFS_STRIPE_HEADER_MAGIC 111111

this is not a *magic* number! :)

> +
> +struct btrfs_stripe_header {
> +     __le64 magic;
> +     __le64 chunk_offset;
> +     __le64 chunk_size;
> +     __le64 stripe_size;
> +     __le32 stripe_index;
> +     __le64 flag;
> +     u8 fsid[BTRFS_FSID_SIZE];
> +} __attribute__ ((__packed__));

why is it packed? It's in-memory-only, and the ordering of stripe_index and
flag will cause unaligned access of flag. The compiler knows this and will
merely generate extra instructions rather than a truly unaligned access,
but it's still a bit inefficient.

> +
> +int erase_chunk_stripes_header(struct btrfs_root *root, u64 chunk_offset);
> +
>  /* Used to sort the devices by max_avail(descending sort) */
>  int btrfs_cmp_device_free_bytes(const void *dev_info1, const void 
> *dev_info2);

dave
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to