Re: [PATCH 2/4] btrfs: add support for 3-copy replication (raid1c3)

2018-07-17 Thread David Sterba
On Fri, Jul 13, 2018 at 11:02:03PM +0200, Goffredo Baroncelli wrote:
> As general comment, good to hear that something is moving around raid5/6 + 
> write hole and multiple mirroring.
> However I am guessing if this is time to simplify the RAID code. There are a 
> lot of "if" which could be avoided using 
> the values stored in the array "btrfs_raid_array[]".

I absolutely agree and had the same impression during implementing the
feature. For this patchset I did only a minimal prep work, the
suggestions you give below make sense to me.

Enhancing the table would make a lot of code go away and just use one
formula to calculate the results that are now opencoded. I'll be going
through the raid code so I'll get to the cleanups eventually.

> Below some comments:

> > @@ -5075,6 +5093,8 @@ static inline int btrfs_chunk_max_errors(struct 
> > map_lookup *map)
> >  BTRFS_BLOCK_GROUP_RAID5 |
> >  BTRFS_BLOCK_GROUP_DUP)) {
> > max_errors = 1;
> > +   } else if (map->type & BTRFS_BLOCK_GROUP_RAID1C3) {
> > +   max_errors = 2;
> > } else if (map->type & BTRFS_BLOCK_GROUP_RAID6) {
> > max_errors = 2;
> > } else {
> 
> Even in this case the ifs above could be replaced with something like:
> 
>   index = btrfs_bg_flags_to_raid_index(map->type)
>   max_errors = btrfs_raid_array[index].ncopies-1;

There's .tolerated_failures that should equal ncopies - 1 in general,
but does not for DUP so the semantics of the function and caller needs
to be verified.
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 2/4] btrfs: add support for 3-copy replication (raid1c3)

2018-07-13 Thread Goffredo Baroncelli
As general comment, good to hear that something is moving around raid5/6 + 
write hole and multiple mirroring.
However I am guessing if this is time to simplify the RAID code. There are a 
lot of "if" which could be avoided using 
the values stored in the array "btrfs_raid_array[]".

Below some comments:

On 07/13/2018 08:46 PM, David Sterba wrote:
> Add new block group profile to store 3 copies in a simliar way that
> current RAID1 does. The profile name is temporary and may change in the
> future.
> 
> Signed-off-by: David Sterba 
> ---
>  fs/btrfs/extent-tree.c  |  6 +
>  fs/btrfs/relocation.c   |  1 +
>  fs/btrfs/scrub.c|  3 ++-
>  fs/btrfs/super.c|  3 +++
>  fs/btrfs/volumes.c  | 40 -
>  fs/btrfs/volumes.h  |  2 ++
>  include/uapi/linux/btrfs.h  |  3 ++-
>  include/uapi/linux/btrfs_tree.h |  3 +++
>  8 files changed, 53 insertions(+), 8 deletions(-)
> 
> diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
> index 4ffa64e288da..47f929dcc3d4 100644
> --- a/fs/btrfs/extent-tree.c
> +++ b/fs/btrfs/extent-tree.c
> @@ -7527,6 +7527,7 @@ static noinline int find_free_extent(struct 
> btrfs_fs_info *fs_info,
>   if (!block_group_bits(block_group, flags)) {
>   u64 extra = BTRFS_BLOCK_GROUP_DUP |
>   BTRFS_BLOCK_GROUP_RAID1 |
> + BTRFS_BLOCK_GROUP_RAID1C3 |
>   BTRFS_BLOCK_GROUP_RAID5 |
>   BTRFS_BLOCK_GROUP_RAID6 |
>   BTRFS_BLOCK_GROUP_RAID10;

"extra" could be created iterating on btrfs_raid_array[] and considering only 
the item with ncopies > 1; or we could add 

#define BTRFS_BLOCK_GROUP_REDUNDANCY (BTRFS_BLOCK_GROUP_DUP| .)

This constant could be used also below

> @@ -9330,6 +9331,8 @@ static u64 update_block_group_flags(struct 
> btrfs_fs_info *fs_info, u64 flags)
>  
>   num_devices = fs_info->fs_devices->rw_devices;
>  
> + ASSERT(!(flags & BTRFS_BLOCK_GROUP_RAID1C3));
> +
>   stripped = BTRFS_BLOCK_GROUP_RAID0 |
>   BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6 |
>   BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10;
> @@ -9647,6 +9650,8 @@ int btrfs_can_relocate(struct btrfs_fs_info *fs_info, 
> u64 bytenr)
>   min_free >>= 1;
>   } else if (index == BTRFS_RAID_RAID1) {
>   dev_min = 2;
> + } else if (index == BTRFS_RAID_RAID1C3) {
> + dev_min = 3;
>   } else if (index == BTRFS_RAID_DUP) {
>   /* Multiply by 2 */
>   min_free <<= 1;

The "if"s above could be simplified as:

dev_min = btrfs_raid_array[index].devs_min;
if (index == BTRFS_RAID_DUP)
min_free <<= 1;
else if (index == BTRFS_RAID_RAID0)
min_free = div64_u64(min_free, dev_min);
else if (index == BTRFS_RAID_RAID10)
min_free >>= 1;


> @@ -10141,6 +10146,7 @@ int btrfs_read_block_groups(struct btrfs_fs_info 
> *info)
>   if (!(get_alloc_profile(info, space_info->flags) &
> (BTRFS_BLOCK_GROUP_RAID10 |
>  BTRFS_BLOCK_GROUP_RAID1 |
> +BTRFS_BLOCK_GROUP_RAID1C3 |
>  BTRFS_BLOCK_GROUP_RAID5 |
>  BTRFS_BLOCK_GROUP_RAID6 |
>  BTRFS_BLOCK_GROUP_DUP)))


See above about BTRFS_BLOCK_GROUP_REDUNDANCY

> diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
> index 879b76fa881a..fea9e7e96b87 100644
> --- a/fs/btrfs/relocation.c
> +++ b/fs/btrfs/relocation.c
> @@ -4339,6 +4339,7 @@ static void describe_relocation(struct btrfs_fs_info 
> *fs_info,
>   DESCRIBE_FLAG(METADATA, "metadata");

The code below, could be performed searching in the array btrfs_raid_array[] 
instead of checking each possibility

>   DESCRIBE_FLAG(RAID0,"raid0");
>   DESCRIBE_FLAG(RAID1,"raid1");
> + DESCRIBE_FLAG(RAID1C3,  "raid1c3");
>   DESCRIBE_FLAG(DUP,  "dup");
>   DESCRIBE_FLAG(RAID10,   "raid10");
>   DESCRIBE_FLAG(RAID5,"raid5");



> diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
> index 572306036477..e9355759f2ec 100644
> --- a/fs/btrfs/scrub.c
> +++ b/fs/btrfs/scrub.c
> @@ -3388,7 +3388,8 @@ static noinline_for_stack int scrub_stripe(struct 
> scrub_ctx *sctx,
>   offset = map->stripe_len * (num / map->sub_stripes);
>   increment = map->stripe_len * factor;
>   mirror_num = num % map->sub_stripes + 1;
> - } else if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
> + } else if (map->type & (BTRFS_BLOCK_GROUP_RAID1 |
> + BTRFS_BLOCK_GROUP_RAID1C3)) {
>   increment = map->stripe_len;
>   mirror_num = num % map->num_stripes + 1;
>   } else if (map->type & 

[PATCH 2/4] btrfs: add support for 3-copy replication (raid1c3)

2018-07-13 Thread David Sterba
Add new block group profile to store 3 copies in a simliar way that
current RAID1 does. The profile name is temporary and may change in the
future.

Signed-off-by: David Sterba 
---
 fs/btrfs/extent-tree.c  |  6 +
 fs/btrfs/relocation.c   |  1 +
 fs/btrfs/scrub.c|  3 ++-
 fs/btrfs/super.c|  3 +++
 fs/btrfs/volumes.c  | 40 -
 fs/btrfs/volumes.h  |  2 ++
 include/uapi/linux/btrfs.h  |  3 ++-
 include/uapi/linux/btrfs_tree.h |  3 +++
 8 files changed, 53 insertions(+), 8 deletions(-)

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 4ffa64e288da..47f929dcc3d4 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -7527,6 +7527,7 @@ static noinline int find_free_extent(struct btrfs_fs_info 
*fs_info,
if (!block_group_bits(block_group, flags)) {
u64 extra = BTRFS_BLOCK_GROUP_DUP |
BTRFS_BLOCK_GROUP_RAID1 |
+   BTRFS_BLOCK_GROUP_RAID1C3 |
BTRFS_BLOCK_GROUP_RAID5 |
BTRFS_BLOCK_GROUP_RAID6 |
BTRFS_BLOCK_GROUP_RAID10;
@@ -9330,6 +9331,8 @@ static u64 update_block_group_flags(struct btrfs_fs_info 
*fs_info, u64 flags)
 
num_devices = fs_info->fs_devices->rw_devices;
 
+   ASSERT(!(flags & BTRFS_BLOCK_GROUP_RAID1C3));
+
stripped = BTRFS_BLOCK_GROUP_RAID0 |
BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6 |
BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10;
@@ -9647,6 +9650,8 @@ int btrfs_can_relocate(struct btrfs_fs_info *fs_info, u64 
bytenr)
min_free >>= 1;
} else if (index == BTRFS_RAID_RAID1) {
dev_min = 2;
+   } else if (index == BTRFS_RAID_RAID1C3) {
+   dev_min = 3;
} else if (index == BTRFS_RAID_DUP) {
/* Multiply by 2 */
min_free <<= 1;
@@ -10141,6 +10146,7 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
if (!(get_alloc_profile(info, space_info->flags) &
  (BTRFS_BLOCK_GROUP_RAID10 |
   BTRFS_BLOCK_GROUP_RAID1 |
+  BTRFS_BLOCK_GROUP_RAID1C3 |
   BTRFS_BLOCK_GROUP_RAID5 |
   BTRFS_BLOCK_GROUP_RAID6 |
   BTRFS_BLOCK_GROUP_DUP)))
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 879b76fa881a..fea9e7e96b87 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -4339,6 +4339,7 @@ static void describe_relocation(struct btrfs_fs_info 
*fs_info,
DESCRIBE_FLAG(METADATA, "metadata");
DESCRIBE_FLAG(RAID0,"raid0");
DESCRIBE_FLAG(RAID1,"raid1");
+   DESCRIBE_FLAG(RAID1C3,  "raid1c3");
DESCRIBE_FLAG(DUP,  "dup");
DESCRIBE_FLAG(RAID10,   "raid10");
DESCRIBE_FLAG(RAID5,"raid5");
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 572306036477..e9355759f2ec 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -3388,7 +3388,8 @@ static noinline_for_stack int scrub_stripe(struct 
scrub_ctx *sctx,
offset = map->stripe_len * (num / map->sub_stripes);
increment = map->stripe_len * factor;
mirror_num = num % map->sub_stripes + 1;
-   } else if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
+   } else if (map->type & (BTRFS_BLOCK_GROUP_RAID1 |
+   BTRFS_BLOCK_GROUP_RAID1C3)) {
increment = map->stripe_len;
mirror_num = num % map->num_stripes + 1;
} else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 4f646b66cc06..86e6aa5ef788 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -1977,6 +1977,9 @@ static int btrfs_calc_avail_data_space(struct 
btrfs_fs_info *fs_info,
} else if (type & BTRFS_BLOCK_GROUP_RAID1) {
min_stripes = 2;
num_stripes = 2;
+   } else if (type & BTRFS_BLOCK_GROUP_RAID1C3) {
+   min_stripes = 3;
+   num_stripes = 3;
} else if (type & BTRFS_BLOCK_GROUP_RAID10) {
min_stripes = 4;
num_stripes = 4;
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 45635f4d78c8..0920b31e999d 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -116,6 +116,18 @@ const struct btrfs_raid_attr 
btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
.bg_flag= BTRFS_BLOCK_GROUP_RAID6,
.mindev_error   = BTRFS_ERROR_DEV_RAID6_MIN_NOT_MET,
},
+   [BTRFS_RAID_RAID1C3] = {
+   .sub_stripes= 1,
+   .dev_stripes= 1,
+   .devs_max   = 0,
+   .devs_min   = 3,
+