Re: [PATCH 6/8] Add support for recovery for a raid5 btrfs profiles.

2018-05-16 Thread Goffredo Baroncelli
On 05/14/2018 08:40 PM, Daniel Kiper wrote:
> On Fri, May 11, 2018 at 09:24:44PM +0200, Goffredo Baroncelli wrote:
>> Signed-off-by: Goffredo Baroncelli 
>> ---
>>  grub-core/fs/btrfs.c | 178 +--
>>  1 file changed, 173 insertions(+), 5 deletions(-)
>>
>> diff --git a/grub-core/fs/btrfs.c b/grub-core/fs/btrfs.c
>> index fc4198e39..8d72607d1 100644
>> --- a/grub-core/fs/btrfs.c
>> +++ b/grub-core/fs/btrfs.c
>> @@ -29,6 +29,7 @@
>>  #include 
>>  #include 
>>  #include 
>> +#include 
>>
>>  GRUB_MOD_LICENSE ("GPLv3+");
>>
>> @@ -663,9 +664,156 @@ btrfs_read_from_chunk (struct grub_btrfs_data *data,
>>  err = grub_disk_read (dev->disk, paddr >> GRUB_DISK_SECTOR_BITS,
>>paddr & (GRUB_DISK_SECTOR_SIZE - 1),
>>csize, buf);
>> +grub_dprintf ("btrfs", "reading paddr 0x%" PRIxGRUB_UINT64_T "\n", 
>> paddr);
>>  return err;
>>  }
>>
>> +struct raid56_buffer {
>> +  void *buf;
>> +  int  data_is_valid;
>> +};
>> +
>> +static void
>> +rebuild_raid5 (struct raid56_buffer *buffers, grub_uint64_t nstripes,
>> +   grub_uint64_t csize)
>> +{
>> +  grub_uint64_t target = 0, i;
>> +
>> +  while (buffers[target].data_is_valid && target < nstripes)
>> +++target;
>> +
>> +  if (target == nstripes)
>> +{
>> +  grub_dprintf ("btrfs", "called rebuild_raid5(), but all disks are 
>> OK\n");
>> +  return;
>> +}
>> +
>> +  grub_dprintf ("btrfs", "rebuilding raid5 stripe #%" PRIuGRUB_UINT64_T 
>> "\n",
>> +target);
>> +  for (i = 0; i < nstripes ; i++)
> 
> Please drop extra space behind nstripes. I have found
> similar issues below too. Please fix all of them.
> 
>> +if (i != target)
>> +  grub_crypto_xor (buffers[target].buf, buffers[target].buf, 
>> buffers[i].buf,
>> +   csize);
>> +}
>> +
>> +static grub_err_t
>> +raid56_read_retry (struct grub_btrfs_data *data,
>> +   struct grub_btrfs_chunk_item *chunk,
>> +   grub_uint64_t stripe_offset, grub_uint64_t stripen,
>> +   grub_uint64_t csize, void *buf)
>> +{
>> +
>> +  struct raid56_buffer *buffers = NULL;
>> +  grub_uint64_t nstripes = grub_le_to_cpu16 (chunk->nstripes);
>> +  grub_uint64_t chunk_type = grub_le_to_cpu64 (chunk->type);
>> +  grub_err_t ret = GRUB_ERR_NONE;
>> +  grub_uint64_t i, failed_devices;
>> +
>> +  buffers = grub_zalloc (sizeof(*buffers) * nstripes);
>> +  if (!buffers)
>> +{
>> +  ret = GRUB_ERR_OUT_OF_MEMORY;
>> +  goto cleanup;
>> +}
>> +
>> +  for (i = 0; i < nstripes ; i++)
> 
> Ditto.
> 
>> +{
>> +  buffers[i].buf = grub_zalloc (csize);
>> +  if (!buffers[i].buf)
>> +{
>> +  ret = GRUB_ERR_OUT_OF_MEMORY;
>> +  goto cleanup;
>> +}
>> +}
>> +
>> +  for (i = 0; i < nstripes ; i++)
> 
> Ditto.
> 
>> +{
>> +  struct grub_btrfs_chunk_stripe *stripe;
>> +  grub_disk_addr_t paddr;
>> +  grub_device_t dev;
>> +  grub_err_t err2;
>> +
>> +  stripe = (struct grub_btrfs_chunk_stripe *) (chunk + 1);
>> +  stripe += i;
>> +
>> +  paddr = grub_le_to_cpu64 (stripe->offset) + stripe_offset;
>> +  grub_dprintf ("btrfs", "reading paddr %" PRIxGRUB_UINT64_T
>> +" from stripe ID %" PRIxGRUB_UINT64_T "\n", paddr,
>> +stripe->device_id);
>> +
>> +  /* FIXME: rescan the devices */
> 
> Could you be more verbose here?
This was an initial comment. After few thoughts I concluded that it was wrong. 
So I 
removed it in the next patches set.

> 
>> +  dev = find_device (data, stripe->device_id);
>> +  if (!dev)
>> +{
>> +  buffers[i].data_is_valid = 0;
>> +  grub_dprintf ("btrfs", "stripe %" PRIuGRUB_UINT64_T " FAILED (dev ID 
>> %"
>> +PRIxGRUB_UINT64_T ")\n", i, stripe->device_id);
>> +  continue;
>> +}
>> +
>> +  err2 = grub_disk_read (dev->disk, paddr >> GRUB_DISK_SECTOR_BITS,
>> + paddr & (GRUB_DISK_SECTOR_SIZE - 1),
>> + csize, buffers[i].buf);
>> +  if (err2 == GRUB_ERR_NONE)
>> +{
>> +  buffers[i].data_is_valid = 1;
>> +  grub_dprintf ("btrfs", "stripe %" PRIuGRUB_UINT64_T " Ok (dev ID %"
>> +PRIxGRUB_UINT64_T ")\n", i, stripe->device_id);
>> +}
>> +  else
>> +{
>> +  buffers[i].data_is_valid = 0;
>> +  grub_dprintf ("btrfs", "stripe %" PRIuGRUB_UINT64_T
>> +" FAILED (dev ID %" PRIxGRUB_UINT64_T ")\n", i,
>> +stripe->device_id);
>> +}
>> +}
>> +
>> +  failed_devices = 0;
>> +  for (i = 0; i < nstripes ; i++)
>> +if (!buffers[i].data_is_valid)
>> +  ++failed_devices;
>> +  if (failed_devices > 1 && (chunk_type & GRUB_BTRFS_CHUNK_TYPE_RAID5))
>> +{
>> +  grub_dprintf ("btrfs",
>> +"not enough disks for raid5: total %" PRIuGRUB_UINT64_T
>> +", missing %" PRIuGRUB_UINT64_T "\n",
>> +n

Re: [PATCH 6/8] Add support for recovery for a raid5 btrfs profiles.

2018-05-14 Thread Daniel Kiper
On Fri, May 11, 2018 at 09:24:44PM +0200, Goffredo Baroncelli wrote:
> Signed-off-by: Goffredo Baroncelli 
> ---
>  grub-core/fs/btrfs.c | 178 +--
>  1 file changed, 173 insertions(+), 5 deletions(-)
>
> diff --git a/grub-core/fs/btrfs.c b/grub-core/fs/btrfs.c
> index fc4198e39..8d72607d1 100644
> --- a/grub-core/fs/btrfs.c
> +++ b/grub-core/fs/btrfs.c
> @@ -29,6 +29,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>
>  GRUB_MOD_LICENSE ("GPLv3+");
>
> @@ -663,9 +664,156 @@ btrfs_read_from_chunk (struct grub_btrfs_data *data,
>  err = grub_disk_read (dev->disk, paddr >> GRUB_DISK_SECTOR_BITS,
> paddr & (GRUB_DISK_SECTOR_SIZE - 1),
> csize, buf);
> +grub_dprintf ("btrfs", "reading paddr 0x%" PRIxGRUB_UINT64_T "\n", 
> paddr);
>  return err;
>  }
>
> +struct raid56_buffer {
> +  void *buf;
> +  int  data_is_valid;
> +};
> +
> +static void
> +rebuild_raid5 (struct raid56_buffer *buffers, grub_uint64_t nstripes,
> +   grub_uint64_t csize)
> +{
> +  grub_uint64_t target = 0, i;
> +
> +  while (buffers[target].data_is_valid && target < nstripes)
> +++target;
> +
> +  if (target == nstripes)
> +{
> +  grub_dprintf ("btrfs", "called rebuild_raid5(), but all disks are 
> OK\n");
> +  return;
> +}
> +
> +  grub_dprintf ("btrfs", "rebuilding raid5 stripe #%" PRIuGRUB_UINT64_T "\n",
> + target);
> +  for (i = 0; i < nstripes ; i++)

Please drop extra space behind nstripes. I have found
similar issues below too. Please fix all of them.

> +if (i != target)
> +  grub_crypto_xor (buffers[target].buf, buffers[target].buf, 
> buffers[i].buf,
> +   csize);
> +}
> +
> +static grub_err_t
> +raid56_read_retry (struct grub_btrfs_data *data,
> +struct grub_btrfs_chunk_item *chunk,
> +grub_uint64_t stripe_offset, grub_uint64_t stripen,
> +grub_uint64_t csize, void *buf)
> +{
> +
> +  struct raid56_buffer *buffers = NULL;
> +  grub_uint64_t nstripes = grub_le_to_cpu16 (chunk->nstripes);
> +  grub_uint64_t chunk_type = grub_le_to_cpu64 (chunk->type);
> +  grub_err_t ret = GRUB_ERR_NONE;
> +  grub_uint64_t i, failed_devices;
> +
> +  buffers = grub_zalloc (sizeof(*buffers) * nstripes);
> +  if (!buffers)
> +{
> +  ret = GRUB_ERR_OUT_OF_MEMORY;
> +  goto cleanup;
> +}
> +
> +  for (i = 0; i < nstripes ; i++)

Ditto.

> +{
> +  buffers[i].buf = grub_zalloc (csize);
> +  if (!buffers[i].buf)
> + {
> +   ret = GRUB_ERR_OUT_OF_MEMORY;
> +   goto cleanup;
> + }
> +}
> +
> +  for (i = 0; i < nstripes ; i++)

Ditto.

> +{
> +  struct grub_btrfs_chunk_stripe *stripe;
> +  grub_disk_addr_t paddr;
> +  grub_device_t dev;
> +  grub_err_t err2;
> +
> +  stripe = (struct grub_btrfs_chunk_stripe *) (chunk + 1);
> +  stripe += i;
> +
> +  paddr = grub_le_to_cpu64 (stripe->offset) + stripe_offset;
> +  grub_dprintf ("btrfs", "reading paddr %" PRIxGRUB_UINT64_T
> +" from stripe ID %" PRIxGRUB_UINT64_T "\n", paddr,
> +stripe->device_id);
> +
> +  /* FIXME: rescan the devices */

Could you be more verbose here?

> +  dev = find_device (data, stripe->device_id);
> +  if (!dev)
> + {
> +   buffers[i].data_is_valid = 0;
> +   grub_dprintf ("btrfs", "stripe %" PRIuGRUB_UINT64_T " FAILED (dev ID 
> %"
> + PRIxGRUB_UINT64_T ")\n", i, stripe->device_id);
> +   continue;
> + }
> +
> +  err2 = grub_disk_read (dev->disk, paddr >> GRUB_DISK_SECTOR_BITS,
> +  paddr & (GRUB_DISK_SECTOR_SIZE - 1),
> +  csize, buffers[i].buf);
> +  if (err2 == GRUB_ERR_NONE)
> + {
> +   buffers[i].data_is_valid = 1;
> +   grub_dprintf ("btrfs", "stripe %" PRIuGRUB_UINT64_T " Ok (dev ID %"
> + PRIxGRUB_UINT64_T ")\n", i, stripe->device_id);
> + }
> +  else
> + {
> +   buffers[i].data_is_valid = 0;
> +   grub_dprintf ("btrfs", "stripe %" PRIuGRUB_UINT64_T
> + " FAILED (dev ID %" PRIxGRUB_UINT64_T ")\n", i,
> + stripe->device_id);
> + }
> +}
> +
> +  failed_devices = 0;
> +  for (i = 0; i < nstripes ; i++)
> +if (!buffers[i].data_is_valid)
> +  ++failed_devices;
> +  if (failed_devices > 1 && (chunk_type & GRUB_BTRFS_CHUNK_TYPE_RAID5))
> +{
> +  grub_dprintf ("btrfs",
> + "not enough disks for raid5: total %" PRIuGRUB_UINT64_T
> + ", missing %" PRIuGRUB_UINT64_T "\n",
> + nstripes, failed_devices);
> +  ret = GRUB_ERR_READ_ERROR;
> +  goto cleanup;
> +}
> +  else
> +{
> +  grub_dprintf ("btrfs",
> +"enough disks for raid5/6 rebuilding: total %"
> + PRIuGRUB_UINT64_T ", missing %" PRIuGRUB_UINT64_T "\n",
> + 

[PATCH 6/8] Add support for recovery for a raid5 btrfs profiles.

2018-05-11 Thread Goffredo Baroncelli
Signed-off-by: Goffredo Baroncelli 
---
 grub-core/fs/btrfs.c | 178 +--
 1 file changed, 173 insertions(+), 5 deletions(-)

diff --git a/grub-core/fs/btrfs.c b/grub-core/fs/btrfs.c
index fc4198e39..8d72607d1 100644
--- a/grub-core/fs/btrfs.c
+++ b/grub-core/fs/btrfs.c
@@ -29,6 +29,7 @@
 #include 
 #include 
 #include 
+#include 
 
 GRUB_MOD_LICENSE ("GPLv3+");
 
@@ -663,9 +664,156 @@ btrfs_read_from_chunk (struct grub_btrfs_data *data,
 err = grub_disk_read (dev->disk, paddr >> GRUB_DISK_SECTOR_BITS,
  paddr & (GRUB_DISK_SECTOR_SIZE - 1),
  csize, buf);
+grub_dprintf ("btrfs", "reading paddr 0x%" PRIxGRUB_UINT64_T "\n", paddr);
 return err;
 }
 
+struct raid56_buffer {
+  void *buf;
+  int  data_is_valid;
+};
+
+static void
+rebuild_raid5 (struct raid56_buffer *buffers, grub_uint64_t nstripes,
+   grub_uint64_t csize)
+{
+  grub_uint64_t target = 0, i;
+
+  while (buffers[target].data_is_valid && target < nstripes)
+++target;
+
+  if (target == nstripes)
+{
+  grub_dprintf ("btrfs", "called rebuild_raid5(), but all disks are OK\n");
+  return;
+}
+
+  grub_dprintf ("btrfs", "rebuilding raid5 stripe #%" PRIuGRUB_UINT64_T "\n",
+   target);
+  for (i = 0; i < nstripes ; i++)
+if (i != target)
+  grub_crypto_xor (buffers[target].buf, buffers[target].buf, 
buffers[i].buf,
+   csize);
+}
+
+static grub_err_t
+raid56_read_retry (struct grub_btrfs_data *data,
+  struct grub_btrfs_chunk_item *chunk,
+  grub_uint64_t stripe_offset, grub_uint64_t stripen,
+  grub_uint64_t csize, void *buf)
+{
+
+  struct raid56_buffer *buffers = NULL;
+  grub_uint64_t nstripes = grub_le_to_cpu16 (chunk->nstripes);
+  grub_uint64_t chunk_type = grub_le_to_cpu64 (chunk->type);
+  grub_err_t ret = GRUB_ERR_NONE;
+  grub_uint64_t i, failed_devices;
+
+  buffers = grub_zalloc (sizeof(*buffers) * nstripes);
+  if (!buffers)
+{
+  ret = GRUB_ERR_OUT_OF_MEMORY;
+  goto cleanup;
+}
+
+  for (i = 0; i < nstripes ; i++)
+{
+  buffers[i].buf = grub_zalloc (csize);
+  if (!buffers[i].buf)
+   {
+ ret = GRUB_ERR_OUT_OF_MEMORY;
+ goto cleanup;
+   }
+}
+
+  for (i = 0; i < nstripes ; i++)
+{
+  struct grub_btrfs_chunk_stripe *stripe;
+  grub_disk_addr_t paddr;
+  grub_device_t dev;
+  grub_err_t err2;
+
+  stripe = (struct grub_btrfs_chunk_stripe *) (chunk + 1);
+  stripe += i;
+
+  paddr = grub_le_to_cpu64 (stripe->offset) + stripe_offset;
+  grub_dprintf ("btrfs", "reading paddr %" PRIxGRUB_UINT64_T
+" from stripe ID %" PRIxGRUB_UINT64_T "\n", paddr,
+stripe->device_id);
+
+  /* FIXME: rescan the devices */
+  dev = find_device (data, stripe->device_id);
+  if (!dev)
+   {
+ buffers[i].data_is_valid = 0;
+ grub_dprintf ("btrfs", "stripe %" PRIuGRUB_UINT64_T " FAILED (dev ID 
%"
+   PRIxGRUB_UINT64_T ")\n", i, stripe->device_id);
+ continue;
+   }
+
+  err2 = grub_disk_read (dev->disk, paddr >> GRUB_DISK_SECTOR_BITS,
+paddr & (GRUB_DISK_SECTOR_SIZE - 1),
+csize, buffers[i].buf);
+  if (err2 == GRUB_ERR_NONE)
+   {
+ buffers[i].data_is_valid = 1;
+ grub_dprintf ("btrfs", "stripe %" PRIuGRUB_UINT64_T " Ok (dev ID %"
+   PRIxGRUB_UINT64_T ")\n", i, stripe->device_id);
+   }
+  else
+   {
+ buffers[i].data_is_valid = 0;
+ grub_dprintf ("btrfs", "stripe %" PRIuGRUB_UINT64_T
+   " FAILED (dev ID %" PRIxGRUB_UINT64_T ")\n", i,
+   stripe->device_id);
+   }
+}
+
+  failed_devices = 0;
+  for (i = 0; i < nstripes ; i++)
+if (!buffers[i].data_is_valid)
+  ++failed_devices;
+  if (failed_devices > 1 && (chunk_type & GRUB_BTRFS_CHUNK_TYPE_RAID5))
+{
+  grub_dprintf ("btrfs",
+   "not enough disks for raid5: total %" PRIuGRUB_UINT64_T
+   ", missing %" PRIuGRUB_UINT64_T "\n",
+   nstripes, failed_devices);
+  ret = GRUB_ERR_READ_ERROR;
+  goto cleanup;
+}
+  else
+{
+  grub_dprintf ("btrfs",
+"enough disks for raid5/6 rebuilding: total %"
+   PRIuGRUB_UINT64_T ", missing %" PRIuGRUB_UINT64_T "\n",
+nstripes, failed_devices);
+}
+
+  /* if these are enough, try to rebuild the data */
+  if (chunk_type & GRUB_BTRFS_CHUNK_TYPE_RAID5)
+{
+  rebuild_raid5 (buffers, nstripes, csize);
+  grub_memcpy (buf, buffers[stripen].buf, csize);
+}
+  else
+{
+  grub_dprintf ("btrfs", "called rebuild_raid6(), NOT IMPLEMENTED\n");
+}
+
+cleanup:
+
+  if (buffers)
+{
+  for (i = 0; i < nstripes ; i++)
+   if (buff