On 2/1/21 4:28 PM, Goffredo Baroncelli wrote:
From: Goffredo Baroncelli <kreij...@inwind.it>

When this mode is enabled, the chunk allocation policy is modified as follows.

Each disk may have a different tag:
- BTRFS_DEV_ALLOCATION_PREFERRED_METADATA
- BTRFS_DEV_ALLOCATION_METADATA_ONLY
- BTRFS_DEV_ALLOCATION_DATA_ONLY
- BTRFS_DEV_ALLOCATION_PREFERRED_DATA (default)

Where:
- ALLOCATION_PREFERRED_X means that it is preferred to use this disk for the
X chunk type (the other type may be allowed when the space is low)
- ALLOCATION_X_ONLY means that it is used *only* for the X chunk type. This
means also that it is a preferred choice.

Each time the allocator allocates a chunk of type X, it first takes the disks
tagged as ALLOCATION_X_ONLY or ALLOCATION_PREFERRED_X; if the space is not
enough, it also uses the disks tagged as ALLOCATION_METADATA_ONLY; if the space
is still not enough, it also uses the other disks, with the exception of the ones
marked as ALLOCATION_PREFERRED_Y, where Y is the other type of chunk (i.e. not X).

Signed-off-by: Goffredo Baroncelli <kreij...@inwind.it>
---
  fs/btrfs/volumes.c | 81 +++++++++++++++++++++++++++++++++++++++++++++-
  fs/btrfs/volumes.h |  1 +
  2 files changed, 81 insertions(+), 1 deletion(-)

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 68b346c5465d..57ee3e2fdac0 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -4806,13 +4806,18 @@ static int btrfs_add_system_chunk(struct btrfs_fs_info *fs_info,
  }
/*
- * sort the devices in descending order by max_avail, total_avail
+ * sort the devices in descending order by alloc_hint,
+ * max_avail, total_avail
   */
  static int btrfs_cmp_device_info(const void *a, const void *b)
  {
        const struct btrfs_device_info *di_a = a;
        const struct btrfs_device_info *di_b = b;
+ if (di_a->alloc_hint > di_b->alloc_hint)
+               return -1;
+       if (di_a->alloc_hint < di_b->alloc_hint)
+               return 1;
        if (di_a->max_avail > di_b->max_avail)
                return -1;
        if (di_a->max_avail < di_b->max_avail)
@@ -4939,6 +4944,15 @@ static int gather_device_info(struct btrfs_fs_devices *fs_devices,
        int ndevs = 0;
        u64 max_avail;
        u64 dev_offset;
+       int hint;
+
+       static const char alloc_hint_map[BTRFS_DEV_ALLOCATION_MASK_COUNT] = {
+               [BTRFS_DEV_ALLOCATION_DATA_ONLY] = -1,
+               [BTRFS_DEV_ALLOCATION_PREFERRED_DATA] = 0,
+               [BTRFS_DEV_ALLOCATION_METADATA_ONLY] = 1,
+               [BTRFS_DEV_ALLOCATION_PREFERRED_METADATA] = 2
+               /* the other values are set to 0 */
+       };

This can be made global, up with the btrfs_raid_array definitions.

/*
         * in the first pass through the devices list, we gather information
@@ -4991,16 +5005,81 @@ static int gather_device_info(struct btrfs_fs_devices *fs_devices,
                devices_info[ndevs].max_avail = max_avail;
                devices_info[ndevs].total_avail = total_avail;
                devices_info[ndevs].dev = device;
+
+               if (((ctl->type & BTRFS_BLOCK_GROUP_DATA) &&
+                    (ctl->type & BTRFS_BLOCK_GROUP_METADATA)) ||
+                   info->allocation_hint_mode ==
+                    BTRFS_ALLOCATION_HINT_DISABLED) {
+                       /*
+                        * if mixed bg or the allocator hint is
+                        * disabled, set all the alloc_hint
+                        * fields to the same value, so the sorting
+                        * is not affected
+                        */
+                       devices_info[ndevs].alloc_hint = 0;
+               } else if(ctl->type & BTRFS_BLOCK_GROUP_DATA) {
+                       hint = device->type & BTRFS_DEV_ALLOCATION_MASK;
+
+                       /*
+                        * skip BTRFS_DEV_ALLOCATION_METADATA_ONLY disks
+                        */
+                       if (hint == BTRFS_DEV_ALLOCATION_METADATA_ONLY)
+                               continue;
+                       /*
+                        * if a data chunk must be allocated,
+                        * sort also by hint (data disk
+                        * higher priority)
+                        */
+                       devices_info[ndevs].alloc_hint = -alloc_hint_map[hint];
+               } else { /* BTRFS_BLOCK_GROUP_METADATA */
+                       hint = device->type & BTRFS_DEV_ALLOCATION_MASK;
+
+                       /*
+                        * skip BTRFS_DEV_ALLOCATION_DATA_ONLY disks
+                        */
+                       if (hint == BTRFS_DEV_ALLOCATION_DATA_ONLY)
+                               continue;
+                       /*
+                        * if a metadata chunk must be allocated,
+                        * sort also by hint (metadata hint
+                        * higher priority)
+                        */
+                       devices_info[ndevs].alloc_hint = alloc_hint_map[hint];
+               }
+
                ++ndevs;
        }
        ctl->ndevs = ndevs;
+ /*
+        * no devices available
+        */
+       if (!ndevs)
+               return 0;
+
        /*
         * now sort the devices by hole size / available space
         */
        sort(devices_info, ndevs, sizeof(struct btrfs_device_info),
             btrfs_cmp_device_info, NULL);
+ /*
+        * select the minimum set of disks grouped by hint that
+        * can host the chunk
+        */
+       ndevs = 0;
+       while (ndevs < ctl->ndevs) {
+               hint = devices_info[ndevs++].alloc_hint;
+               while (devices_info[ndevs].alloc_hint == hint &&
+                      ndevs < ctl->ndevs)
+                               ndevs++;
+               if (ndevs >= ctl->devs_min)
+                       break;
+       }

Can we just adjust btrfs_cmp_device_info to take the hint info into account? Thanks,

Josef

Reply via email to