On Mon, 3 Nov 2014 08:56:50 -0500, Josef Bacik wrote: > Our gluster boxes get several thousand statfs() calls per second, which begins > to suck hardcore with all of the lock contention on the chunk mutex and dev > list > mutex. We don't really need to hold these things, if we have transient > weirdness with statfs() because of the chunk allocator we don't care, so > remove > this locking. > > We still need the dev_list lock if you mount with -o alloc_start however, > which > is a good argument for nuking that thing from orbit, but that's a patch for > another day. Thanks, > > Signed-off-by: Josef Bacik <jba...@fb.com> > --- > V1->V2: make sure ->alloc_start is set before doing the dev extent lookup > logic.
I am strange that why we need dev_list_lock if we mount with -o alloc_start. AFAIK. ->alloc_start is protected by chunk_mutex. But I think we needn't care that someone changes ->alloc_start, in other words, we needn't take chunk_mutex during the whole process, the following case can be tolerated by the users, I think. Task1 Task2 statfs mutex_lock(&fs_info->chunk_mutex); tmp = fs_info->alloc_start; mutex_unlock(&fs_info->chunk_mutex); btrfs_calc_avail_data_space(fs_info, tmp) ... mount -o remount,alloc_start=xxxx ... Thanks Miao > > fs/btrfs/super.c | 72 > ++++++++++++++++++++++++++++++++++++-------------------- > 1 file changed, 47 insertions(+), 25 deletions(-) > > diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c > index 54bd91e..dc337d1 100644 > --- a/fs/btrfs/super.c > +++ b/fs/btrfs/super.c > @@ -1644,8 +1644,20 @@ static int btrfs_calc_avail_data_space(struct > btrfs_root *root, u64 *free_bytes) > int i = 0, nr_devices; > int ret; > > + /* > + * We aren't under the device list lock, so this is racey-ish, but good > + * enough for our purposes. > + */ > nr_devices = fs_info->fs_devices->open_devices; > - BUG_ON(!nr_devices); > + if (!nr_devices) { > + smp_mb(); > + nr_devices = fs_info->fs_devices->open_devices; > + ASSERT(nr_devices); > + if (!nr_devices) { > + *free_bytes = 0; > + return 0; > + } > + } > > devices_info = kmalloc_array(nr_devices, sizeof(*devices_info), > GFP_NOFS); > @@ -1670,11 +1682,17 @@ static int btrfs_calc_avail_data_space(struct > btrfs_root *root, u64 *free_bytes) > else > min_stripe_size = BTRFS_STRIPE_LEN; > > - list_for_each_entry(device, &fs_devices->devices, dev_list) { > + if (fs_info->alloc_start) > + mutex_lock(&fs_devices->device_list_mutex); > + rcu_read_lock(); > + list_for_each_entry_rcu(device, &fs_devices->devices, dev_list) { > if (!device->in_fs_metadata || !device->bdev || > device->is_tgtdev_for_dev_replace) > continue; > > + if (i >= nr_devices) > + break; > + > avail_space = device->total_bytes - device->bytes_used; > > /* align with stripe_len */ > @@ -1689,24 +1707,32 @@ static int btrfs_calc_avail_data_space(struct > btrfs_root *root, u64 *free_bytes) > skip_space = 1024 * 1024; > > /* user can set the offset in fs_info->alloc_start. */ > - if (fs_info->alloc_start + BTRFS_STRIPE_LEN <= > - device->total_bytes) > + if (fs_info->alloc_start && > + fs_info->alloc_start + BTRFS_STRIPE_LEN <= > + device->total_bytes) { > + rcu_read_unlock(); > skip_space = max(fs_info->alloc_start, skip_space); > > - /* > - * btrfs can not use the free space in [0, skip_space - 1], > - * we must subtract it from the total. In order to implement > - * it, we account the used space in this range first. > - */ > - ret = btrfs_account_dev_extents_size(device, 0, skip_space - 1, > - &used_space); > - if (ret) { > - kfree(devices_info); > - return ret; > - } > + /* > + * btrfs can not use the free space in > + * [0, skip_space - 1], we must subtract it from the > + * total. In order to implement it, we account the used > + * space in this range first. > + */ > + ret = btrfs_account_dev_extents_size(device, 0, > + skip_space - 1, > + &used_space); > + if (ret) { > + kfree(devices_info); > + mutex_unlock(&fs_devices->device_list_mutex); > + return ret; > + } > > - /* calc the free space in [0, skip_space - 1] */ > - skip_space -= used_space; > + rcu_read_lock(); > + > + /* calc the free space in [0, skip_space - 1] */ > + skip_space -= used_space; > + } > > /* > * we can use the free space in [0, skip_space - 1], subtract > @@ -1725,6 +1751,9 @@ static int btrfs_calc_avail_data_space(struct > btrfs_root *root, u64 *free_bytes) > > i++; > } > + rcu_read_unlock(); > + if (fs_info->alloc_start) > + mutex_unlock(&fs_devices->device_list_mutex); > > nr_devices = i; > > @@ -1787,8 +1816,6 @@ static int btrfs_statfs(struct dentry *dentry, struct > kstatfs *buf) > * holding chunk_muext to avoid allocating new chunks, holding > * device_list_mutex to avoid the device being removed > */ > - mutex_lock(&fs_info->fs_devices->device_list_mutex); > - mutex_lock(&fs_info->chunk_mutex); > rcu_read_lock(); > list_for_each_entry_rcu(found, head, list) { > if (found->flags & BTRFS_BLOCK_GROUP_DATA) { > @@ -1826,15 +1853,10 @@ static int btrfs_statfs(struct dentry *dentry, struct > kstatfs *buf) > > buf->f_bavail = total_free_data; > ret = btrfs_calc_avail_data_space(fs_info->tree_root, &total_free_data); > - if (ret) { > - mutex_unlock(&fs_info->chunk_mutex); > - mutex_unlock(&fs_info->fs_devices->device_list_mutex); > + if (ret) > return ret; > - } > buf->f_bavail += div_u64(total_free_data, factor); > buf->f_bavail = buf->f_bavail >> bits; > - mutex_unlock(&fs_info->chunk_mutex); > - mutex_unlock(&fs_info->fs_devices->device_list_mutex); > > buf->f_type = BTRFS_SUPER_MAGIC; > buf->f_bsize = dentry->d_sb->s_blocksize; > -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html