As you know, ocfs2 supports trimming the underlying disk via the fstrim command. But there is a problem: ocfs2 is a shared-storage cluster file system, so if the user configures a scheduled fstrim job on each file system node, multiple nodes will trim the shared disk simultaneously, which is very wasteful in terms of CPU and I/O consumption. We therefore introduce a trimfs DLM lock, which ensures that only one fstrim command runs on the shared disk across the cluster at any time; any other fstrim command issued in the meantime returns with the -EBUSY errno.
Signed-off-by: Gang He <g...@suse.com> --- fs/ocfs2/alloc.c | 18 +++++++++++++++++- fs/ocfs2/dlmglue.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ fs/ocfs2/dlmglue.h | 2 ++ fs/ocfs2/ocfs2.h | 1 + fs/ocfs2/ocfs2_lockid.h | 5 +++++ 5 files changed, 73 insertions(+), 1 deletion(-) diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index ab5105f..89d16ad 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -7401,10 +7401,24 @@ int ocfs2_trim_fs(struct super_block *sb, struct fstrim_range *range) inode_lock(main_bm_inode); + ret = ocfs2_trim_fs_lock(osb); + if (ret < 0) { + if (ret != -EAGAIN) + mlog_errno(ret); + else { + ret = -EBUSY; + mlog(ML_NOTICE, + "Cannot trim disk %s since a trim operation is " + "running on it from another node.\n", + sb->s_id); + } + goto out_mutex; + } + ret = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 0); if (ret < 0) { mlog_errno(ret); - goto out_mutex; + goto out_fsunlock; } main_bm = (struct ocfs2_dinode *)main_bm_bh->b_data; @@ -7466,6 +7480,8 @@ int ocfs2_trim_fs(struct super_block *sb, struct fstrim_range *range) out_unlock: ocfs2_inode_unlock(main_bm_inode, 0); brelse(main_bm_bh); +out_fsunlock: + ocfs2_trim_fs_unlock(osb); out_mutex: inode_unlock(main_bm_inode); iput(main_bm_inode); diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 4689940..b28fdf4 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -259,6 +259,10 @@ struct ocfs2_lock_res_ops { .flags = 0, }; +static struct ocfs2_lock_res_ops ocfs2_trim_fs_lops = { + .flags = 0, +}; + static struct ocfs2_lock_res_ops ocfs2_orphan_scan_lops = { .flags = LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB, }; @@ -676,6 +680,15 @@ static void ocfs2_nfs_sync_lock_res_init(struct ocfs2_lock_res *res, &ocfs2_nfs_sync_lops, osb); } +static void ocfs2_trim_fs_lock_res_init(struct ocfs2_lock_res *res, + struct ocfs2_super *osb) +{ + ocfs2_lock_res_init_once(res); + ocfs2_build_lock_name(OCFS2_LOCK_TYPE_TRIM_FS, 0, 0, res->l_name); + ocfs2_lock_res_init_common(osb, 
res, OCFS2_LOCK_TYPE_TRIM_FS, + &ocfs2_trim_fs_lops, osb); +} + static void ocfs2_orphan_scan_lock_res_init(struct ocfs2_lock_res *res, struct ocfs2_super *osb) { @@ -2745,6 +2758,41 @@ void ocfs2_nfs_sync_unlock(struct ocfs2_super *osb, int ex) ex ? LKM_EXMODE : LKM_PRMODE); } +int ocfs2_trim_fs_lock(struct ocfs2_super *osb) +{ + int status; + struct ocfs2_lock_res *lockres = &osb->osb_trim_fs_lockres; + + if (ocfs2_is_hard_readonly(osb)) + return -EROFS; + + if (ocfs2_mount_local(osb)) + return 0; + + ocfs2_trim_fs_lock_res_init(lockres, osb); + status = ocfs2_cluster_lock(osb, lockres, LKM_EXMODE, + DLM_LKF_NOQUEUE, 0); + if (status < 0) { + if (status != -EAGAIN) + mlog_errno(status); + ocfs2_simple_drop_lockres(osb, lockres); + ocfs2_lock_res_free(lockres); + } + + return status; +} + +void ocfs2_trim_fs_unlock(struct ocfs2_super *osb) +{ + struct ocfs2_lock_res *lockres = &osb->osb_trim_fs_lockres; + + if (!ocfs2_mount_local(osb)) { + ocfs2_cluster_unlock(osb, lockres, LKM_EXMODE); + ocfs2_simple_drop_lockres(osb, lockres); + ocfs2_lock_res_free(lockres); + } +} + int ocfs2_dentry_lock(struct dentry *dentry, int ex) { int ret; diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h index a7fc18b..361e8a5 100644 --- a/fs/ocfs2/dlmglue.h +++ b/fs/ocfs2/dlmglue.h @@ -153,6 +153,8 @@ void ocfs2_super_unlock(struct ocfs2_super *osb, void ocfs2_rename_unlock(struct ocfs2_super *osb); int ocfs2_nfs_sync_lock(struct ocfs2_super *osb, int ex); void ocfs2_nfs_sync_unlock(struct ocfs2_super *osb, int ex); +int ocfs2_trim_fs_lock(struct ocfs2_super *osb); +void ocfs2_trim_fs_unlock(struct ocfs2_super *osb); int ocfs2_dentry_lock(struct dentry *dentry, int ex); void ocfs2_dentry_unlock(struct dentry *dentry, int ex); int ocfs2_file_lock(struct file *file, int ex, int trylock); diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index 9a50f22..6867eef 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -404,6 +404,7 @@ struct ocfs2_super struct ocfs2_lock_res 
osb_super_lockres; struct ocfs2_lock_res osb_rename_lockres; struct ocfs2_lock_res osb_nfs_sync_lockres; + struct ocfs2_lock_res osb_trim_fs_lockres; struct ocfs2_dlm_debug *osb_dlm_debug; struct dentry *osb_debug_root; diff --git a/fs/ocfs2/ocfs2_lockid.h b/fs/ocfs2/ocfs2_lockid.h index d277aab..7051b99 100644 --- a/fs/ocfs2/ocfs2_lockid.h +++ b/fs/ocfs2/ocfs2_lockid.h @@ -50,6 +50,7 @@ enum ocfs2_lock_type { OCFS2_LOCK_TYPE_NFS_SYNC, OCFS2_LOCK_TYPE_ORPHAN_SCAN, OCFS2_LOCK_TYPE_REFCOUNT, + OCFS2_LOCK_TYPE_TRIM_FS, OCFS2_NUM_LOCK_TYPES }; @@ -93,6 +94,9 @@ static inline char ocfs2_lock_type_char(enum ocfs2_lock_type type) case OCFS2_LOCK_TYPE_REFCOUNT: c = 'T'; break; + case OCFS2_LOCK_TYPE_TRIM_FS: + c = 'I'; + break; default: c = '\0'; } @@ -115,6 +119,7 @@ static inline char ocfs2_lock_type_char(enum ocfs2_lock_type type) [OCFS2_LOCK_TYPE_NFS_SYNC] = "NFSSync", [OCFS2_LOCK_TYPE_ORPHAN_SCAN] = "OrphanScan", [OCFS2_LOCK_TYPE_REFCOUNT] = "Refcount", + [OCFS2_LOCK_TYPE_TRIM_FS] = "TrimFs", }; static inline const char *ocfs2_lock_type_string(enum ocfs2_lock_type type) -- 1.8.5.6 _______________________________________________ Ocfs2-devel mailing list Ocfs2-devel@oss.oracle.com https://oss.oracle.com/mailman/listinfo/ocfs2-devel