On 2021/7/23 1:44, Jaegeuk Kim wrote:
This tries to fix the priority inversion in the condition below, which
results in a long checkpoint delay.

f2fs_get_node_info()
  - nat_tree_lock
   -> sleep to grab journal_rwsem by contention

                                      checkpoint
                                      - waiting for nat_tree_lock

In order to let the checkpoint proceed, let's release nat_tree_lock if
there is contention on journal_rwsem.

The write lock of nat_tree_lock is taken in many places. How about retrying
the unlock/lock only while checkpoint() is flushing NAT blocks?

---
 fs/f2fs/f2fs.h | 1 +
 fs/f2fs/node.c | 8 ++++++--
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index ad8f99d7235f..05f41a15fda4 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -877,6 +877,7 @@ struct f2fs_nm_info {
        spinlock_t nat_list_lock;       /* protect clean nat entry list */
        unsigned int nat_cnt[MAX_NAT_STATE]; /* the # of cached nat entries */
        unsigned int nat_blocks;        /* # of nat blocks */
+       bool flushing_nat;              /* indicate checkpoint() is flushing nat blocks */

        /* free node ids management */
        struct radix_tree_root free_nid_root;/* root of the free_nid cache */
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index c60ba4179bb2..2caa171a68f8 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -569,7 +569,7 @@ int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
         * This sem is on the critical path on the checkpoint requiring the above
         * nat_tree_lock. Therefore, we should retry, if we failed to grab here.
         */
-       if (!down_read_trylock(&curseg->journal_rwsem)) {
+       if (!down_read_trylock(&curseg->journal_rwsem) && nm_i->flushing_nat) {
                up_read(&nm_i->nat_tree_lock);
                goto retry;
        }
@@ -2981,6 +2981,8 @@ int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
        LIST_HEAD(sets);
        int err = 0;

+       nm_i->flushing_nat = true;
+
        /*
         * during unmount, let's flush nat_bits before checking
         * nat_cnt[DIRTY_NAT].
@@ -2992,7 +2994,7 @@ int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
        }

        if (!nm_i->nat_cnt[DIRTY_NAT])
-               return 0;
+               goto out;

        down_write(&nm_i->nat_tree_lock);

@@ -3026,6 +3028,8 @@ int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
        up_write(&nm_i->nat_tree_lock);
        /* Allow dirty nats by node block allocation in write_begin */

+out:
+       nm_i->flushing_nat = false;
        return err;
 }

--
2.22.1




_______________________________________________
Linux-f2fs-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

Reply via email to