During fsync, we put the changed parts (i.e. the extent maps) into the log tree. To do so, we move these extent maps from the modified_extents list to a local list for processing; of course, we must increment the refs of the extent maps to keep them from getting evicted from the cache.
The problem is that we don't hold the tree's write lock for the whole time we iterate over the local list, so other threads can sneak in and silently delete an extent map from the local list. The extra reference we took on it is then never dropped, so we end up with a memory leak. Fix this by linking extent maps into the local list through a separate log_list member, which also makes EXTENT_FLAG_LOGGING unnecessary. I hit this when testing xfstest 274 with the mount options 'autodefrag,compress=zlib'. With this fix, the memory leak has gone away. Signed-off-by: Liu Bo <bo.li....@oracle.com> --- fs/btrfs/extent_map.c | 5 +++-- fs/btrfs/extent_map.h | 4 ++-- fs/btrfs/tree-log.c | 12 +++++------- 3 files changed, 10 insertions(+), 11 deletions(-) diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index c025a7a..4c6d271 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -78,6 +78,7 @@ struct extent_map *alloc_extent_map(void) em->generation = 0; atomic_set(&em->refs, 1); INIT_LIST_HEAD(&em->list); + INIT_LIST_HEAD(&em->log_list); #if LEAK_DEBUG spin_lock_irqsave(&map_leak_lock, flags); list_add(&em->leak_list, &emaps); @@ -107,6 +108,7 @@ void free_extent_map(struct extent_map *em) #endif WARN_ON(em->in_tree); WARN_ON(!list_empty(&em->list)); + WARN_ON(!list_empty(&em->log_list)); kmem_cache_free(extent_map_cache, em); } } @@ -433,8 +435,7 @@ int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em) WARN_ON(test_bit(EXTENT_FLAG_PINNED, &em->flags)); rb_erase(&em->rb_node, &tree->map); - if (!test_bit(EXTENT_FLAG_LOGGING, &em->flags)) - list_del_init(&em->list); + list_del_init(&em->list); em->in_tree = 0; return ret; } diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index d07a841..ac12389 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h @@ -13,8 +13,7 @@ #define EXTENT_FLAG_COMPRESSED 1 #define EXTENT_FLAG_VACANCY 2 /* no file extent item found */ #define EXTENT_FLAG_PREALLOC 3 /* pre-allocated extent */ -#define EXTENT_FLAG_LOGGING 4 /* Logging this extent */ -#define EXTENT_FLAG_FILLING 5 /* Filling in a preallocated 
extent */ struct extent_map { struct rb_node rb_node; @@ -35,6 +34,7 @@ struct extent_map { unsigned int in_tree; unsigned int compress_type; struct list_head list; + struct list_head log_list; struct list_head leak_list; }; diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 83186c7..c3ea5bd 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -3145,8 +3145,8 @@ static int extent_cmp(void *priv, struct list_head *a, struct list_head *b) { struct extent_map *em1, *em2; - em1 = list_entry(a, struct extent_map, list); - em2 = list_entry(b, struct extent_map, list); + em1 = list_entry(a, struct extent_map, log_list); + em2 = list_entry(b, struct extent_map, log_list); if (em1->start < em2->start) return -1; @@ -3400,17 +3400,15 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans, continue; /* Need a ref to keep it from getting evicted from cache */ atomic_inc(&em->refs); - set_bit(EXTENT_FLAG_LOGGING, &em->flags); - list_add_tail(&em->list, &extents); + list_add_tail(&em->log_list, &extents); } list_sort(NULL, &extents, extent_cmp); while (!list_empty(&extents)) { - em = list_entry(extents.next, struct extent_map, list); + em = list_entry(extents.next, struct extent_map, log_list); - list_del_init(&em->list); - clear_bit(EXTENT_FLAG_LOGGING, &em->flags); + list_del_init(&em->log_list); /* * If we had an error we just need to delete everybody from our -- 1.7.7.6 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html