Re: [PATCH RESEND v1 04/16] vfs: add two map arrays
On Thu, Dec 20, 2012 at 10:43:23PM +0800, zwu.ker...@gmail.com wrote: > --- a/fs/hot_tracking.c > +++ b/fs/hot_tracking.c > +/* Free inode and range map info */ > +static void hot_map_exit(struct hot_info *root) > +{ > + int i; > + for (i = 0; i < HEAT_MAP_SIZE; i++) { > + spin_lock(>heat_inode_map[i].lock); > + hot_map_list_free(>heat_inode_map[i].node_list, root); > + spin_unlock(>heat_inode_map[i].lock); please insert an empty line here to improve readability > + spin_lock(>heat_range_map[i].lock); > + hot_map_list_free(>heat_range_map[i].node_list, root); > + spin_unlock(>heat_range_map[i].lock); > + } > +} > + > +/* > * Initialize kmem cache for hot_inode_item and hot_range_item. > */ > void __init hot_cache_init(void) > --- a/include/linux/hot_tracking.h > +++ b/include/linux/hot_tracking.h > @@ -71,6 +82,12 @@ struct hot_range_item { > struct hot_info { > struct hot_rb_tree hot_inode_tree; > spinlock_t lock; /*protect inode tree */ > + > + /* map of inode temperature */ > + struct hot_map_head heat_inode_map[HEAT_MAP_SIZE]; > + /* map of range temperature */ > + struct hot_map_head heat_range_map[HEAT_MAP_SIZE]; > + unsigned int hot_map_nr; > }; Final layout of struct hot_info is struct hot_info { struct hot_rb_tree hot_inode_tree; /* 0 8 */ spinlock_t lock; /* 872 */ /* --- cacheline 1 boundary (64 bytes) was 16 bytes ago --- */ struct hot_map_headheat_inode_map[256]; /*80 24576 */ /* --- cacheline 385 boundary (24640 bytes) was 16 bytes ago --- */ struct hot_map_headheat_range_map[256]; /* 24656 24576 */ /* --- cacheline 769 boundary (49216 bytes) was 16 bytes ago --- */ unsigned int hot_map_nr; /* 49232 4 */ /* XXX 4 bytes hole, try to pack */ struct workqueue_struct * update_wq;/* 49240 8 */ struct delayed_workupdate_work; /* 49248 216 */ /* XXX last struct has 4 bytes of padding */ /* --- cacheline 772 boundary (49408 bytes) was 56 bytes ago --- */ struct hot_type * hot_type; /* 49464 8 */ /* --- cacheline 773 boundary (49472 bytes) --- */ struct shrinkerhot_shrink; /* 4947248 */ struct dentry *vol_dentry; /* 49520 8 */ /* size: 49528, cachelines: 774, members: 10 */ /* sum members: 49524, holes: 1, sum holes: 4 */ /* paddings: 1, sum paddings: 4 */ /* last cacheline: 56 bytes */ }; that's an order-4 allocation and the heat_*_map[] themselves need order-3. Also the structure struct hot_map_head { struct list_head node_list;/* 016 */ u8 temp; /*16 1 */ /* XXX 7 bytes hole, try to pack */ spinlock_t lock; /*2472 */ /* --- cacheline 1 boundary (64 bytes) was 32 bytes ago --- */ /* size: 96, cachelines: 2, members: 3 */ /* sum members: 89, holes: 1, sum holes: 7 */ /* last cacheline: 32 bytes */ }; is not packed efficiently and given the number of the array items, the wasted space adds to the sum. So, this needs to be fixed. Options I see: 1) try to allocate the structure with GFP_NOWARN and use vmalloc as a fallback 2) allocate heat_*_map arrays dynamically An array of 256 pointers takes 2048 bytes, so when there are 2 of them plus other struct items, overall size will go beyond a 4k page. Also, doing kmalloc on each heat_*_map item could spread them over memory, although hot_info is a long-term structure and it would make sense to keep the data located at one place. For struct hot_map_head I suggest to create a slab. david -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH RESEND v1 04/16] vfs: add two map arrays
On Thu, Dec 20, 2012 at 10:43:23PM +0800, zwu.ker...@gmail.com wrote: --- a/fs/hot_tracking.c +++ b/fs/hot_tracking.c +/* Free inode and range map info */ +static void hot_map_exit(struct hot_info *root) +{ + int i; + for (i = 0; i HEAT_MAP_SIZE; i++) { + spin_lock(root-heat_inode_map[i].lock); + hot_map_list_free(root-heat_inode_map[i].node_list, root); + spin_unlock(root-heat_inode_map[i].lock); please insert an empty line here to improve readability + spin_lock(root-heat_range_map[i].lock); + hot_map_list_free(root-heat_range_map[i].node_list, root); + spin_unlock(root-heat_range_map[i].lock); + } +} + +/* * Initialize kmem cache for hot_inode_item and hot_range_item. */ void __init hot_cache_init(void) --- a/include/linux/hot_tracking.h +++ b/include/linux/hot_tracking.h @@ -71,6 +82,12 @@ struct hot_range_item { struct hot_info { struct hot_rb_tree hot_inode_tree; spinlock_t lock; /*protect inode tree */ + + /* map of inode temperature */ + struct hot_map_head heat_inode_map[HEAT_MAP_SIZE]; + /* map of range temperature */ + struct hot_map_head heat_range_map[HEAT_MAP_SIZE]; + unsigned int hot_map_nr; }; Final layout of struct hot_info is struct hot_info { struct hot_rb_tree hot_inode_tree; /* 0 8 */ spinlock_t lock; /* 872 */ /* --- cacheline 1 boundary (64 bytes) was 16 bytes ago --- */ struct hot_map_headheat_inode_map[256]; /*80 24576 */ /* --- cacheline 385 boundary (24640 bytes) was 16 bytes ago --- */ struct hot_map_headheat_range_map[256]; /* 24656 24576 */ /* --- cacheline 769 boundary (49216 bytes) was 16 bytes ago --- */ unsigned int hot_map_nr; /* 49232 4 */ /* XXX 4 bytes hole, try to pack */ struct workqueue_struct * update_wq;/* 49240 8 */ struct delayed_workupdate_work; /* 49248 216 */ /* XXX last struct has 4 bytes of padding */ /* --- cacheline 772 boundary (49408 bytes) was 56 bytes ago --- */ struct hot_type * hot_type; /* 49464 8 */ /* --- cacheline 773 boundary (49472 bytes) --- */ struct shrinkerhot_shrink; /* 4947248 */ struct dentry *vol_dentry; /* 49520 8 */ /* size: 49528, cachelines: 774, members: 10 */ /* sum members: 49524, holes: 1, sum holes: 4 */ /* paddings: 1, sum paddings: 4 */ /* last cacheline: 56 bytes */ }; that's an order-4 allocation and the heat_*_map[] themselves need order-3. Also the structure struct hot_map_head { struct list_head node_list;/* 016 */ u8 temp; /*16 1 */ /* XXX 7 bytes hole, try to pack */ spinlock_t lock; /*2472 */ /* --- cacheline 1 boundary (64 bytes) was 32 bytes ago --- */ /* size: 96, cachelines: 2, members: 3 */ /* sum members: 89, holes: 1, sum holes: 7 */ /* last cacheline: 32 bytes */ }; is not packed efficiently and given the number of the array items, the wasted space adds to the sum. So, this needs to be fixed. Options I see: 1) try to allocate the structure with GFP_NOWARN and use vmalloc as a fallback 2) allocate heat_*_map arrays dynamically An array of 256 pointers takes 2048 bytes, so when there are 2 of them plus other struct items, overall size will go beyond a 4k page. Also, doing kmalloc on each heat_*_map item could spread them over memory, although hot_info is a long-term structure and it would make sense to keep the data located at one place. For struct hot_map_head I suggest to create a slab. david -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH RESEND v1 04/16] vfs: add two map arrays
From: Zhi Yong Wu Adds two map arrays which contains a lot of list and is used to efficiently look up the data temperature of a file or its ranges. In each list of map arrays, the array node will keep track of temperature info. Signed-off-by: Zhi Yong Wu --- fs/hot_tracking.c| 67 ++ include/linux/hot_tracking.h | 17 ++ 2 files changed, 84 insertions(+), 0 deletions(-) diff --git a/fs/hot_tracking.c b/fs/hot_tracking.c index 6f587fa..5f164c8 100644 --- a/fs/hot_tracking.c +++ b/fs/hot_tracking.c @@ -58,6 +58,7 @@ static void hot_range_item_init(struct hot_range_item *hr, loff_t start, hr->hot_inode = he; kref_init(>hot_range.refs); spin_lock_init(>hot_range.lock); + INIT_LIST_HEAD(>hot_range.n_list); hr->hot_range.hot_freq_data.avg_delta_reads = (u64) -1; hr->hot_range.hot_freq_data.avg_delta_writes = (u64) -1; hr->hot_range.hot_freq_data.flags = FREQ_DATA_TYPE_RANGE; @@ -89,9 +90,20 @@ static void hot_range_item_free(struct kref *kref) struct hot_comm_item, refs); struct hot_range_item *hr = container_of(comm_item, struct hot_range_item, hot_range); + struct hot_info *root = container_of( + hr->hot_inode->hot_inode_tree, + struct hot_info, hot_inode_tree); + + spin_lock(>hot_range.lock); + if (!list_empty(>hot_range.n_list)) { + list_del_init(>hot_range.n_list); + root->hot_map_nr--; + } rb_erase(>hot_range.rb_node, >hot_inode->hot_range_tree.map); + spin_unlock(>hot_range.lock); + kmem_cache_free(hot_range_item_cachep, hr); } @@ -128,6 +140,15 @@ static void hot_inode_item_free(struct kref *kref) struct hot_comm_item, refs); struct hot_inode_item *he = container_of(comm_item, struct hot_inode_item, hot_inode); + struct hot_info *root = container_of(he->hot_inode_tree, + struct hot_info, hot_inode_tree); + + spin_lock(>hot_inode.lock); + if (!list_empty(>hot_inode.n_list)) { + list_del_init(>hot_inode.n_list); + root->hot_map_nr--; + } + spin_unlock(>hot_inode.lock); hot_range_tree_free(he); spin_lock(>hot_inode.lock); @@ -294,6 +315,50 @@ static void hot_freq_data_update(struct hot_freq_data *freq_data, bool write) } /* + * Initialize inode and range map info. + */ +static void hot_map_init(struct hot_info *root) +{ + int i; + for (i = 0; i < HEAT_MAP_SIZE; i++) { + INIT_LIST_HEAD(>heat_inode_map[i].node_list); + INIT_LIST_HEAD(>heat_range_map[i].node_list); + root->heat_inode_map[i].temp = i; + root->heat_range_map[i].temp = i; + spin_lock_init(>heat_inode_map[i].lock); + spin_lock_init(>heat_range_map[i].lock); + } +} + +static void hot_map_list_free(struct list_head *node_list, + struct hot_info *root) +{ + struct list_head *pos, *next; + struct hot_comm_item *node; + + list_for_each_safe(pos, next, node_list) { + node = list_entry(pos, struct hot_comm_item, n_list); + list_del_init(>n_list); + root->hot_map_nr--; + } + +} + +/* Free inode and range map info */ +static void hot_map_exit(struct hot_info *root) +{ + int i; + for (i = 0; i < HEAT_MAP_SIZE; i++) { + spin_lock(>heat_inode_map[i].lock); + hot_map_list_free(>heat_inode_map[i].node_list, root); + spin_unlock(>heat_inode_map[i].lock); + spin_lock(>heat_range_map[i].lock); + hot_map_list_free(>heat_range_map[i].node_list, root); + spin_unlock(>heat_range_map[i].lock); + } +} + +/* * Initialize kmem cache for hot_inode_item and hot_range_item. */ void __init hot_cache_init(void) @@ -384,6 +449,7 @@ int hot_track_init(struct super_block *sb) } hot_inode_tree_init(root); + hot_map_init(root); sb->s_hot_root = root; @@ -397,6 +463,7 @@ void hot_track_exit(struct super_block *sb) { struct hot_info *root = sb->s_hot_root; + hot_map_exit(root); hot_inode_tree_exit(root); sb->s_hot_root = NULL; kfree(root); diff --git a/include/linux/hot_tracking.h b/include/linux/hot_tracking.h index d555046..23edad92 100644 --- a/include/linux/hot_tracking.h +++ b/include/linux/hot_tracking.h @@ -20,6 +20,9 @@ #include #include +#define HEAT_MAP_BITS 8 +#define HEAT_MAP_SIZE (1 << HEAT_MAP_BITS) + struct hot_rb_tree { struct rb_root map; }; @@ -40,12 +43,20 @@ struct hot_freq_data { u32 last_temp; }; +/* List heads in hot map array */ +struct hot_map_head { + struct list_head node_list; + u8 temp; +
[PATCH RESEND v1 04/16] vfs: add two map arrays
From: Zhi Yong Wu wu...@linux.vnet.ibm.com Adds two map arrays which contains a lot of list and is used to efficiently look up the data temperature of a file or its ranges. In each list of map arrays, the array node will keep track of temperature info. Signed-off-by: Zhi Yong Wu wu...@linux.vnet.ibm.com --- fs/hot_tracking.c| 67 ++ include/linux/hot_tracking.h | 17 ++ 2 files changed, 84 insertions(+), 0 deletions(-) diff --git a/fs/hot_tracking.c b/fs/hot_tracking.c index 6f587fa..5f164c8 100644 --- a/fs/hot_tracking.c +++ b/fs/hot_tracking.c @@ -58,6 +58,7 @@ static void hot_range_item_init(struct hot_range_item *hr, loff_t start, hr-hot_inode = he; kref_init(hr-hot_range.refs); spin_lock_init(hr-hot_range.lock); + INIT_LIST_HEAD(hr-hot_range.n_list); hr-hot_range.hot_freq_data.avg_delta_reads = (u64) -1; hr-hot_range.hot_freq_data.avg_delta_writes = (u64) -1; hr-hot_range.hot_freq_data.flags = FREQ_DATA_TYPE_RANGE; @@ -89,9 +90,20 @@ static void hot_range_item_free(struct kref *kref) struct hot_comm_item, refs); struct hot_range_item *hr = container_of(comm_item, struct hot_range_item, hot_range); + struct hot_info *root = container_of( + hr-hot_inode-hot_inode_tree, + struct hot_info, hot_inode_tree); + + spin_lock(hr-hot_range.lock); + if (!list_empty(hr-hot_range.n_list)) { + list_del_init(hr-hot_range.n_list); + root-hot_map_nr--; + } rb_erase(hr-hot_range.rb_node, hr-hot_inode-hot_range_tree.map); + spin_unlock(hr-hot_range.lock); + kmem_cache_free(hot_range_item_cachep, hr); } @@ -128,6 +140,15 @@ static void hot_inode_item_free(struct kref *kref) struct hot_comm_item, refs); struct hot_inode_item *he = container_of(comm_item, struct hot_inode_item, hot_inode); + struct hot_info *root = container_of(he-hot_inode_tree, + struct hot_info, hot_inode_tree); + + spin_lock(he-hot_inode.lock); + if (!list_empty(he-hot_inode.n_list)) { + list_del_init(he-hot_inode.n_list); + root-hot_map_nr--; + } + spin_unlock(he-hot_inode.lock); hot_range_tree_free(he); spin_lock(he-hot_inode.lock); @@ -294,6 +315,50 @@ static void hot_freq_data_update(struct hot_freq_data *freq_data, bool write) } /* + * Initialize inode and range map info. + */ +static void hot_map_init(struct hot_info *root) +{ + int i; + for (i = 0; i HEAT_MAP_SIZE; i++) { + INIT_LIST_HEAD(root-heat_inode_map[i].node_list); + INIT_LIST_HEAD(root-heat_range_map[i].node_list); + root-heat_inode_map[i].temp = i; + root-heat_range_map[i].temp = i; + spin_lock_init(root-heat_inode_map[i].lock); + spin_lock_init(root-heat_range_map[i].lock); + } +} + +static void hot_map_list_free(struct list_head *node_list, + struct hot_info *root) +{ + struct list_head *pos, *next; + struct hot_comm_item *node; + + list_for_each_safe(pos, next, node_list) { + node = list_entry(pos, struct hot_comm_item, n_list); + list_del_init(node-n_list); + root-hot_map_nr--; + } + +} + +/* Free inode and range map info */ +static void hot_map_exit(struct hot_info *root) +{ + int i; + for (i = 0; i HEAT_MAP_SIZE; i++) { + spin_lock(root-heat_inode_map[i].lock); + hot_map_list_free(root-heat_inode_map[i].node_list, root); + spin_unlock(root-heat_inode_map[i].lock); + spin_lock(root-heat_range_map[i].lock); + hot_map_list_free(root-heat_range_map[i].node_list, root); + spin_unlock(root-heat_range_map[i].lock); + } +} + +/* * Initialize kmem cache for hot_inode_item and hot_range_item. */ void __init hot_cache_init(void) @@ -384,6 +449,7 @@ int hot_track_init(struct super_block *sb) } hot_inode_tree_init(root); + hot_map_init(root); sb-s_hot_root = root; @@ -397,6 +463,7 @@ void hot_track_exit(struct super_block *sb) { struct hot_info *root = sb-s_hot_root; + hot_map_exit(root); hot_inode_tree_exit(root); sb-s_hot_root = NULL; kfree(root); diff --git a/include/linux/hot_tracking.h b/include/linux/hot_tracking.h index d555046..23edad92 100644 --- a/include/linux/hot_tracking.h +++ b/include/linux/hot_tracking.h @@ -20,6 +20,9 @@ #include linux/kref.h #include linux/fs.h +#define HEAT_MAP_BITS 8 +#define HEAT_MAP_SIZE (1 HEAT_MAP_BITS) + struct hot_rb_tree { struct rb_root map; }; @@ -40,12 +43,20 @@ struct hot_freq_data { u32 last_temp; };