Re: [PATCH RESEND v1 04/16] vfs: add two map arrays

2013-01-09 Thread David Sterba
On Thu, Dec 20, 2012 at 10:43:23PM +0800, zwu.ker...@gmail.com wrote:
> --- a/fs/hot_tracking.c
> +++ b/fs/hot_tracking.c
> +/* Free inode and range map info */
> +static void hot_map_exit(struct hot_info *root)
> +{
> + int i;
> + for (i = 0; i < HEAT_MAP_SIZE; i++) {
> + spin_lock(>heat_inode_map[i].lock);
> + hot_map_list_free(>heat_inode_map[i].node_list, root);
> + spin_unlock(>heat_inode_map[i].lock);

please insert an empty line here to improve readability

> + spin_lock(>heat_range_map[i].lock);
> + hot_map_list_free(>heat_range_map[i].node_list, root);
> + spin_unlock(>heat_range_map[i].lock);
> + }
> +}
> +
> +/*
>   * Initialize kmem cache for hot_inode_item and hot_range_item.
>   */
>  void __init hot_cache_init(void)
> --- a/include/linux/hot_tracking.h
> +++ b/include/linux/hot_tracking.h
> @@ -71,6 +82,12 @@ struct hot_range_item {
>  struct hot_info {
>   struct hot_rb_tree hot_inode_tree;
>   spinlock_t lock; /*protect inode tree */
> +
> + /* map of inode temperature */
> + struct hot_map_head heat_inode_map[HEAT_MAP_SIZE];
> + /* map of range temperature */
> + struct hot_map_head heat_range_map[HEAT_MAP_SIZE];
> + unsigned int hot_map_nr;
>  };

Final layout of struct hot_info is

struct hot_info {
struct hot_rb_tree hot_inode_tree;   /* 0 8 */
spinlock_t lock; /* 872 */
/* --- cacheline 1 boundary (64 bytes) was 16 bytes ago --- */
struct hot_map_headheat_inode_map[256];  /*80 24576 */
/* --- cacheline 385 boundary (24640 bytes) was 16 bytes ago --- */
struct hot_map_headheat_range_map[256];  /* 24656 24576 */
/* --- cacheline 769 boundary (49216 bytes) was 16 bytes ago --- */
unsigned int   hot_map_nr;   /* 49232 4 */

/* XXX 4 bytes hole, try to pack */

struct workqueue_struct *  update_wq;/* 49240 8 */
struct delayed_workupdate_work;  /* 49248   216 */

/* XXX last struct has 4 bytes of padding */

/* --- cacheline 772 boundary (49408 bytes) was 56 bytes ago --- */
struct hot_type *  hot_type; /* 49464 8 */
/* --- cacheline 773 boundary (49472 bytes) --- */
struct shrinkerhot_shrink;   /* 4947248 */
struct dentry *vol_dentry;   /* 49520 8 */

/* size: 49528, cachelines: 774, members: 10 */
/* sum members: 49524, holes: 1, sum holes: 4 */
/* paddings: 1, sum paddings: 4 */
/* last cacheline: 56 bytes */
};

that's an order-4 allocation and the heat_*_map[] themselves need order-3.

Also the structure

struct hot_map_head {
struct list_head   node_list;/* 016 */
u8 temp; /*16 1 */

/* XXX 7 bytes hole, try to pack */

spinlock_t lock; /*2472 */
/* --- cacheline 1 boundary (64 bytes) was 32 bytes ago --- */

/* size: 96, cachelines: 2, members: 3 */
/* sum members: 89, holes: 1, sum holes: 7 */
/* last cacheline: 32 bytes */
};

is not packed efficiently and given the number of the array items, the wasted
space adds to the sum.

So, this needs to be fixed. Options I see:

1) try to allocate the structure with GFP_NOWARN and use vmalloc as a fallback
2) allocate heat_*_map arrays dynamically

An array of 256 pointers takes 2048 bytes, so when there are 2 of them plus
other struct items, overall size will go beyond a 4k page. Also, doing
kmalloc on each heat_*_map item could spread them over memory, although
hot_info is a long-term structure and it would make sense to keep the
data located at one place. For struct hot_map_head I suggest to create a
slab.


david
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH RESEND v1 04/16] vfs: add two map arrays

2013-01-09 Thread David Sterba
On Thu, Dec 20, 2012 at 10:43:23PM +0800, zwu.ker...@gmail.com wrote:
 --- a/fs/hot_tracking.c
 +++ b/fs/hot_tracking.c
 +/* Free inode and range map info */
 +static void hot_map_exit(struct hot_info *root)
 +{
 + int i;
 + for (i = 0; i  HEAT_MAP_SIZE; i++) {
 + spin_lock(root-heat_inode_map[i].lock);
 + hot_map_list_free(root-heat_inode_map[i].node_list, root);
 + spin_unlock(root-heat_inode_map[i].lock);

please insert an empty line here to improve readability

 + spin_lock(root-heat_range_map[i].lock);
 + hot_map_list_free(root-heat_range_map[i].node_list, root);
 + spin_unlock(root-heat_range_map[i].lock);
 + }
 +}
 +
 +/*
   * Initialize kmem cache for hot_inode_item and hot_range_item.
   */
  void __init hot_cache_init(void)
 --- a/include/linux/hot_tracking.h
 +++ b/include/linux/hot_tracking.h
 @@ -71,6 +82,12 @@ struct hot_range_item {
  struct hot_info {
   struct hot_rb_tree hot_inode_tree;
   spinlock_t lock; /*protect inode tree */
 +
 + /* map of inode temperature */
 + struct hot_map_head heat_inode_map[HEAT_MAP_SIZE];
 + /* map of range temperature */
 + struct hot_map_head heat_range_map[HEAT_MAP_SIZE];
 + unsigned int hot_map_nr;
  };

Final layout of struct hot_info is

struct hot_info {
struct hot_rb_tree hot_inode_tree;   /* 0 8 */
spinlock_t lock; /* 872 */
/* --- cacheline 1 boundary (64 bytes) was 16 bytes ago --- */
struct hot_map_headheat_inode_map[256];  /*80 24576 */
/* --- cacheline 385 boundary (24640 bytes) was 16 bytes ago --- */
struct hot_map_headheat_range_map[256];  /* 24656 24576 */
/* --- cacheline 769 boundary (49216 bytes) was 16 bytes ago --- */
unsigned int   hot_map_nr;   /* 49232 4 */

/* XXX 4 bytes hole, try to pack */

struct workqueue_struct *  update_wq;/* 49240 8 */
struct delayed_workupdate_work;  /* 49248   216 */

/* XXX last struct has 4 bytes of padding */

/* --- cacheline 772 boundary (49408 bytes) was 56 bytes ago --- */
struct hot_type *  hot_type; /* 49464 8 */
/* --- cacheline 773 boundary (49472 bytes) --- */
struct shrinkerhot_shrink;   /* 4947248 */
struct dentry *vol_dentry;   /* 49520 8 */

/* size: 49528, cachelines: 774, members: 10 */
/* sum members: 49524, holes: 1, sum holes: 4 */
/* paddings: 1, sum paddings: 4 */
/* last cacheline: 56 bytes */
};

that's an order-4 allocation and the heat_*_map[] themselves need order-3.

Also the structure

struct hot_map_head {
struct list_head   node_list;/* 016 */
u8 temp; /*16 1 */

/* XXX 7 bytes hole, try to pack */

spinlock_t lock; /*2472 */
/* --- cacheline 1 boundary (64 bytes) was 32 bytes ago --- */

/* size: 96, cachelines: 2, members: 3 */
/* sum members: 89, holes: 1, sum holes: 7 */
/* last cacheline: 32 bytes */
};

is not packed efficiently and given the number of the array items, the wasted
space adds to the sum.

So, this needs to be fixed. Options I see:

1) try to allocate the structure with GFP_NOWARN and use vmalloc as a fallback
2) allocate heat_*_map arrays dynamically

An array of 256 pointers takes 2048 bytes, so when there are 2 of them plus
other struct items, overall size will go beyond a 4k page. Also, doing
kmalloc on each heat_*_map item could spread them over memory, although
hot_info is a long-term structure and it would make sense to keep the
data located at one place. For struct hot_map_head I suggest to create a
slab.


david
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH RESEND v1 04/16] vfs: add two map arrays

2012-12-20 Thread zwu . kernel
From: Zhi Yong Wu 

  Adds two map arrays which contains
a lot of list and is used to efficiently
look up the data temperature of a file or its
ranges.
  In each list of map arrays, the array node
will keep track of temperature info.

Signed-off-by: Zhi Yong Wu 
---
 fs/hot_tracking.c|   67 ++
 include/linux/hot_tracking.h |   17 ++
 2 files changed, 84 insertions(+), 0 deletions(-)

diff --git a/fs/hot_tracking.c b/fs/hot_tracking.c
index 6f587fa..5f164c8 100644
--- a/fs/hot_tracking.c
+++ b/fs/hot_tracking.c
@@ -58,6 +58,7 @@ static void hot_range_item_init(struct hot_range_item *hr, 
loff_t start,
hr->hot_inode = he;
kref_init(>hot_range.refs);
spin_lock_init(>hot_range.lock);
+   INIT_LIST_HEAD(>hot_range.n_list);
hr->hot_range.hot_freq_data.avg_delta_reads = (u64) -1;
hr->hot_range.hot_freq_data.avg_delta_writes = (u64) -1;
hr->hot_range.hot_freq_data.flags = FREQ_DATA_TYPE_RANGE;
@@ -89,9 +90,20 @@ static void hot_range_item_free(struct kref *kref)
struct hot_comm_item, refs);
struct hot_range_item *hr = container_of(comm_item,
struct hot_range_item, hot_range);
+   struct hot_info *root = container_of(
+   hr->hot_inode->hot_inode_tree,
+   struct hot_info, hot_inode_tree);
+
+   spin_lock(>hot_range.lock);
+   if (!list_empty(>hot_range.n_list)) {
+   list_del_init(>hot_range.n_list);
+   root->hot_map_nr--;
+   }
 
rb_erase(>hot_range.rb_node,
>hot_inode->hot_range_tree.map);
+   spin_unlock(>hot_range.lock);
+
kmem_cache_free(hot_range_item_cachep, hr);
 }
 
@@ -128,6 +140,15 @@ static void hot_inode_item_free(struct kref *kref)
struct hot_comm_item, refs);
struct hot_inode_item *he = container_of(comm_item,
struct hot_inode_item, hot_inode);
+   struct hot_info *root = container_of(he->hot_inode_tree,
+   struct hot_info, hot_inode_tree);
+
+   spin_lock(>hot_inode.lock);
+   if (!list_empty(>hot_inode.n_list)) {
+   list_del_init(>hot_inode.n_list);
+   root->hot_map_nr--;
+   }
+   spin_unlock(>hot_inode.lock);
 
hot_range_tree_free(he);
spin_lock(>hot_inode.lock);
@@ -294,6 +315,50 @@ static void hot_freq_data_update(struct hot_freq_data 
*freq_data, bool write)
 }
 
 /*
+ * Initialize inode and range map info.
+ */
+static void hot_map_init(struct hot_info *root)
+{
+   int i;
+   for (i = 0; i < HEAT_MAP_SIZE; i++) {
+   INIT_LIST_HEAD(>heat_inode_map[i].node_list);
+   INIT_LIST_HEAD(>heat_range_map[i].node_list);
+   root->heat_inode_map[i].temp = i;
+   root->heat_range_map[i].temp = i;
+   spin_lock_init(>heat_inode_map[i].lock);
+   spin_lock_init(>heat_range_map[i].lock);
+   }
+}
+
+static void hot_map_list_free(struct list_head *node_list,
+   struct hot_info *root)
+{
+   struct list_head *pos, *next;
+   struct hot_comm_item *node;
+
+   list_for_each_safe(pos, next, node_list) {
+   node = list_entry(pos, struct hot_comm_item, n_list);
+   list_del_init(>n_list);
+   root->hot_map_nr--;
+   }
+
+}
+
+/* Free inode and range map info */
+static void hot_map_exit(struct hot_info *root)
+{
+   int i;
+   for (i = 0; i < HEAT_MAP_SIZE; i++) {
+   spin_lock(>heat_inode_map[i].lock);
+   hot_map_list_free(>heat_inode_map[i].node_list, root);
+   spin_unlock(>heat_inode_map[i].lock);
+   spin_lock(>heat_range_map[i].lock);
+   hot_map_list_free(>heat_range_map[i].node_list, root);
+   spin_unlock(>heat_range_map[i].lock);
+   }
+}
+
+/*
  * Initialize kmem cache for hot_inode_item and hot_range_item.
  */
 void __init hot_cache_init(void)
@@ -384,6 +449,7 @@ int hot_track_init(struct super_block *sb)
}
 
hot_inode_tree_init(root);
+   hot_map_init(root);
 
sb->s_hot_root = root;
 
@@ -397,6 +463,7 @@ void hot_track_exit(struct super_block *sb)
 {
struct hot_info *root = sb->s_hot_root;
 
+   hot_map_exit(root);
hot_inode_tree_exit(root);
sb->s_hot_root = NULL;
kfree(root);
diff --git a/include/linux/hot_tracking.h b/include/linux/hot_tracking.h
index d555046..23edad92 100644
--- a/include/linux/hot_tracking.h
+++ b/include/linux/hot_tracking.h
@@ -20,6 +20,9 @@
 #include 
 #include 
 
+#define HEAT_MAP_BITS 8
+#define HEAT_MAP_SIZE (1 << HEAT_MAP_BITS)
+
 struct hot_rb_tree {
struct rb_root map;
 };
@@ -40,12 +43,20 @@ struct hot_freq_data {
u32 last_temp;
 };
 
+/* List heads in hot map array */
+struct hot_map_head {
+   struct list_head node_list;
+   u8 temp;
+   

[PATCH RESEND v1 04/16] vfs: add two map arrays

2012-12-20 Thread zwu . kernel
From: Zhi Yong Wu wu...@linux.vnet.ibm.com

  Adds two map arrays which contains
a lot of list and is used to efficiently
look up the data temperature of a file or its
ranges.
  In each list of map arrays, the array node
will keep track of temperature info.

Signed-off-by: Zhi Yong Wu wu...@linux.vnet.ibm.com
---
 fs/hot_tracking.c|   67 ++
 include/linux/hot_tracking.h |   17 ++
 2 files changed, 84 insertions(+), 0 deletions(-)

diff --git a/fs/hot_tracking.c b/fs/hot_tracking.c
index 6f587fa..5f164c8 100644
--- a/fs/hot_tracking.c
+++ b/fs/hot_tracking.c
@@ -58,6 +58,7 @@ static void hot_range_item_init(struct hot_range_item *hr, 
loff_t start,
hr-hot_inode = he;
kref_init(hr-hot_range.refs);
spin_lock_init(hr-hot_range.lock);
+   INIT_LIST_HEAD(hr-hot_range.n_list);
hr-hot_range.hot_freq_data.avg_delta_reads = (u64) -1;
hr-hot_range.hot_freq_data.avg_delta_writes = (u64) -1;
hr-hot_range.hot_freq_data.flags = FREQ_DATA_TYPE_RANGE;
@@ -89,9 +90,20 @@ static void hot_range_item_free(struct kref *kref)
struct hot_comm_item, refs);
struct hot_range_item *hr = container_of(comm_item,
struct hot_range_item, hot_range);
+   struct hot_info *root = container_of(
+   hr-hot_inode-hot_inode_tree,
+   struct hot_info, hot_inode_tree);
+
+   spin_lock(hr-hot_range.lock);
+   if (!list_empty(hr-hot_range.n_list)) {
+   list_del_init(hr-hot_range.n_list);
+   root-hot_map_nr--;
+   }
 
rb_erase(hr-hot_range.rb_node,
hr-hot_inode-hot_range_tree.map);
+   spin_unlock(hr-hot_range.lock);
+
kmem_cache_free(hot_range_item_cachep, hr);
 }
 
@@ -128,6 +140,15 @@ static void hot_inode_item_free(struct kref *kref)
struct hot_comm_item, refs);
struct hot_inode_item *he = container_of(comm_item,
struct hot_inode_item, hot_inode);
+   struct hot_info *root = container_of(he-hot_inode_tree,
+   struct hot_info, hot_inode_tree);
+
+   spin_lock(he-hot_inode.lock);
+   if (!list_empty(he-hot_inode.n_list)) {
+   list_del_init(he-hot_inode.n_list);
+   root-hot_map_nr--;
+   }
+   spin_unlock(he-hot_inode.lock);
 
hot_range_tree_free(he);
spin_lock(he-hot_inode.lock);
@@ -294,6 +315,50 @@ static void hot_freq_data_update(struct hot_freq_data 
*freq_data, bool write)
 }
 
 /*
+ * Initialize inode and range map info.
+ */
+static void hot_map_init(struct hot_info *root)
+{
+   int i;
+   for (i = 0; i  HEAT_MAP_SIZE; i++) {
+   INIT_LIST_HEAD(root-heat_inode_map[i].node_list);
+   INIT_LIST_HEAD(root-heat_range_map[i].node_list);
+   root-heat_inode_map[i].temp = i;
+   root-heat_range_map[i].temp = i;
+   spin_lock_init(root-heat_inode_map[i].lock);
+   spin_lock_init(root-heat_range_map[i].lock);
+   }
+}
+
+static void hot_map_list_free(struct list_head *node_list,
+   struct hot_info *root)
+{
+   struct list_head *pos, *next;
+   struct hot_comm_item *node;
+
+   list_for_each_safe(pos, next, node_list) {
+   node = list_entry(pos, struct hot_comm_item, n_list);
+   list_del_init(node-n_list);
+   root-hot_map_nr--;
+   }
+
+}
+
+/* Free inode and range map info */
+static void hot_map_exit(struct hot_info *root)
+{
+   int i;
+   for (i = 0; i  HEAT_MAP_SIZE; i++) {
+   spin_lock(root-heat_inode_map[i].lock);
+   hot_map_list_free(root-heat_inode_map[i].node_list, root);
+   spin_unlock(root-heat_inode_map[i].lock);
+   spin_lock(root-heat_range_map[i].lock);
+   hot_map_list_free(root-heat_range_map[i].node_list, root);
+   spin_unlock(root-heat_range_map[i].lock);
+   }
+}
+
+/*
  * Initialize kmem cache for hot_inode_item and hot_range_item.
  */
 void __init hot_cache_init(void)
@@ -384,6 +449,7 @@ int hot_track_init(struct super_block *sb)
}
 
hot_inode_tree_init(root);
+   hot_map_init(root);
 
sb-s_hot_root = root;
 
@@ -397,6 +463,7 @@ void hot_track_exit(struct super_block *sb)
 {
struct hot_info *root = sb-s_hot_root;
 
+   hot_map_exit(root);
hot_inode_tree_exit(root);
sb-s_hot_root = NULL;
kfree(root);
diff --git a/include/linux/hot_tracking.h b/include/linux/hot_tracking.h
index d555046..23edad92 100644
--- a/include/linux/hot_tracking.h
+++ b/include/linux/hot_tracking.h
@@ -20,6 +20,9 @@
 #include linux/kref.h
 #include linux/fs.h
 
+#define HEAT_MAP_BITS 8
+#define HEAT_MAP_SIZE (1  HEAT_MAP_BITS)
+
 struct hot_rb_tree {
struct rb_root map;
 };
@@ -40,12 +43,20 @@ struct hot_freq_data {
u32 last_temp;
 };