[PATCH v5 0/5] memcg, cgroup: kill css_id

2013-08-07 Thread Li Zefan
Hi Andrew,

All the cgroup preparation patches are in cgroup tree now, and these
remaining patches can be routed by mm tree.

===

This patchset converts memcg to use cgroup-id, and then we can remove
cgroup css_id.

As we've removed memcg's own refcnt, converting memcg to use cgroup-id
is very straight-forward.

The patchset is based on Tejun's cgroup tree.

v5:
- rebased against mmotm 2013-08-07-16-55

v4:
- make cgroup_from_id() inline and check if cgroup_mutex is held.
- add a comment for idr_remove() in cgroup_offline)fn().

v2-v3:
- some minor cleanups suggested by Michal.
- fixed the call to idr_alloc() in cgroup_init() in the first patch.

Li Zefan (5):
  memcg: convert to use cgroup_is_descendant()
  memcg: convert to use cgroup id
  memcg: fail to create cgroup if the cgroup id is too big
  memcg: stop using css id
  cgroup: kill css_id

--
 include/linux/cgroup.h |  37 --
 kernel/cgroup.c| 252 
+---
 mm/memcontrol.c|  69 +++---
 3 files changed, 43 insertions(+), 315 deletions(-)
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v5 5/5] cgroup: kill css_id

2013-08-07 Thread Li Zefan
The only user of css_id was memcg, and it has been convered to use
cgroup-id, so kill css_id.

Signed-off-by: Li Zefan lize...@huwei.com
Reviewed-by: Michal Hocko mho...@suse.cz
Acked-by: Tejun Heo t...@kernel.org
---
 include/linux/cgroup.h |  37 
 kernel/cgroup.c| 252 +
 2 files changed, 1 insertion(+), 288 deletions(-)

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 44dd422..9d062be 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -596,11 +596,6 @@ struct cgroup_subsys {
int subsys_id;
int disabled;
int early_init;
-   /*
-* True if this subsys uses ID. ID is not available before cgroup_init()
-* (not available in early_init time.)
-*/
-   bool use_id;
 
/*
 * If %false, this subsystem is properly hierarchical -
@@ -626,9 +621,6 @@ struct cgroup_subsys {
 */
struct cgroupfs_root *root;
struct list_head sibling;
-   /* used when use_id == true */
-   struct idr idr;
-   spinlock_t id_lock;
 
/* list of cftype_sets */
struct list_head cftsets;
@@ -873,35 +865,6 @@ int cgroup_scan_tasks(struct cgroup_scanner *scan);
 int cgroup_attach_task_all(struct task_struct *from, struct task_struct *);
 int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from);
 
-/*
- * CSS ID is ID for cgroup_subsys_state structs under subsys. This only works
- * if cgroup_subsys.use_id == true. It can be used for looking up and scanning.
- * CSS ID is assigned at cgroup allocation (create) automatically
- * and removed when subsys calls free_css_id() function. This is because
- * the lifetime of cgroup_subsys_state is subsys's matter.
- *
- * Looking up and scanning function should be called under rcu_read_lock().
- * Taking cgroup_mutex is not necessary for following calls.
- * But the css returned by this routine can be not populated yet or being
- * destroyed. The caller should check css and cgroup's status.
- */
-
-/*
- * Typically Called at -destroy(), or somewhere the subsys frees
- * cgroup_subsys_state.
- */
-void free_css_id(struct cgroup_subsys *ss, struct cgroup_subsys_state *css);
-
-/* Find a cgroup_subsys_state which has given ID */
-
-struct cgroup_subsys_state *css_lookup(struct cgroup_subsys *ss, int id);
-
-/* Returns true if root is ancestor of cg */
-bool css_is_ancestor(struct cgroup_subsys_state *cg,
-const struct cgroup_subsys_state *root);
-
-/* Get id and depth of css */
-unsigned short css_id(struct cgroup_subsys_state *css);
 struct cgroup_subsys_state *cgroup_css_from_dir(struct file *f, int id);
 
 #else /* !CONFIG_CGROUPS */
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index e82423d..3ccf877 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -123,38 +123,6 @@ struct cfent {
 };
 
 /*
- * CSS ID -- ID per subsys's Cgroup Subsys State(CSS). used only when
- * cgroup_subsys-use_id != 0.
- */
-#define CSS_ID_MAX (65535)
-struct css_id {
-   /*
-* The css to which this ID points. This pointer is set to valid value
-* after cgroup is populated. If cgroup is removed, this will be NULL.
-* This pointer is expected to be RCU-safe because destroy()
-* is called after synchronize_rcu(). But for safe use, css_tryget()
-* should be used for avoiding race.
-*/
-   struct cgroup_subsys_state __rcu *css;
-   /*
-* ID of this css.
-*/
-   unsigned short id;
-   /*
-* Depth in hierarchy which this ID belongs to.
-*/
-   unsigned short depth;
-   /*
-* ID is freed by RCU. (and lookup routine is RCU safe.)
-*/
-   struct rcu_head rcu_head;
-   /*
-* Hierarchy of CSS ID belongs to.
-*/
-   unsigned short stack[0]; /* Array of Length (depth+1) */
-};
-
-/*
  * cgroup_event represents events which userspace want to receive.
  */
 struct cgroup_event {
@@ -364,9 +332,6 @@ struct cgrp_cset_link {
 static struct css_set init_css_set;
 static struct cgrp_cset_link init_cgrp_cset_link;
 
-static int cgroup_init_idr(struct cgroup_subsys *ss,
-  struct cgroup_subsys_state *css);
-
 /* css_set_lock protects the list of css_set objects, and the
  * chain of tasks off each css_set.  Nests outside task-alloc_lock
  * due to cgroup_iter_start() */
@@ -814,9 +779,6 @@ static struct backing_dev_info cgroup_backing_dev_info = {
.capabilities   = BDI_CAP_NO_ACCT_AND_WRITEBACK,
 };
 
-static int alloc_css_id(struct cgroup_subsys *ss,
-   struct cgroup *parent, struct cgroup *child);
-
 static struct inode *cgroup_new_inode(umode_t mode, struct super_block *sb)
 {
struct inode *inode = new_inode(sb);
@@ -4136,20 +4098,6 @@ static int cgroup_populate_dir(struct cgroup *cgrp, 
unsigned long subsys_mask)
}
}
 
-   /* This cgroup is ready now

[PATCH v5 3/5] memcg: fail to create cgroup if the cgroup id is too big

2013-08-07 Thread Li Zefan
memcg requires the cgroup id to be smaller than 65536.

This is a preparation to kill css id.

Signed-off-by: Li Zefan lize...@huawei.com
Acked-by: Michal Hocko mho...@suse.cz
---
 mm/memcontrol.c | 9 +
 1 file changed, 9 insertions(+)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 3189f70..c0bb83c 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -503,6 +503,12 @@ static inline bool mem_cgroup_is_root(struct mem_cgroup 
*memcg)
return (memcg == root_mem_cgroup);
 }
 
+/*
+ * We restrict the id in the range of [1, 65535], so it can fit into
+ * an unsigned short.
+ */
+#define MEM_CGROUP_ID_MAX  USHRT_MAX
+
 static inline unsigned short mem_cgroup_id(struct mem_cgroup *memcg)
 {
/*
@@ -6056,6 +6062,9 @@ mem_cgroup_css_alloc(struct cgroup *cont)
long error = -ENOMEM;
int node;
 
+   if (cont-id  MEM_CGROUP_ID_MAX)
+   return ERR_PTR(-ENOSPC);
+
memcg = mem_cgroup_alloc();
if (!memcg)
return ERR_PTR(error);
-- 
1.8.0.2
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v5 4/5] memcg: stop using css id

2013-08-07 Thread Li Zefan
Now memcg uses cgroup id instead of css id. Update some comments and
set mem_cgroup_subsys-use_id to 0.

Signed-off-by: Li Zefan lize...@huawei.com
Acked-by: Michal Hocko mho...@suse.cz
---
 mm/memcontrol.c | 24 
 1 file changed, 8 insertions(+), 16 deletions(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index c0bb83c..1864ba7 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -599,16 +599,11 @@ static void disarm_sock_keys(struct mem_cgroup *memcg)
 #ifdef CONFIG_MEMCG_KMEM
 /*
  * This will be the memcg's index in each cache's -memcg_params-memcg_caches.
- * There are two main reasons for not using the css_id for this:
- *  1) this works better in sparse environments, where we have a lot of memcgs,
- * but only a few kmem-limited. Or also, if we have, for instance, 200
- * memcgs, and none but the 200th is kmem-limited, we'd have to have a
- * 200 entry array for that.
- *
- *  2) In order not to violate the cgroup API, we would like to do all memory
- * allocation in -create(). At that point, we haven't yet allocated the
- * css_id. Having a separate index prevents us from messing with the cgroup
- * core for this
+ * The main reason for not using cgroup id for this:
+ *  this works better in sparse environments, where we have a lot of memcgs,
+ *  but only a few kmem-limited. Or also, if we have, for instance, 200
+ *  memcgs, and none but the 200th is kmem-limited, we'd have to have a
+ *  200 entry array for that.
  *
  * The current size of the caches array is stored in
  * memcg_limited_groups_array_size.  It will double each time we have to
@@ -623,14 +618,14 @@ int memcg_limited_groups_array_size;
  * cgroups is a reasonable guess. In the future, it could be a parameter or
  * tunable, but that is strictly not necessary.
  *
- * MAX_SIZE should be as large as the number of css_ids. Ideally, we could get
+ * MAX_SIZE should be as large as the number of cgrp_ids. Ideally, we could get
  * this constant directly from cgroup, but it is understandable that this is
  * better kept as an internal representation in cgroup.c. In any case, the
- * css_id space is not getting any smaller, and we don't have to necessarily
+ * cgrp_id space is not getting any smaller, and we don't have to necessarily
  * increase ours as well if it increases.
  */
 #define MEMCG_CACHES_MIN_SIZE 4
-#define MEMCG_CACHES_MAX_SIZE 65535
+#define MEMCG_CACHES_MAX_SIZE MEM_CGROUP_ID_MAX
 
 /*
  * A lot of the calls to the cache allocation functions are expected to be
@@ -6019,8 +6014,6 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg)
int node;
size_t size = memcg_size();
 
-   free_css_id(mem_cgroup_subsys, memcg-css);
-
for_each_node(node)
free_mem_cgroup_per_zone_info(memcg, node);
 
@@ -6802,7 +6795,6 @@ struct cgroup_subsys mem_cgroup_subsys = {
.bind = mem_cgroup_bind,
.base_cftypes = mem_cgroup_files,
.early_init = 0,
-   .use_id = 1,
 };
 
 #ifdef CONFIG_MEMCG_SWAP
-- 
1.8.0.2
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCHSET cgroup/for-3.12] cgroup: make cgroup_event specific to memcg

2013-08-07 Thread Li Zefan
 If somebody needs a notification interface (and there is no one available
 right now) then you cannot prevent from such a pointless work anyway...
 
 I'm gonna add one for freezer state transitions.  It'll be simple
 this file changed thing and will probably apply that to at least oom
 and vmpressure.  I'm relatively confident that it's gonna be pretty
 simple and that's gonna be the cgroup event mechanism.
 

I would like to see this happen. I have a feeling that we're deprecating
features a bit aggressively without providing alternatives.

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: stable: a59f4e079d19464eebb9b06513a1d4f55fdae5ba needs backport ?

2013-08-07 Thread Li Zefan
Hi Greg,

Could you queue this commit for 3.0 and 3.4? It has been acked by Ingo.

On 2013/8/2 19:14, Ingo Molnar wrote:
 
 * Li Zefan lize...@huawei.com wrote:
 
 commit a59f4e079d19464eebb9b06513a1d4f55fdae5ba
 Author: Zhu Yanhai gaoyang@taobao.com
 Date:   Tue Jan 8 12:56:52 2013 +0800

 sched: Fix the broken sched_rr_get_interval()

 Without this patch, syscall sched_rr_get_interval() can return wrong
 value, and the bug was introduced in 2.6.24, so this looks like a
 candidate for stable kernel.

 The concern for backporting is that this might break some userspace
 programs? As the changelog says:

 [ Since this is an ABI and an old bug, we'll test this via a
   slow upstream route, to hopefully discover any app breakage. ]
 
 It appears to have caused no trouble.
 
 So I'm fine with backporting it to older -stable kernels:
 
 Acked-by: Ingo Molnar mi...@kernel.org
 
 Thanks,
 
   Ingo
 

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] kernel/sysctl_binary.c: improve the usage of return value 'result'

2013-08-06 Thread Li Zefan
>> The right answer to the code is to config it out and then you don't have
>> to worry about it one way or another.
>>
> 
> Pardon?
> 
> Excuse me, my English is not quite well, I don't quite understand your
> meaning, could you please repeat again in details or say more clearly ?
> 
> 
>> The sysctl binary path has never been properly maintained and I don't
>> intend to start.   But I will spend 5 minutes to say this patch seems to
>> make the code worse not better.
>>
> 
> I guess no one ever invited you to maintain this file (for just as you
> said, this file will be removed), so don't worry about it.
> 
> Hmm... do you mean you spend 5 minutes to get a conclusion ? if so,
> better not use word 'seems' which is not a suitable word appeared in
> the results, proofs or conclusions.
> 

To be honest...

You are too bad in english to do kernel development. You don't seem to
know how to communicate in english...

And people easily get frustrated or even pissed off when discussing with
you. That's why tglx descided to put your emails into /dev/null...

> At least, one conclusion is: this patch switches from old-style to
> new-style, not for optimization.
> 
> But for sysctl_getname() and "checkpatch.pl" of this patch, better to
> get more discussion.
> 
> Is it OK ?

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] kernel/sysctl_binary.c: improve the usage of return value 'result'

2013-08-06 Thread Li Zefan
 The right answer to the code is to config it out and then you don't have
 to worry about it one way or another.

 
 Pardon?
 
 Excuse me, my English is not quite well, I don't quite understand your
 meaning, could you please repeat again in details or say more clearly ?
 
 
 The sysctl binary path has never been properly maintained and I don't
 intend to start.   But I will spend 5 minutes to say this patch seems to
 make the code worse not better.

 
 I guess no one ever invited you to maintain this file (for just as you
 said, this file will be removed), so don't worry about it.
 
 Hmm... do you mean you spend 5 minutes to get a conclusion ? if so,
 better not use word 'seems' which is not a suitable word appeared in
 the results, proofs or conclusions.
 

To be honest...

You are too bad in english to do kernel development. You don't seem to
know how to communicate in english...

And people easily get frustrated or even pissed off when discussing with
you. That's why tglx descided to put your emails into /dev/null...

 At least, one conclusion is: this patch switches from old-style to
 new-style, not for optimization.
 
 But for sysctl_getname() and checkpatch.pl of this patch, better to
 get more discussion.
 
 Is it OK ?

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v2 3/5] cgroup, memcg: move cgroup_event implementation to memcg

2013-08-05 Thread Li Zefan
On 2013/8/6 10:02, Li Zefan wrote:
>>  static struct cftype mem_cgroup_files[] = {
>>  {
>>  .name = "usage_in_bytes",
>> @@ -5973,6 +6192,12 @@ static struct cftype mem_cgroup_files[] = {
>>  .read_u64 = mem_cgroup_hierarchy_read,
>>  },
>>  {
>> +.name = "cgroup.event_control",
>> +.write_string = cgroup_write_event_control,
>> +.flags = CFTYPE_NO_PREFIX,
>> +.mode = S_IWUGO,
>> +},
> 
> One of the misdesign of cgroup eventfd is, cgroup.event_control is
> totally redunant...
> 

ok. write_string() is needed to accept arguments and pass them to
the event register function. still not good.

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v2 3/5] cgroup, memcg: move cgroup_event implementation to memcg

2013-08-05 Thread Li Zefan
>  static struct cftype mem_cgroup_files[] = {
>   {
>   .name = "usage_in_bytes",
> @@ -5973,6 +6192,12 @@ static struct cftype mem_cgroup_files[] = {
>   .read_u64 = mem_cgroup_hierarchy_read,
>   },
>   {
> + .name = "cgroup.event_control",
> + .write_string = cgroup_write_event_control,
> + .flags = CFTYPE_NO_PREFIX,
> + .mode = S_IWUGO,
> + },

One of the misdesign of cgroup eventfd is, cgroup.event_control is
totally redunant...

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


stable: please queue commit 0231bb5 ("perf: Fix event group context move")

2013-08-05 Thread Li Zefan
Plese consider adding commit 0231bb5336758426b44ccd798ccd3c5419c95d58
to stable tree. It fixes a real bug, which was explained here:

http://thread.gmane.org/gmane.linux.kernel.perf.user/1144

and I've managed to reproduce the bug in 3.4 kernel with the test
program attached in the above email, and I've confirmed the bug fix.

The effect of the bug is, if we open a group of mixed events(sw/hw)
and the "disabled" flag is not set and sw events come first, then
some of the perf events won't be started after the call to perf_event_open().

The bug fix should be applicable to kernels >= 3.0 && <= 3.7.

--
Li Zefan
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


stable: please queue commit 0231bb5 (perf: Fix event group context move)

2013-08-05 Thread Li Zefan
Plese consider adding commit 0231bb5336758426b44ccd798ccd3c5419c95d58
to stable tree. It fixes a real bug, which was explained here:

http://thread.gmane.org/gmane.linux.kernel.perf.user/1144

and I've managed to reproduce the bug in 3.4 kernel with the test
program attached in the above email, and I've confirmed the bug fix.

The effect of the bug is, if we open a group of mixed events(sw/hw)
and the disabled flag is not set and sw events come first, then
some of the perf events won't be started after the call to perf_event_open().

The bug fix should be applicable to kernels = 3.0  = 3.7.

--
Li Zefan
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v2 3/5] cgroup, memcg: move cgroup_event implementation to memcg

2013-08-05 Thread Li Zefan
  static struct cftype mem_cgroup_files[] = {
   {
   .name = usage_in_bytes,
 @@ -5973,6 +6192,12 @@ static struct cftype mem_cgroup_files[] = {
   .read_u64 = mem_cgroup_hierarchy_read,
   },
   {
 + .name = cgroup.event_control,
 + .write_string = cgroup_write_event_control,
 + .flags = CFTYPE_NO_PREFIX,
 + .mode = S_IWUGO,
 + },

One of the misdesign of cgroup eventfd is, cgroup.event_control is
totally redunant...

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v2 3/5] cgroup, memcg: move cgroup_event implementation to memcg

2013-08-05 Thread Li Zefan
On 2013/8/6 10:02, Li Zefan wrote:
  static struct cftype mem_cgroup_files[] = {
  {
  .name = usage_in_bytes,
 @@ -5973,6 +6192,12 @@ static struct cftype mem_cgroup_files[] = {
  .read_u64 = mem_cgroup_hierarchy_read,
  },
  {
 +.name = cgroup.event_control,
 +.write_string = cgroup_write_event_control,
 +.flags = CFTYPE_NO_PREFIX,
 +.mode = S_IWUGO,
 +},
 
 One of the misdesign of cgroup eventfd is, cgroup.event_control is
 totally redunant...
 

ok. write_string() is needed to accept arguments and pass them to
the event register function. still not good.

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH cgroup/for-3.12] cgroup: make css_for_each_descendant() and friends include the origin css in the iteration

2013-08-04 Thread Li Zefan
> Previously, all css descendant iterators didn't include the origin
> (root of subtree) css in the iteration.  The reasons were maintaining
> consistency with css_for_each_child() and that at the time of
> introduction more use cases needed skipping the origin anyway;
> however, given that css_is_descendant() considers self to be a
> descendant, omitting the origin css has become more confusing and
> looking at the accumulated use cases rather clearly indicates that
> including origin would result in simpler code overall.
> 
> While this is a change which can easily lead to subtle bugs, cgroup
> API including the iterators has recently gone through major
> restructuring and no out-of-tree changes will be applicable without
> adjustments making this a relatively acceptable opportunity for this
> type of change.
> 
> The conversions are mostly straight-forward.  If the iteration block
> had explicit origin handling before or after, it's moved inside the
> iteration.  If not, if (pos == origin) continue; is added.  Some
> conversions add extra reference get/put around origin handling by
> consolidating origin handling and the rest.  While the extra ref
> operations aren't strictly necessary, this shouldn't cause any
> noticeable difference.
> 
> Signed-off-by: Tejun Heo 
> Cc: Li Zefan 
> Cc: Jens Axboe 
> Cc: Vivek Goyal 
> Cc: Matt Helsley 
> Cc: Johannes Weiner 
> Cc: Michal Hocko 
> Cc: Balbir Singh 
> Cc: Aristeu Rozanski 

I was wondering this would be better when I converted cgroup_cfts_commits()
to use this iterator.

Acked-by: Li Zefan 

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 5/5] memcg: rename cgroup_event to mem_cgroup_event

2013-08-04 Thread Li Zefan
On 2013/8/5 0:07, Tejun Heo wrote:
> cgroup_event is only available in memcg now.  Let's brand it that way.
> While at it, add a comment encouraging deprecation of the feature and
> remove the respective section from cgroup documentation.
> 
> This patch is cosmetic.
> 
> Signed-off-by: Tejun Heo 
> ---
>  Documentation/cgroups/cgroups.txt | 19 -
>  mm/memcontrol.c   | 57 
> +--
>  2 files changed, 37 insertions(+), 39 deletions(-)
> 
> diff --git a/Documentation/cgroups/cgroups.txt 
> b/Documentation/cgroups/cgroups.txt
> index 638bf17..ca5aee9 100644
> --- a/Documentation/cgroups/cgroups.txt
> +++ b/Documentation/cgroups/cgroups.txt
> @@ -472,25 +472,6 @@ you give a subsystem a name.
>  The name of the subsystem appears as part of the hierarchy description
>  in /proc/mounts and /proc//cgroups.
> 

2. Usage Examples and Syntax
  2.1 Basic Usage
  2.2 Attaching processes
  2.3 Mounting hierarchies by name
  2.4 Notification API

remove the index ?
 
> -2.4 Notification API
> -
> -
> -There is mechanism which allows to get notifications about changing
> -status of a cgroup.
> -
> -To register a new notification handler you need to:
> - - create a file descriptor for event notification using eventfd(2);
> - - open a control file to be monitored (e.g. memory.usage_in_bytes);
> - - write "  " to cgroup.event_control.
> -   Interpretation of args is defined by control file implementation;
> -
> -eventfd will be woken up by control file implementation or when the
> -cgroup is removed.
> -
> -To unregister a notification handler just close eventfd.
> -
> -NOTE: Support of notifications should be implemented for the control
> -file. See documentation for the subsystem.
>  

Why not move this section to Documentation/cgroups/memory.txt?

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 3/5] cgroup, memcg: move cgroup_event implementation to memcg

2013-08-04 Thread Li Zefan
于 2013/8/5 0:07, Tejun Heo 写道:
> cgroup_event is way over-designed and tries to build a generic
> flexible event mechanism into cgroup - fully customizable event
> specification for each user of the interface.  This is utterly
> unnecessary and overboard especially in the light of the planned
> unified hierarchy as there's gonna be single agent.  Simply generating
> events at fixed points, or if that's too restrictive, configureable
> cadence or single set of configureable points should be enough.
> 
> Thankfully, memcg is the only user and gets to keep it.  Replacing it
> with something simpler on sane_behavior is strongly recommended.
> 
> This patch moves cgroup_event and "cgroup.event_control"
> implementation to mm/memcontrol.c.  Clearing of events on cgroup
> destruction is moved from cgroup_destroy_locked() to
> mem_cgroup_css_offline(), which shouldn't make any noticeable
> difference.
> 
> Note that "cgroup.event_control" will now exist only on the hierarchy
> with memcg attached to it.  While this change is visible to userland,
> it is unlikely to be noticeable as the file has never been meaningful
> outside memcg.
> 
> Signed-off-by: Tejun Heo 
> Cc: Johannes Weiner 
> Cc: Michal Hocko 
> Cc: Balbir Singh 
> ---
>  kernel/cgroup.c | 237 ---
>  mm/memcontrol.c | 238 
> 
>  2 files changed, 238 insertions(+), 237 deletions(-)
> 

init/Kconfig needs to be updated too:

menuconfig CGROUPS
boolean "Control Group support"
depends on EVENTFD
...
config SCHED_AUTOGROUP
bool "Automatic process group scheduling"
select EVENTFD
select CGROUPS

> diff --git a/kernel/cgroup.c b/kernel/cgroup.c
> index 2583b7b..a0b5e22 100644
> --- a/kernel/cgroup.c
> +++ b/kernel/cgroup.c
> @@ -56,7 +56,6 @@
>  #include 
>  #include 
>  #include  /* TODO: replace with more sophisticated array */
> -#include 
>  #include 

poll.h also can be removed.

>  #include  /* used in cgroup_attach_task */
>  #include 
> @@ -154,36 +153,6 @@ struct css_id {
>   unsigned short stack[0]; /* Array of Length (depth+1) */
>  };
>  

[...]

>  
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 2885e3e..3700b65 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c

#include 
#include 

> @@ -239,6 +239,36 @@ struct mem_cgroup_eventfd_list {
>   struct eventfd_ctx *eventfd;
>  };


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 2/5] cgroup: export __cgroup_from_dentry() and __cgroup_dput()

2013-08-04 Thread Li Zefan
> +struct cgroup *__cgroup_from_dentry(struct dentry *dentry, struct cftype 
> **cftp)
>  {
> - if (file_inode(file)->i_fop != _file_operations)
> - return ERR_PTR(-EINVAL);
> - return __d_cft(file->f_dentry);
> + if (!dentry->d_inode ||
> + dentry->d_inode->i_op != _file_inode_operations)
> + return NULL;
> +
> + if (cftp)
> + *cftp = __d_cft(dentry);
> + return __d_cgrp(dentry->d_parent);
>  }
> +EXPORT_SYMBOL_GPL(__cgroup_from_dentry);

As we don't expect new users, why export this symbol? memcg can't be
built as a module.

>  
>  static int cgroup_create_file(struct dentry *dentry, umode_t mode,
>   struct super_block *sb)
> @@ -3953,7 +3956,7 @@ static int cgroup_write_notify_on_release(struct 
> cgroup_subsys_state *css,
>   *
>   * That's why we hold a reference before dput() and drop it right after.
>   */
> -static void cgroup_dput(struct cgroup *cgrp)
> +void __cgroup_dput(struct cgroup *cgrp)
>  {
>   struct super_block *sb = cgrp->root->sb;
>  
> @@ -3961,6 +3964,7 @@ static void cgroup_dput(struct cgroup *cgrp)
>   dput(cgrp->dentry);
>   deactivate_super(sb);
>  }
> +EXPORT_SYMBOL_GPL(__cgroup_dput);

ditto

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 2/5] cgroup: export __cgroup_from_dentry() and __cgroup_dput()

2013-08-04 Thread Li Zefan
 +struct cgroup *__cgroup_from_dentry(struct dentry *dentry, struct cftype 
 **cftp)
  {
 - if (file_inode(file)-i_fop != cgroup_file_operations)
 - return ERR_PTR(-EINVAL);
 - return __d_cft(file-f_dentry);
 + if (!dentry-d_inode ||
 + dentry-d_inode-i_op != cgroup_file_inode_operations)
 + return NULL;
 +
 + if (cftp)
 + *cftp = __d_cft(dentry);
 + return __d_cgrp(dentry-d_parent);
  }
 +EXPORT_SYMBOL_GPL(__cgroup_from_dentry);

As we don't expect new users, why export this symbol? memcg can't be
built as a module.

  
  static int cgroup_create_file(struct dentry *dentry, umode_t mode,
   struct super_block *sb)
 @@ -3953,7 +3956,7 @@ static int cgroup_write_notify_on_release(struct 
 cgroup_subsys_state *css,
   *
   * That's why we hold a reference before dput() and drop it right after.
   */
 -static void cgroup_dput(struct cgroup *cgrp)
 +void __cgroup_dput(struct cgroup *cgrp)
  {
   struct super_block *sb = cgrp-root-sb;
  
 @@ -3961,6 +3964,7 @@ static void cgroup_dput(struct cgroup *cgrp)
   dput(cgrp-dentry);
   deactivate_super(sb);
  }
 +EXPORT_SYMBOL_GPL(__cgroup_dput);

ditto

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 3/5] cgroup, memcg: move cgroup_event implementation to memcg

2013-08-04 Thread Li Zefan
于 2013/8/5 0:07, Tejun Heo 写道:
 cgroup_event is way over-designed and tries to build a generic
 flexible event mechanism into cgroup - fully customizable event
 specification for each user of the interface.  This is utterly
 unnecessary and overboard especially in the light of the planned
 unified hierarchy as there's gonna be single agent.  Simply generating
 events at fixed points, or if that's too restrictive, configureable
 cadence or single set of configureable points should be enough.
 
 Thankfully, memcg is the only user and gets to keep it.  Replacing it
 with something simpler on sane_behavior is strongly recommended.
 
 This patch moves cgroup_event and cgroup.event_control
 implementation to mm/memcontrol.c.  Clearing of events on cgroup
 destruction is moved from cgroup_destroy_locked() to
 mem_cgroup_css_offline(), which shouldn't make any noticeable
 difference.
 
 Note that cgroup.event_control will now exist only on the hierarchy
 with memcg attached to it.  While this change is visible to userland,
 it is unlikely to be noticeable as the file has never been meaningful
 outside memcg.
 
 Signed-off-by: Tejun Heo t...@kernel.org
 Cc: Johannes Weiner han...@cmpxchg.org
 Cc: Michal Hocko mho...@suse.cz
 Cc: Balbir Singh bsinghar...@gmail.com
 ---
  kernel/cgroup.c | 237 ---
  mm/memcontrol.c | 238 
 
  2 files changed, 238 insertions(+), 237 deletions(-)
 

init/Kconfig needs to be updated too:

menuconfig CGROUPS
boolean Control Group support
depends on EVENTFD
...
config SCHED_AUTOGROUP
bool Automatic process group scheduling
select EVENTFD
select CGROUPS

 diff --git a/kernel/cgroup.c b/kernel/cgroup.c
 index 2583b7b..a0b5e22 100644
 --- a/kernel/cgroup.c
 +++ b/kernel/cgroup.c
 @@ -56,7 +56,6 @@
  #include linux/pid_namespace.h
  #include linux/idr.h
  #include linux/vmalloc.h /* TODO: replace with more sophisticated array */
 -#include linux/eventfd.h
  #include linux/poll.h

poll.h also can be removed.

  #include linux/flex_array.h /* used in cgroup_attach_task */
  #include linux/kthread.h
 @@ -154,36 +153,6 @@ struct css_id {
   unsigned short stack[0]; /* Array of Length (depth+1) */
  };
  

[...]

  
 diff --git a/mm/memcontrol.c b/mm/memcontrol.c
 index 2885e3e..3700b65 100644
 --- a/mm/memcontrol.c
 +++ b/mm/memcontrol.c

#include linux/eventfd.h
#include linux/poll.h

 @@ -239,6 +239,36 @@ struct mem_cgroup_eventfd_list {
   struct eventfd_ctx *eventfd;
  };


--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 5/5] memcg: rename cgroup_event to mem_cgroup_event

2013-08-04 Thread Li Zefan
On 2013/8/5 0:07, Tejun Heo wrote:
 cgroup_event is only available in memcg now.  Let's brand it that way.
 While at it, add a comment encouraging deprecation of the feature and
 remove the respective section from cgroup documentation.
 
 This patch is cosmetic.
 
 Signed-off-by: Tejun Heo t...@kernel.org
 ---
  Documentation/cgroups/cgroups.txt | 19 -
  mm/memcontrol.c   | 57 
 +--
  2 files changed, 37 insertions(+), 39 deletions(-)
 
 diff --git a/Documentation/cgroups/cgroups.txt 
 b/Documentation/cgroups/cgroups.txt
 index 638bf17..ca5aee9 100644
 --- a/Documentation/cgroups/cgroups.txt
 +++ b/Documentation/cgroups/cgroups.txt
 @@ -472,25 +472,6 @@ you give a subsystem a name.
  The name of the subsystem appears as part of the hierarchy description
  in /proc/mounts and /proc/pid/cgroups.
 

2. Usage Examples and Syntax
  2.1 Basic Usage
  2.2 Attaching processes
  2.3 Mounting hierarchies by name
  2.4 Notification API

remove the index ?
 
 -2.4 Notification API
 -
 -
 -There is mechanism which allows to get notifications about changing
 -status of a cgroup.
 -
 -To register a new notification handler you need to:
 - - create a file descriptor for event notification using eventfd(2);
 - - open a control file to be monitored (e.g. memory.usage_in_bytes);
 - - write event_fd control_fd args to cgroup.event_control.
 -   Interpretation of args is defined by control file implementation;
 -
 -eventfd will be woken up by control file implementation or when the
 -cgroup is removed.
 -
 -To unregister a notification handler just close eventfd.
 -
 -NOTE: Support of notifications should be implemented for the control
 -file. See documentation for the subsystem.
  

Why not move this section to Documentation/cgroups/memory.txt?

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH cgroup/for-3.12] cgroup: make css_for_each_descendant() and friends include the origin css in the iteration

2013-08-04 Thread Li Zefan
 Previously, all css descendant iterators didn't include the origin
 (root of subtree) css in the iteration.  The reasons were maintaining
 consistency with css_for_each_child() and that at the time of
 introduction more use cases needed skipping the origin anyway;
 however, given that css_is_descendant() considers self to be a
 descendant, omitting the origin css has become more confusing and
 looking at the accumulated use cases rather clearly indicates that
 including origin would result in simpler code overall.
 
 While this is a change which can easily lead to subtle bugs, cgroup
 API including the iterators has recently gone through major
 restructuring and no out-of-tree changes will be applicable without
 adjustments making this a relatively acceptable opportunity for this
 type of change.
 
 The conversions are mostly straight-forward.  If the iteration block
 had explicit origin handling before or after, it's moved inside the
 iteration.  If not, if (pos == origin) continue; is added.  Some
 conversions add extra reference get/put around origin handling by
 consolidating origin handling and the rest.  While the extra ref
 operations aren't strictly necessary, this shouldn't cause any
 noticeable difference.
 
 Signed-off-by: Tejun Heo t...@kernel.org
 Cc: Li Zefan lize...@huawei.com
 Cc: Jens Axboe ax...@kernel.dk
 Cc: Vivek Goyal vgo...@redhat.com
 Cc: Matt Helsley matth...@us.ibm.com
 Cc: Johannes Weiner han...@cmpxchg.org
 Cc: Michal Hocko mho...@suse.cz
 Cc: Balbir Singh bsinghar...@gmail.com
 Cc: Aristeu Rozanski a...@redhat.com

I was wondering this would be better when I converted cgroup_cfts_commits()
to use this iterator.

Acked-by: Li Zefan lize...@huawei.com

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


stable: a59f4e079d19464eebb9b06513a1d4f55fdae5ba needs backport ?

2013-08-02 Thread Li Zefan
commit a59f4e079d19464eebb9b06513a1d4f55fdae5ba
Author: Zhu Yanhai 
Date:   Tue Jan 8 12:56:52 2013 +0800

sched: Fix the broken sched_rr_get_interval()

Without this patch, syscall sched_rr_get_interval() can return wrong
value, and the bug was introduced in 2.6.24, so this looks like a
candidate for stable kernel.

The concern for backporting is that this might break some userspace
programs? As the changelog says:

[ Since this is an ABI and an old bug, we'll test this via a
  slow upstream route, to hopefully discover any app breakage. ]
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [ 146/184] softirq: reduce latencies

2013-08-02 Thread Li Zefan
Cc: Ben Greear
Cc: Tejun

Hi Willy,

This patch introduced a bug, which was then fixed by commit 34376a50fb1f
("Fix lockup related to stop_machine being stuck in __do_softirq."),
do we need this fix for 2.6.32 ?

On 2013/6/5 1:23, Willy Tarreau wrote:
> 2.6.32-longterm review patch.  If anyone has any objections, please let me 
> know.
> 
> --
> 
> From: Eric Dumazet 
> 
> In various network workloads, __do_softirq() latencies can be up
> to 20 ms if HZ=1000, and 200 ms if HZ=100.
> 
> This is because we iterate 10 times in the softirq dispatcher,
> and some actions can consume a lot of cycles.
> 
> This patch changes the fallback to ksoftirqd condition to :
> 
> - A time limit of 2 ms.
> - need_resched() being set on current task
> 
> When one of this condition is met, we wakeup ksoftirqd for further
> softirq processing if we still have pending softirqs.
> 
> Using need_resched() as the only condition can trigger RCU stalls,
> as we can keep BH disabled for too long.
> 
> I ran several benchmarks and got no significant difference in
> throughput, but a very significant reduction of latencies (one order
> of magnitude) :
> 
> In following bench, 200 antagonist "netperf -t TCP_RR" are started in
> background, using all available cpus.
> 
> Then we start one "netperf -t TCP_RR", bound to the cpu handling the NIC
> IRQ (hard+soft)
> 
> Before patch :
> 
> RT_LATENCY,MIN_LATENCY,MAX_LATENCY,P50_LATENCY,P90_LATENCY,P99_LATENCY,MEAN_LATENCY,STDDEV_LATENCY
> MIGRATED TCP REQUEST/RESPONSE TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET
> to 7.7.7.84 () port 0 AF_INET : first burst 0 : cpu bind
> RT_LATENCY=550110.424
> MIN_LATENCY=146858
> MAX_LATENCY=997109
> P50_LATENCY=305000
> P90_LATENCY=55
> P99_LATENCY=71
> MEAN_LATENCY=376989.12
> STDDEV_LATENCY=184046.92
> 
> After patch :
> 
> RT_LATENCY,MIN_LATENCY,MAX_LATENCY,P50_LATENCY,P90_LATENCY,P99_LATENCY,MEAN_LATENCY,STDDEV_LATENCY
> MIGRATED TCP REQUEST/RESPONSE TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET
> to 7.7.7.84 () port 0 AF_INET : first burst 0 : cpu bind
> RT_LATENCY=40545.492
> MIN_LATENCY=9834
> MAX_LATENCY=78366
> P50_LATENCY=33583
> P90_LATENCY=59000
> P99_LATENCY=69000
> MEAN_LATENCY=38364.67
> STDDEV_LATENCY=12865.26
> 
> Signed-off-by: Eric Dumazet 
> Cc: David Miller 
> Cc: Tom Herbert 
> Cc: Ben Hutchings 
> Signed-off-by: David S. Miller 
> (cherry picked from commit c10d73671ad30f54692f7f69f0e09e75d3a8926a)
> Signed-off-by: Willy Tarreau 

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [ 146/184] softirq: reduce latencies

2013-08-02 Thread Li Zefan
Cc: Ben Greear
Cc: Tejun

Hi Willy,

This patch introduced a bug, which was then fixed by commit 34376a50fb1f
(Fix lockup related to stop_machine being stuck in __do_softirq.),
do we need this fix for 2.6.32 ?

On 2013/6/5 1:23, Willy Tarreau wrote:
 2.6.32-longterm review patch.  If anyone has any objections, please let me 
 know.
 
 --
 
 From: Eric Dumazet eduma...@google.com
 
 In various network workloads, __do_softirq() latencies can be up
 to 20 ms if HZ=1000, and 200 ms if HZ=100.
 
 This is because we iterate 10 times in the softirq dispatcher,
 and some actions can consume a lot of cycles.
 
 This patch changes the fallback to ksoftirqd condition to :
 
 - A time limit of 2 ms.
 - need_resched() being set on current task
 
 When one of this condition is met, we wakeup ksoftirqd for further
 softirq processing if we still have pending softirqs.
 
 Using need_resched() as the only condition can trigger RCU stalls,
 as we can keep BH disabled for too long.
 
 I ran several benchmarks and got no significant difference in
 throughput, but a very significant reduction of latencies (one order
 of magnitude) :
 
 In following bench, 200 antagonist netperf -t TCP_RR are started in
 background, using all available cpus.
 
 Then we start one netperf -t TCP_RR, bound to the cpu handling the NIC
 IRQ (hard+soft)
 
 Before patch :
 
 RT_LATENCY,MIN_LATENCY,MAX_LATENCY,P50_LATENCY,P90_LATENCY,P99_LATENCY,MEAN_LATENCY,STDDEV_LATENCY
 MIGRATED TCP REQUEST/RESPONSE TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET
 to 7.7.7.84 () port 0 AF_INET : first burst 0 : cpu bind
 RT_LATENCY=550110.424
 MIN_LATENCY=146858
 MAX_LATENCY=997109
 P50_LATENCY=305000
 P90_LATENCY=55
 P99_LATENCY=71
 MEAN_LATENCY=376989.12
 STDDEV_LATENCY=184046.92
 
 After patch :
 
 RT_LATENCY,MIN_LATENCY,MAX_LATENCY,P50_LATENCY,P90_LATENCY,P99_LATENCY,MEAN_LATENCY,STDDEV_LATENCY
 MIGRATED TCP REQUEST/RESPONSE TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET
 to 7.7.7.84 () port 0 AF_INET : first burst 0 : cpu bind
 RT_LATENCY=40545.492
 MIN_LATENCY=9834
 MAX_LATENCY=78366
 P50_LATENCY=33583
 P90_LATENCY=59000
 P99_LATENCY=69000
 MEAN_LATENCY=38364.67
 STDDEV_LATENCY=12865.26
 
 Signed-off-by: Eric Dumazet eduma...@google.com
 Cc: David Miller da...@davemloft.net
 Cc: Tom Herbert therb...@google.com
 Cc: Ben Hutchings bhutchi...@solarflare.com
 Signed-off-by: David S. Miller da...@davemloft.net
 (cherry picked from commit c10d73671ad30f54692f7f69f0e09e75d3a8926a)
 Signed-off-by: Willy Tarreau w...@1wt.eu

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


stable: a59f4e079d19464eebb9b06513a1d4f55fdae5ba needs backport ?

2013-08-02 Thread Li Zefan
commit a59f4e079d19464eebb9b06513a1d4f55fdae5ba
Author: Zhu Yanhai gaoyang@taobao.com
Date:   Tue Jan 8 12:56:52 2013 +0800

sched: Fix the broken sched_rr_get_interval()

Without this patch, syscall sched_rr_get_interval() can return wrong
value, and the bug was introduced in 2.6.24, so this looks like a
candidate for stable kernel.

The concern for backporting is that this might break some userspace
programs? As the changelog says:

[ Since this is an ABI and an old bug, we'll test this via a
  slow upstream route, to hopefully discover any app breakage. ]
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 21/23] cgroup: make cftype->[un]register_event() deal with cgroup_subsys_state instead of cgroup

2013-08-01 Thread Li Zefan
> @@ -506,15 +506,17 @@ struct cftype {
>* you want to provide this functionality. Use eventfd_signal()
>* on eventfd to send notification to userspace.
>*/
> - int (*register_event)(struct cgroup *cgrp, struct cftype *cft,
> - struct eventfd_ctx *eventfd, const char *args);
> + int (*register_event)(struct cgroup_subsys_state *css,
> +   struct cftype *cft, struct eventfd_ctx *eventfd,
> +   const char *args);
>   /*
>* unregister_event() callback will be called when userspace
>* closes the eventfd or on cgroup removing.
>* This callback must be implemented, if you want provide
>* notification functionality.
>*/
> - void (*unregister_event)(struct cgroup *cgrp, struct cftype *cft,
> + void (*unregister_event)(struct cgroup_subsys_state *css,
> +  struct cftype *cft,
>   struct eventfd_ctx *eventfd);

align this line?

>  };

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 08/23] cgroup: pass around cgroup_subsys_state instead of cgroup in subsystem methods

2013-08-01 Thread Li Zefan
> @@ -4199,12 +4208,13 @@ static void init_cgroup_css(struct 
> cgroup_subsys_state *css,
>  /* invoke ->css_online() on a new CSS and mark it online if successful */
>  static int online_css(struct cgroup_subsys *ss, struct cgroup *cgrp)
>  {
> + struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
>   int ret = 0;
>  
>   lockdep_assert_held(_mutex);
>  
>   if (ss->css_online)
> - ret = ss->css_online(cgrp);
> + ret = ss->css_online(css);
>   if (!ret)
>   cgrp->subsys[ss->subsys_id]->flags |= CSS_ONLINE;

Then this can be changed to css->flags |= CSS_ONLINE.

>   return ret;

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 08/23] cgroup: pass around cgroup_subsys_state instead of cgroup in subsystem methods

2013-08-01 Thread Li Zefan
> @@ -4298,7 +4308,7 @@ static long cgroup_create(struct cgroup *parent, struct 
> dentry *dentry,
>   for_each_root_subsys(root, ss) {
>   struct cgroup_subsys_state *css;
>  
> - css = ss->css_alloc(cgrp);
> + css = ss->css_alloc(parent->subsys[ss->subsys_id]);

As this patchset is based on for-3.12 branch, which lacks the fix in for-3.11,
so the css_alloc() in that bug fix is not converted.

>   if (IS_ERR(css)) {
>   err = PTR_ERR(css);
>   goto err_free_all;
> @@ -4377,7 +4387,7 @@ err_free_all:
>  
>   if (css) {
>   percpu_ref_cancel_init(>refcnt);
> - ss->css_free(cgrp);
> + ss->css_free(css);
>   }
>   }
>   mutex_unlock(_mutex);

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 16/23] cgroup: relocate cgroup_advance_iter()

2013-08-01 Thread Li Zefan
On 2013/8/2 5:49, Tejun Heo wrote:
> For some reason, cgroup_advance_iter() is standing lonely all away
> from its iter comrades.  Relocate it.
> 

There're some other functions that are in the same situation. Do you
think it's better to relocate them, or just leave it as it is?

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCHSET cgroup/for-3.12] cgroup: use cgroup_subsys_state as the primary subsystem interface handle

2013-08-01 Thread Li Zefan
On 2013/8/2 5:49, Tejun Heo wrote:
> Hello,
> 
> Currently, struct cgroup * is used as the main interface handle
> between cgroup core and its subsystems, which works but is a bit
> clunky because subsystems usually care much more about css's
> (cgroup_subsys_state) a lot more than cgroups, which is natural as a
> css is the intersection between a cgroup and a subsystem.
> 
> In addition to being a bit clunky, dealing with cgroups directly pose
> a bit of trouble for the planned unified hierarchy support on two
> fronts.  First, most iterations become subsystem dependent as task
> membership is affected by which subtree has the specific subsystem
> enabled and thus require specifying which subsystem the iteration is
> for, which is automatically achieved if the interfaces deal with css's
> instead of cgroups.
> 
> Second, as css's may be created, attached, detached and destroyed
> dynamically multiple times across the lifetime of a given cgroup as
> they're enabled and disabled, which makes cgroup -> css mapping much
> more difficult to synchronize.  Giving out cgroup to subsystems and
> then requiring them to take the extra steps to deal with their css's
> coming and going dynamically is a lot more fragile than cgroup core
> proper handling it internally and giving out the resulting css's to
> subsystems.
> 
> So, this patchset converts all cgroup subsystem APIs to deal with
> css's instead of cgroups.  The patchset is fairly large but most of
> the conversions, while being tedious, aren't complex.  At the end of
> series, subsystems no longer make cgroup -> css mapping themselves and
> cgroup_css() - formerly cgroup_subsys_state() - is made internal to
> cgroup core proper.
> 
> This is a rather large update to the interface and likely to play as a
> barrier when porting commits, which is inconvenient but also provides
> an opportunity to clean up the API where we can as doing so won't
> significantly raise the level of inconvenience.  As such, this
> patchset contains some API cleanups and I'll probably follow up with
> further API updates that I've been meaning to do and, if you have some
> good idea to clean up cgroup internal API, this probably is a good
> time to submit it.
> 
> This patchset contains the following 23 patches.
> 
>  0001-cgroup-s-cgroup_subsys_state-cgroup_css-s-task_subsy.patch
>  0002-cpuset-drop-const-qualifiers-from-struct-cpuset-inst.patch
>  0003-netprio_cgroup-pass-around-css-instead-of-cgroup-and.patch
>  0004-hugetlb_cgroup-pass-around-hugetlb_cgroup-instead-of.patch
>  0005-cgroup-add-subsystem-pointer-to-cgroup_subsys_state.patch
>  0006-cgroup-add-update-accessors-which-obtain-subsys-spec.patch
>  0007-cgroup-add-css_parent.patch
>  0008-cgroup-pass-around-cgroup_subsys_state-instead-of-cg.patch
>  0009-cgroup-add-subsys-backlink-pointer-to-cftype.patch
>  0010-cgroup-pin-cgroup_subsys_state-when-opening-a-cgroup.patch
>  0011-cgroup-add-cgroup-dummy_css.patch
>  0012-cgroup-pass-around-cgroup_subsys_state-instead-of-cg.patch
>  0013-cgroup-convert-cgroup_next_sibling-to-cgroup_next_ch.patch
>  0014-cgroup-always-use-cgroup_next_child-to-walk-the-chil.patch
>  0015-cgroup-make-hierarchy-iterators-deal-with-cgroup_sub.patch
>  0016-cgroup-relocate-cgroup_advance_iter.patch
>  0017-cgroup-rename-cgroup_iter-to-cgroup_task_iter.patch
>  0018-cgroup-make-cgroup_task_iter-remember-the-cgroup-bei.patch
>  0019-cgroup-remove-struct-cgroup_scanner.patch
>  0020-cgroup-make-task-iterators-deal-with-cgroup_subsys_s.patch
>  0021-cgroup-make-cftype-un-register_event-deal-with-cgrou.patch
>  0022-cgroup-make-cgroup_taskset-deal-with-cgroup_subsys_s.patch
>  0023-cgroup-unexport-cgroup_css.patch
> 

Looks good to me!

Acked-by: Li Zefan 

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCHSET cgroup/for-3.12] cgroup: use cgroup_subsys_state as the primary subsystem interface handle

2013-08-01 Thread Li Zefan
On 2013/8/2 5:49, Tejun Heo wrote:
 Hello,
 
 Currently, struct cgroup * is used as the main interface handle
 between cgroup core and its subsystems, which works but is a bit
 clunky because subsystems usually care much more about css's
 (cgroup_subsys_state) a lot more than cgroups, which is natural as a
 css is the intersection between a cgroup and a subsystem.
 
 In addition to being a bit clunky, dealing with cgroups directly pose
 a bit of trouble for the planned unified hierarchy support on two
 fronts.  First, most iterations become subsystem dependent as task
 membership is affected by which subtree has the specific subsystem
 enabled and thus require specifying which subsystem the iteration is
 for, which is automatically achieved if the interfaces deal with css's
 instead of cgroups.
 
 Second, as css's may be created, attached, detached and destroyed
 dynamically multiple times across the lifetime of a given cgroup as
 they're enabled and disabled, which makes cgroup - css mapping much
 more difficult to synchronize.  Giving out cgroup to subsystems and
 then requiring them to take the extra steps to deal with their css's
 coming and going dynamically is a lot more fragile than cgroup core
 proper handling it internally and giving out the resulting css's to
 subsystems.
 
 So, this patchset converts all cgroup subsystem APIs to deal with
 css's instead of cgroups.  The patchset is fairly large but most of
 the conversions, while being tedious, aren't complex.  At the end of
 series, subsystems no longer make cgroup - css mapping themselves and
 cgroup_css() - formerly cgroup_subsys_state() - is made internal to
 cgroup core proper.
 
 This is a rather large update to the interface and likely to play as a
 barrier when porting commits, which is inconvenient but also provides
 an opportunity to clean up the API where we can as doing so won't
 significantly raise the level of inconvenience.  As such, this
 patchset contains some API cleanups and I'll probably follow up with
 further API updates that I've been meaning to do and, if you have some
 good idea to clean up cgroup internal API, this probably is a good
 time to submit it.
 
 This patchset contains the following 23 patches.
 
  0001-cgroup-s-cgroup_subsys_state-cgroup_css-s-task_subsy.patch
  0002-cpuset-drop-const-qualifiers-from-struct-cpuset-inst.patch
  0003-netprio_cgroup-pass-around-css-instead-of-cgroup-and.patch
  0004-hugetlb_cgroup-pass-around-hugetlb_cgroup-instead-of.patch
  0005-cgroup-add-subsystem-pointer-to-cgroup_subsys_state.patch
  0006-cgroup-add-update-accessors-which-obtain-subsys-spec.patch
  0007-cgroup-add-css_parent.patch
  0008-cgroup-pass-around-cgroup_subsys_state-instead-of-cg.patch
  0009-cgroup-add-subsys-backlink-pointer-to-cftype.patch
  0010-cgroup-pin-cgroup_subsys_state-when-opening-a-cgroup.patch
  0011-cgroup-add-cgroup-dummy_css.patch
  0012-cgroup-pass-around-cgroup_subsys_state-instead-of-cg.patch
  0013-cgroup-convert-cgroup_next_sibling-to-cgroup_next_ch.patch
  0014-cgroup-always-use-cgroup_next_child-to-walk-the-chil.patch
  0015-cgroup-make-hierarchy-iterators-deal-with-cgroup_sub.patch
  0016-cgroup-relocate-cgroup_advance_iter.patch
  0017-cgroup-rename-cgroup_iter-to-cgroup_task_iter.patch
  0018-cgroup-make-cgroup_task_iter-remember-the-cgroup-bei.patch
  0019-cgroup-remove-struct-cgroup_scanner.patch
  0020-cgroup-make-task-iterators-deal-with-cgroup_subsys_s.patch
  0021-cgroup-make-cftype-un-register_event-deal-with-cgrou.patch
  0022-cgroup-make-cgroup_taskset-deal-with-cgroup_subsys_s.patch
  0023-cgroup-unexport-cgroup_css.patch
 

Looks good to me!

Acked-by: Li Zefan lize...@huawei.com

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 16/23] cgroup: relocate cgroup_advance_iter()

2013-08-01 Thread Li Zefan
On 2013/8/2 5:49, Tejun Heo wrote:
 For some reason, cgroup_advance_iter() is standing lonely all away
 from its iter comrades.  Relocate it.
 

There're some other functions that are in the same situation. Do you
think it's better to relocate them, or just leave it as it is?

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 08/23] cgroup: pass around cgroup_subsys_state instead of cgroup in subsystem methods

2013-08-01 Thread Li Zefan
 @@ -4298,7 +4308,7 @@ static long cgroup_create(struct cgroup *parent, struct 
 dentry *dentry,
   for_each_root_subsys(root, ss) {
   struct cgroup_subsys_state *css;
  
 - css = ss-css_alloc(cgrp);
 + css = ss-css_alloc(parent-subsys[ss-subsys_id]);

As this patchset is based on for-3.12 branch, which lacks the fix in for-3.11,
so the css_alloc() in that bug fix is not converted.

   if (IS_ERR(css)) {
   err = PTR_ERR(css);
   goto err_free_all;
 @@ -4377,7 +4387,7 @@ err_free_all:
  
   if (css) {
   percpu_ref_cancel_init(css-refcnt);
 - ss-css_free(cgrp);
 + ss-css_free(css);
   }
   }
   mutex_unlock(cgroup_mutex);

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 08/23] cgroup: pass around cgroup_subsys_state instead of cgroup in subsystem methods

2013-08-01 Thread Li Zefan
 @@ -4199,12 +4208,13 @@ static void init_cgroup_css(struct 
 cgroup_subsys_state *css,
  /* invoke -css_online() on a new CSS and mark it online if successful */
  static int online_css(struct cgroup_subsys *ss, struct cgroup *cgrp)
  {
 + struct cgroup_subsys_state *css = cgrp-subsys[ss-subsys_id];
   int ret = 0;
  
   lockdep_assert_held(cgroup_mutex);
  
   if (ss-css_online)
 - ret = ss-css_online(cgrp);
 + ret = ss-css_online(css);
   if (!ret)
   cgrp-subsys[ss-subsys_id]-flags |= CSS_ONLINE;

Then this can be changed to css-flags |= CSS_ONLINE.

   return ret;

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 21/23] cgroup: make cftype-[un]register_event() deal with cgroup_subsys_state instead of cgroup

2013-08-01 Thread Li Zefan
 @@ -506,15 +506,17 @@ struct cftype {
* you want to provide this functionality. Use eventfd_signal()
* on eventfd to send notification to userspace.
*/
 - int (*register_event)(struct cgroup *cgrp, struct cftype *cft,
 - struct eventfd_ctx *eventfd, const char *args);
 + int (*register_event)(struct cgroup_subsys_state *css,
 +   struct cftype *cft, struct eventfd_ctx *eventfd,
 +   const char *args);
   /*
* unregister_event() callback will be called when userspace
* closes the eventfd or on cgroup removing.
* This callback must be implemented, if you want provide
* notification functionality.
*/
 - void (*unregister_event)(struct cgroup *cgrp, struct cftype *cft,
 + void (*unregister_event)(struct cgroup_subsys_state *css,
 +  struct cftype *cft,
   struct eventfd_ctx *eventfd);

align this line?

  };

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v2 3/7] cgroup: restructure the failure path in cgroup_write_event_control()

2013-07-31 Thread Li Zefan
It uses a single label and checks the validity of each pointer. This
is err-prone, and actually we had a bug because one of the check was
insufficient.

Use multi lables as we do in other places.

v2:
- drop initializations of local variables.

Signed-off-by: Li Zefan 
---
 kernel/cgroup.c | 39 ++-
 1 file changed, 18 insertions(+), 21 deletions(-)

diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index fb11569..ae905a5 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -3934,11 +3934,11 @@ static void cgroup_event_ptable_queue_proc(struct file 
*file,
 static int cgroup_write_event_control(struct cgroup *cgrp, struct cftype *cft,
  const char *buffer)
 {
-   struct cgroup_event *event = NULL;
+   struct cgroup_event *event;
struct cgroup *cgrp_cfile;
unsigned int efd, cfd;
-   struct file *efile = NULL;
-   struct file *cfile = NULL;
+   struct file *efile;
+   struct file *cfile;
char *endp;
int ret;
 
@@ -3964,31 +3964,31 @@ static int cgroup_write_event_control(struct cgroup 
*cgrp, struct cftype *cft,
efile = eventfd_fget(efd);
if (IS_ERR(efile)) {
ret = PTR_ERR(efile);
-   goto fail;
+   goto out_kfree;
}
 
event->eventfd = eventfd_ctx_fileget(efile);
if (IS_ERR(event->eventfd)) {
ret = PTR_ERR(event->eventfd);
-   goto fail;
+   goto out_put_efile;
}
 
cfile = fget(cfd);
if (!cfile) {
ret = -EBADF;
-   goto fail;
+   goto out_put_eventfd;
}
 
/* the process need read permission on control file */
/* AV: shouldn't we check that it's been opened for read instead? */
ret = inode_permission(file_inode(cfile), MAY_READ);
if (ret < 0)
-   goto fail;
+   goto out_put_cfile;
 
event->cft = __file_cft(cfile);
if (IS_ERR(event->cft)) {
ret = PTR_ERR(event->cft);
-   goto fail;
+   goto out_put_cfile;
}
 
/*
@@ -3998,18 +3998,18 @@ static int cgroup_write_event_control(struct cgroup 
*cgrp, struct cftype *cft,
cgrp_cfile = __d_cgrp(cfile->f_dentry->d_parent);
if (cgrp_cfile != cgrp) {
ret = -EINVAL;
-   goto fail;
+   goto out_put_cfile;
}
 
if (!event->cft->register_event || !event->cft->unregister_event) {
ret = -EINVAL;
-   goto fail;
+   goto out_put_cfile;
}
 
ret = event->cft->register_event(cgrp, event->cft,
event->eventfd, buffer);
if (ret)
-   goto fail;
+   goto out_put_cfile;
 
efile->f_op->poll(efile, >pt);
 
@@ -4029,16 +4029,13 @@ static int cgroup_write_event_control(struct cgroup 
*cgrp, struct cftype *cft,
 
return 0;
 
-fail:
-   if (cfile)
-   fput(cfile);
-
-   if (event && event->eventfd && !IS_ERR(event->eventfd))
-   eventfd_ctx_put(event->eventfd);
-
-   if (!IS_ERR_OR_NULL(efile))
-   fput(efile);
-
+out_put_cfile:
+   fput(cfile);
+out_put_eventfd:
+   eventfd_ctx_put(event->eventfd);
+out_put_efile:
+   fput(efile);
+out_kfree:
kfree(event);
 
return ret;
-- 
1.8.0.2
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v2 4/7] cgroup: rename cgroup_pidlist->mutex

2013-07-31 Thread Li Zefan
It's a rw_semaphore not a mutex.

Signed-off-by: Li Zefan 
---
 kernel/cgroup.c | 22 +++---
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index ae905a5..6662811 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -3436,7 +3436,7 @@ struct cgroup_pidlist {
/* pointer to the cgroup we belong to, for list removal purposes */
struct cgroup *owner;
/* protects the other fields */
-   struct rw_semaphore mutex;
+   struct rw_semaphore rwsem;
 };
 
 /*
@@ -3509,7 +3509,7 @@ static struct cgroup_pidlist *cgroup_pidlist_find(struct 
cgroup *cgrp,
struct pid_namespace *ns = task_active_pid_ns(current);
 
/*
-* We can't drop the pidlist_mutex before taking the l->mutex in case
+* We can't drop the pidlist_mutex before taking the l->rwsem in case
 * the last ref-holder is trying to remove l from the list at the same
 * time. Holding the pidlist_mutex precludes somebody taking whichever
 * list we find out from under us - compare release_pid_array().
@@ -3518,7 +3518,7 @@ static struct cgroup_pidlist *cgroup_pidlist_find(struct 
cgroup *cgrp,
list_for_each_entry(l, >pidlists, links) {
if (l->key.type == type && l->key.ns == ns) {
/* make sure l doesn't vanish out from under us */
-   down_write(>mutex);
+   down_write(>rwsem);
mutex_unlock(>pidlist_mutex);
return l;
}
@@ -3529,8 +3529,8 @@ static struct cgroup_pidlist *cgroup_pidlist_find(struct 
cgroup *cgrp,
mutex_unlock(>pidlist_mutex);
return l;
}
-   init_rwsem(>mutex);
-   down_write(>mutex);
+   init_rwsem(>rwsem);
+   down_write(>rwsem);
l->key.type = type;
l->key.ns = get_pid_ns(ns);
l->owner = cgrp;
@@ -3591,7 +3591,7 @@ static int pidlist_array_load(struct cgroup *cgrp, enum 
cgroup_filetype type,
l->list = array;
l->length = length;
l->use_count++;
-   up_write(>mutex);
+   up_write(>rwsem);
*lp = l;
return 0;
 }
@@ -3669,7 +3669,7 @@ static void *cgroup_pidlist_start(struct seq_file *s, 
loff_t *pos)
int index = 0, pid = *pos;
int *iter;
 
-   down_read(>mutex);
+   down_read(>rwsem);
if (pid) {
int end = l->length;
 
@@ -3696,7 +3696,7 @@ static void *cgroup_pidlist_start(struct seq_file *s, 
loff_t *pos)
 static void cgroup_pidlist_stop(struct seq_file *s, void *v)
 {
struct cgroup_pidlist *l = s->private;
-   up_read(>mutex);
+   up_read(>rwsem);
 }
 
 static void *cgroup_pidlist_next(struct seq_file *s, void *v, loff_t *pos)
@@ -3742,7 +3742,7 @@ static void cgroup_release_pid_array(struct 
cgroup_pidlist *l)
 * pidlist_mutex, we have to take pidlist_mutex first.
 */
mutex_lock(>owner->pidlist_mutex);
-   down_write(>mutex);
+   down_write(>rwsem);
BUG_ON(!l->use_count);
if (!--l->use_count) {
/* we're the last user if refcount is 0; remove and free */
@@ -3750,12 +3750,12 @@ static void cgroup_release_pid_array(struct 
cgroup_pidlist *l)
mutex_unlock(>owner->pidlist_mutex);
pidlist_free(l->list);
put_pid_ns(l->key.ns);
-   up_write(>mutex);
+   up_write(>rwsem);
kfree(l);
return;
}
mutex_unlock(>owner->pidlist_mutex);
-   up_write(>mutex);
+   up_write(>rwsem);
 }
 
 static int cgroup_pidlist_release(struct inode *inode, struct file *file)
-- 
1.8.0.2

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 5/7] cgroup: remove struct cgroup_seqfile_state

2013-07-31 Thread Li Zefan
On 2013/7/31 19:35, Tejun Heo wrote:
> On Wed, Jul 31, 2013 at 05:27:39PM +0800, Li Zefan wrote:
>>>  static int cgroup_seqfile_release(struct inode *inode, struct file *file)
>>>  {
>>> -   struct seq_file *seq = file->private_data;
>>> -   kfree(seq->private);
>>> return single_release(inode, file);
>>>  }
>>
>> Oh, I just realized now we can remove cgroup_seqfile_release().
>>
>> Will send out an updated patch...
> 
> Applied the updated patch.  Can you please post updated patches as
> replies to the original path from next time?  That makes things easier
> to track on my side.
> 

Sure.

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v2 5/5] cgroup: remove struct cgroup_seqfile_state

2013-07-31 Thread Li Zefan
On 2013/7/31 19:36, Tejun Heo wrote:
> On Wed, Jul 31, 2013 at 05:36:25PM +0800, Li Zefan wrote:
>> We can use struct cfent instead.
>>
>> v2:
>> - remove cgroup_seqfile_release().
>>
>> Signed-off-by: Li Zefan 
> 
> Applied to libata/for-3.12.
> 

libata? I hope you didn't. :)

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 6/7] cgroup: rename current_css_set_cg_links

2013-07-31 Thread Li Zefan
On 2013/7/31 19:38, Tejun Heo wrote:
> Hello,
> 
> On Wed, Jul 31, 2013 at 04:18:07PM +0800, Li Zefan wrote:
>> As we've renamed css_set->cg_links to css_set->cgrp_links (See commit
>> 69d0206c79 "cgroup: bring some sanity to naming around cg_cgroup_link"),
>> it's better to also rename the debug file.
>>
>> As the "debug" cgroup subsystem is for debug only and acts as a sample
>> on how to write a cgroup subsystem, it's fine to change the interface.
> 
> I'd rather not do this.  It doesn't really buy us anything and stuff
> like cgrp_links isn't interesting to actual controllers anyway.
> 

not a big deal

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v2 5/5] cgroup: remove struct cgroup_seqfile_state

2013-07-31 Thread Li Zefan
We can use struct cfent instead.

v2:
- remove cgroup_seqfile_release().

Signed-off-by: Li Zefan 
---
 kernel/cgroup.c | 45 +
 1 file changed, 13 insertions(+), 32 deletions(-)

diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 0d378b1..c836dff 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2362,11 +2362,6 @@ static ssize_t cgroup_file_read(struct file *file, char 
__user *buf,
  * supports string->u64 maps, but can be extended in future.
  */
 
-struct cgroup_seqfile_state {
-   struct cftype *cft;
-   struct cgroup *cgroup;
-};
-
 static int cgroup_map_add(struct cgroup_map_cb *cb, const char *key, u64 value)
 {
struct seq_file *sf = cb->state;
@@ -2375,59 +2370,45 @@ static int cgroup_map_add(struct cgroup_map_cb *cb, 
const char *key, u64 value)
 
 static int cgroup_seqfile_show(struct seq_file *m, void *arg)
 {
-   struct cgroup_seqfile_state *state = m->private;
-   struct cftype *cft = state->cft;
+   struct cfent *cfe = m->private;
+   struct cftype *cft = cfe->type;
+   struct cgroup *cgrp = __d_cgrp(cfe->dentry->d_parent);
+
if (cft->read_map) {
struct cgroup_map_cb cb = {
.fill = cgroup_map_add,
.state = m,
};
-   return cft->read_map(state->cgroup, cft, );
+   return cft->read_map(cgrp, cft, );
}
-   return cft->read_seq_string(state->cgroup, cft, m);
-}
-
-static int cgroup_seqfile_release(struct inode *inode, struct file *file)
-{
-   struct seq_file *seq = file->private_data;
-   kfree(seq->private);
-   return single_release(inode, file);
+   return cft->read_seq_string(cgrp, cft, m);
 }
 
 static const struct file_operations cgroup_seqfile_operations = {
.read = seq_read,
.write = cgroup_file_write,
.llseek = seq_lseek,
-   .release = cgroup_seqfile_release,
+   .release = single_release,
 };
 
 static int cgroup_file_open(struct inode *inode, struct file *file)
 {
int err;
+   struct cfent *cfe;
struct cftype *cft;
 
err = generic_file_open(inode, file);
if (err)
return err;
-   cft = __d_cft(file->f_dentry);
+   cfe = __d_cfe(file->f_dentry);
+   cft = cfe->type;
 
if (cft->read_map || cft->read_seq_string) {
-   struct cgroup_seqfile_state *state;
-
-   state = kzalloc(sizeof(*state), GFP_USER);
-   if (!state)
-   return -ENOMEM;
-
-   state->cft = cft;
-   state->cgroup = __d_cgrp(file->f_dentry->d_parent);
file->f_op = _seqfile_operations;
-   err = single_open(file, cgroup_seqfile_show, state);
-   if (err < 0)
-   kfree(state);
-   } else if (cft->open)
+   err = single_open(file, cgroup_seqfile_show, cfe);
+   } else if (cft->open) {
err = cft->open(inode, file);
-   else
-   err = 0;
+   }
 
return err;
 }
-- 
1.8.0.2
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 5/7] cgroup: remove struct cgroup_seqfile_state

2013-07-31 Thread Li Zefan
>  static int cgroup_seqfile_release(struct inode *inode, struct file *file)
>  {
> - struct seq_file *seq = file->private_data;
> - kfree(seq->private);
>   return single_release(inode, file);
>  }

Oh, I just realized now we can remove cgroup_seqfile_release().

Will send out an updated patch...

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 0/7] some pending cgroup patches

2013-07-31 Thread Li Zefan
The first one is a small bug fix, and the rest are pure cleanups.

Li Zefan (7):
  cgroup: fix a leak when percpu_ref_init() fails
  cgroup: remove sparse tags from offline_css()
  cgroup: restructure the failure path in cgroup_write_event_control()
  cgroup: rename cgroup_pidlist->mutex
  cgroup: remove struct cgroup_seqfile_state
  cgroup: rename current_css_set_cg_links
  cgroup: more naming cleanups
--
 include/linux/cgroup.h |   6 +--
 kernel/cgroup.c| 138 
---
 kernel/cpuset.c|  16 
 3 files changed, 72 insertions(+), 88 deletions(-)
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 6/7] cgroup: rename current_css_set_cg_links

2013-07-31 Thread Li Zefan
As we've renamed css_set->cg_links to css_set->cgrp_links (See commit
69d0206c79 "cgroup: bring some sanity to naming around cg_cgroup_link"),
it's better to also rename the debug file.

As the "debug" cgroup subsystem is for debug only and acts as a sample
on how to write a cgroup subsystem, it's fine to change the interface.

Signed-off-by: Li Zefan 
---
 kernel/cgroup.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index ac77d87..89d63b0 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -5312,9 +5312,9 @@ static u64 current_css_set_refcount_read(struct cgroup 
*cgrp,
return count;
 }
 
-static int current_css_set_cg_links_read(struct cgroup *cgrp,
-struct cftype *cft,
-struct seq_file *seq)
+static int current_css_set_cgrp_links_read(struct cgroup *cgrp,
+  struct cftype *cft,
+  struct seq_file *seq)
 {
struct cgrp_cset_link *link;
struct css_set *cset;
@@ -5387,8 +5387,8 @@ static struct cftype debug_files[] =  {
},
 
{
-   .name = "current_css_set_cg_links",
-   .read_seq_string = current_css_set_cg_links_read,
+   .name = "current_css_set_cgrp_links",
+   .read_seq_string = current_css_set_cgrp_links_read,
},
 
{
-- 
1.8.0.2
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 5/7] cgroup: remove struct cgroup_seqfile_state

2013-07-31 Thread Li Zefan
We can use struct struct cfent instead.

Signed-off-by: Li Zefan 
---
 kernel/cgroup.c | 38 --
 1 file changed, 12 insertions(+), 26 deletions(-)

diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 0d378b1..ac77d87 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2362,11 +2362,6 @@ static ssize_t cgroup_file_read(struct file *file, char 
__user *buf,
  * supports string->u64 maps, but can be extended in future.
  */
 
-struct cgroup_seqfile_state {
-   struct cftype *cft;
-   struct cgroup *cgroup;
-};
-
 static int cgroup_map_add(struct cgroup_map_cb *cb, const char *key, u64 value)
 {
struct seq_file *sf = cb->state;
@@ -2375,22 +2370,22 @@ static int cgroup_map_add(struct cgroup_map_cb *cb, 
const char *key, u64 value)
 
 static int cgroup_seqfile_show(struct seq_file *m, void *arg)
 {
-   struct cgroup_seqfile_state *state = m->private;
-   struct cftype *cft = state->cft;
+   struct cfent *cfe = m->private;
+   struct cftype *cft = cfe->type;
+   struct cgroup *cgrp = __d_cgrp(cfe->dentry->d_parent);
+
if (cft->read_map) {
struct cgroup_map_cb cb = {
.fill = cgroup_map_add,
.state = m,
};
-   return cft->read_map(state->cgroup, cft, );
+   return cft->read_map(cgrp, cft, );
}
-   return cft->read_seq_string(state->cgroup, cft, m);
+   return cft->read_seq_string(cgrp, cft, m);
 }
 
 static int cgroup_seqfile_release(struct inode *inode, struct file *file)
 {
-   struct seq_file *seq = file->private_data;
-   kfree(seq->private);
return single_release(inode, file);
 }
 
@@ -2404,30 +2399,21 @@ static const struct file_operations 
cgroup_seqfile_operations = {
 static int cgroup_file_open(struct inode *inode, struct file *file)
 {
int err;
+   struct cfent *cfe;
struct cftype *cft;
 
err = generic_file_open(inode, file);
if (err)
return err;
-   cft = __d_cft(file->f_dentry);
+   cfe = __d_cfe(file->f_dentry);
+   cft = cfe->type;
 
if (cft->read_map || cft->read_seq_string) {
-   struct cgroup_seqfile_state *state;
-
-   state = kzalloc(sizeof(*state), GFP_USER);
-   if (!state)
-   return -ENOMEM;
-
-   state->cft = cft;
-   state->cgroup = __d_cgrp(file->f_dentry->d_parent);
file->f_op = _seqfile_operations;
-   err = single_open(file, cgroup_seqfile_show, state);
-   if (err < 0)
-   kfree(state);
-   } else if (cft->open)
+   err = single_open(file, cgroup_seqfile_show, cfe);
+   } else if (cft->open) {
err = cft->open(inode, file);
-   else
-   err = 0;
+   }
 
return err;
 }
-- 
1.8.0.2

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 7/7] cgroup: more naming cleanups

2013-07-31 Thread Li Zefan
Constantly use @cset for css_set varabbles and use @cgrp as cgroup
variables.

Signed-off-by: Li Zefan 
---
 include/linux/cgroup.h |  6 +++---
 kernel/cgroup.c| 26 +-
 kernel/cpuset.c| 16 
 3 files changed, 24 insertions(+), 24 deletions(-)

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index c07e175..ee048ba 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -400,8 +400,8 @@ struct cgroup_map_cb {
 
 /* cftype->flags */
 enum {
-   CFTYPE_ONLY_ON_ROOT = (1 << 0), /* only create on root cg */
-   CFTYPE_NOT_ON_ROOT  = (1 << 1), /* don't create on root cg */
+   CFTYPE_ONLY_ON_ROOT = (1 << 0), /* only create on root cgrp */
+   CFTYPE_NOT_ON_ROOT  = (1 << 1), /* don't create on root cgrp */
CFTYPE_INSANE   = (1 << 2), /* don't create if 
sane_behavior */
 };
 
@@ -519,7 +519,7 @@ struct cftype_set {
 };
 
 struct cgroup_scanner {
-   struct cgroup *cg;
+   struct cgroup *cgrp;
int (*test_task)(struct task_struct *p, struct cgroup_scanner *scan);
void (*process_task)(struct task_struct *p,
struct cgroup_scanner *scan);
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 89d63b0..3729f43 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -431,7 +431,7 @@ static inline void put_css_set_taskexit(struct css_set 
*cset)
  * @new_cgrp: cgroup that's being entered by the task
  * @template: desired set of css pointers in css_set (pre-calculated)
  *
- * Returns true if "cg" matches "old_cg" except for the hierarchy
+ * Returns true if "cset" matches "old_cset" except for the hierarchy
  * which "new_cgrp" belongs to, for which it should match "new_cgrp".
  */
 static bool compare_css_sets(struct css_set *cset,
@@ -1804,7 +1804,7 @@ EXPORT_SYMBOL_GPL(task_cgroup_path_from_hierarchy);
 struct task_and_cgroup {
struct task_struct  *task;
struct cgroup   *cgrp;
-   struct css_set  *cg;
+   struct css_set  *cset;
 };
 
 struct cgroup_taskset {
@@ -2022,8 +2022,8 @@ static int cgroup_attach_task(struct cgroup *cgrp, struct 
task_struct *tsk,
 
tc = flex_array_get(group, i);
old_cset = task_css_set(tc->task);
-   tc->cg = find_css_set(old_cset, cgrp);
-   if (!tc->cg) {
+   tc->cset = find_css_set(old_cset, cgrp);
+   if (!tc->cset) {
retval = -ENOMEM;
goto out_put_css_set_refs;
}
@@ -2036,7 +2036,7 @@ static int cgroup_attach_task(struct cgroup *cgrp, struct 
task_struct *tsk,
 */
for (i = 0; i < group_size; i++) {
tc = flex_array_get(group, i);
-   cgroup_task_migrate(tc->cgrp, tc->task, tc->cg);
+   cgroup_task_migrate(tc->cgrp, tc->task, tc->cset);
}
/* nothing is sensitive to fork() after this point. */
 
@@ -2056,9 +2056,9 @@ out_put_css_set_refs:
if (retval) {
for (i = 0; i < group_size; i++) {
tc = flex_array_get(group, i);
-   if (!tc->cg)
+   if (!tc->cset)
break;
-   put_css_set(tc->cg);
+   put_css_set(tc->cset);
}
}
 out_cancel_attach:
@@ -2168,9 +2168,9 @@ int cgroup_attach_task_all(struct task_struct *from, 
struct task_struct *tsk)
 
mutex_lock(_mutex);
for_each_active_root(root) {
-   struct cgroup *from_cg = task_cgroup_from_root(from, root);
+   struct cgroup *from_cgrp = task_cgroup_from_root(from, root);
 
-   retval = cgroup_attach_task(from_cg, tsk, false);
+   retval = cgroup_attach_task(from_cgrp, tsk, false);
if (retval)
break;
}
@@ -3275,8 +3275,8 @@ int cgroup_scan_tasks(struct cgroup_scanner *scan)
 * guarantees forward progress and that we don't miss any tasks.
 */
heap->size = 0;
-   cgroup_iter_start(scan->cg, );
-   while ((p = cgroup_iter_next(scan->cg, ))) {
+   cgroup_iter_start(scan->cgrp, );
+   while ((p = cgroup_iter_next(scan->cgrp, ))) {
/*
 * Only affect tasks that qualify per the caller's callback,
 * if he provided one
@@ -3309,7 +3309,7 @@ int cgroup_scan_tasks(struct cgroup_scanner *scan)
 * the heap and wasn't inserted
 */
}
-   cgroup_iter_end(scan->cg, );
+   cgroup_iter_end(scan->cgrp, );
 
if (heap->size) {
for (i = 0; i < heap->size; i++) {
@@ -3355,7 +3355,7 @@ int cgroup_tr

[PATCH 3/7] cgroup: restructure the failure path in cgroup_write_event_control()

2013-07-31 Thread Li Zefan
It uses a single label and checks the validity of each pointer. This
is err-prone, and actually we once had a bug because one of the check
was insufficient.

Use multi lables as we do in other places.

Signed-off-by: Li Zefan 
---
 kernel/cgroup.c | 33 +++--
 1 file changed, 15 insertions(+), 18 deletions(-)

diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 916a699..f0d7f5d 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -3945,31 +3945,31 @@ static int cgroup_write_event_control(struct cgroup 
*cgrp, struct cftype *cft,
efile = eventfd_fget(efd);
if (IS_ERR(efile)) {
ret = PTR_ERR(efile);
-   goto fail;
+   goto out_kfree;
}
 
event->eventfd = eventfd_ctx_fileget(efile);
if (IS_ERR(event->eventfd)) {
ret = PTR_ERR(event->eventfd);
-   goto fail;
+   goto out_put_efile;
}
 
cfile = fget(cfd);
if (!cfile) {
ret = -EBADF;
-   goto fail;
+   goto out_put_eventfd;
}
 
/* the process need read permission on control file */
/* AV: shouldn't we check that it's been opened for read instead? */
ret = inode_permission(file_inode(cfile), MAY_READ);
if (ret < 0)
-   goto fail;
+   goto out_put_cfile;
 
event->cft = __file_cft(cfile);
if (IS_ERR(event->cft)) {
ret = PTR_ERR(event->cft);
-   goto fail;
+   goto out_put_cfile;
}
 
/*
@@ -3979,18 +3979,18 @@ static int cgroup_write_event_control(struct cgroup 
*cgrp, struct cftype *cft,
cgrp_cfile = __d_cgrp(cfile->f_dentry->d_parent);
if (cgrp_cfile != cgrp) {
ret = -EINVAL;
-   goto fail;
+   goto out_put_cfile;
}
 
if (!event->cft->register_event || !event->cft->unregister_event) {
ret = -EINVAL;
-   goto fail;
+   goto out_put_cfile;
}
 
ret = event->cft->register_event(cgrp, event->cft,
event->eventfd, buffer);
if (ret)
-   goto fail;
+   goto out_put_cfile;
 
efile->f_op->poll(efile, >pt);
 
@@ -4010,16 +4010,13 @@ static int cgroup_write_event_control(struct cgroup 
*cgrp, struct cftype *cft,
 
return 0;
 
-fail:
-   if (cfile)
-   fput(cfile);
-
-   if (event && event->eventfd && !IS_ERR(event->eventfd))
-   eventfd_ctx_put(event->eventfd);
-
-   if (!IS_ERR_OR_NULL(efile))
-   fput(efile);
-
+out_put_cfile:
+   fput(cfile);
+out_put_eventfd:
+   eventfd_ctx_put(event->eventfd);
+out_put_efile:
+   fput(efile);
+out_kfree:
kfree(event);
 
return ret;
-- 
1.8.0.2
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 4/7] cgroup: rename cgroup_pidlist->mutex

2013-07-31 Thread Li Zefan
It's a semaphore not a mutex.

Signed-off-by: Li Zefan 
---
 kernel/cgroup.c | 22 +++---
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index f0d7f5d..0d378b1 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -3417,7 +3417,7 @@ struct cgroup_pidlist {
/* pointer to the cgroup we belong to, for list removal purposes */
struct cgroup *owner;
/* protects the other fields */
-   struct rw_semaphore mutex;
+   struct rw_semaphore sem;
 };
 
 /*
@@ -3490,7 +3490,7 @@ static struct cgroup_pidlist *cgroup_pidlist_find(struct 
cgroup *cgrp,
struct pid_namespace *ns = task_active_pid_ns(current);
 
/*
-* We can't drop the pidlist_mutex before taking the l->mutex in case
+* We can't drop the pidlist_mutex before taking the l->sem in case
 * the last ref-holder is trying to remove l from the list at the same
 * time. Holding the pidlist_mutex precludes somebody taking whichever
 * list we find out from under us - compare release_pid_array().
@@ -3499,7 +3499,7 @@ static struct cgroup_pidlist *cgroup_pidlist_find(struct 
cgroup *cgrp,
list_for_each_entry(l, >pidlists, links) {
if (l->key.type == type && l->key.ns == ns) {
/* make sure l doesn't vanish out from under us */
-   down_write(>mutex);
+   down_write(>sem);
mutex_unlock(>pidlist_mutex);
return l;
}
@@ -3510,8 +3510,8 @@ static struct cgroup_pidlist *cgroup_pidlist_find(struct 
cgroup *cgrp,
mutex_unlock(>pidlist_mutex);
return l;
}
-   init_rwsem(>mutex);
-   down_write(>mutex);
+   init_rwsem(>sem);
+   down_write(>sem);
l->key.type = type;
l->key.ns = get_pid_ns(ns);
l->owner = cgrp;
@@ -3572,7 +3572,7 @@ static int pidlist_array_load(struct cgroup *cgrp, enum 
cgroup_filetype type,
l->list = array;
l->length = length;
l->use_count++;
-   up_write(>mutex);
+   up_write(>sem);
*lp = l;
return 0;
 }
@@ -3650,7 +3650,7 @@ static void *cgroup_pidlist_start(struct seq_file *s, 
loff_t *pos)
int index = 0, pid = *pos;
int *iter;
 
-   down_read(>mutex);
+   down_read(>sem);
if (pid) {
int end = l->length;
 
@@ -3677,7 +3677,7 @@ static void *cgroup_pidlist_start(struct seq_file *s, 
loff_t *pos)
 static void cgroup_pidlist_stop(struct seq_file *s, void *v)
 {
struct cgroup_pidlist *l = s->private;
-   up_read(>mutex);
+   up_read(>sem);
 }
 
 static void *cgroup_pidlist_next(struct seq_file *s, void *v, loff_t *pos)
@@ -3723,7 +3723,7 @@ static void cgroup_release_pid_array(struct 
cgroup_pidlist *l)
 * pidlist_mutex, we have to take pidlist_mutex first.
 */
mutex_lock(>owner->pidlist_mutex);
-   down_write(>mutex);
+   down_write(>sem);
BUG_ON(!l->use_count);
if (!--l->use_count) {
/* we're the last user if refcount is 0; remove and free */
@@ -3731,12 +3731,12 @@ static void cgroup_release_pid_array(struct 
cgroup_pidlist *l)
mutex_unlock(>owner->pidlist_mutex);
pidlist_free(l->list);
put_pid_ns(l->key.ns);
-   up_write(>mutex);
+   up_write(>sem);
kfree(l);
return;
}
mutex_unlock(>owner->pidlist_mutex);
-   up_write(>mutex);
+   up_write(>sem);
 }
 
 static int cgroup_pidlist_release(struct inode *inode, struct file *file)
-- 
1.8.0.2
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 2/7] cgroup: remove sparse tags from offline_css()

2013-07-31 Thread Li Zefan
This should have been removed in commit d7eeac1913ff
("cgroup: hold cgroup_mutex before calling css_offline").

While at it, update the comments.

Signed-off-by: Li Zefan 
---
 kernel/cgroup.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index e60eba2..916a699 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -4164,7 +4164,7 @@ static void init_cgroup_css(struct cgroup_subsys_state 
*css,
INIT_WORK(>dput_work, css_dput_fn);
 }
 
-/* invoke ->post_create() on a new CSS and mark it online if successful */
+/* invoke ->css_online() on a new CSS and mark it online if successful */
 static int online_css(struct cgroup_subsys *ss, struct cgroup *cgrp)
 {
int ret = 0;
@@ -4178,9 +4178,8 @@ static int online_css(struct cgroup_subsys *ss, struct 
cgroup *cgrp)
return ret;
 }
 
-/* if the CSS is online, invoke ->pre_destory() on it and mark it offline */
+/* if the CSS is online, invoke ->css_offline() on it and mark it offline */
 static void offline_css(struct cgroup_subsys *ss, struct cgroup *cgrp)
-   __releases(_mutex) __acquires(_mutex)
 {
struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
 
-- 
1.8.0.2
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 1/7] cgroup: fix a leak when percpu_ref_init() fails

2013-07-31 Thread Li Zefan
ss->css_free() is not called when perfcpu_ref_init() fails.

Signed-off-by: Li Zefan 
---
 kernel/cgroup.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index a64b7b8..e60eba2 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -4274,8 +4274,10 @@ static long cgroup_create(struct cgroup *parent, struct 
dentry *dentry,
}
 
err = percpu_ref_init(>refcnt, css_release);
-   if (err)
+   if (err) {
+   ss->css_free(cgrp);
goto err_free_all;
+   }
 
init_cgroup_css(css, ss, cgrp);
}
-- 
1.8.0.2
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 2/2] Staging: rtl8192u/ieee80211: add missing single_release()

2013-07-31 Thread Li Zefan
The debug file is opened with single_open(), but there's no
single_release().

Signed-off-by: Li Zefan 
---
 drivers/staging/rtl8192u/ieee80211/ieee80211_module.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/staging/rtl8192u/ieee80211/ieee80211_module.c 
b/drivers/staging/rtl8192u/ieee80211/ieee80211_module.c
index e0870c0..434c431 100644
--- a/drivers/staging/rtl8192u/ieee80211/ieee80211_module.c
+++ b/drivers/staging/rtl8192u/ieee80211/ieee80211_module.c
@@ -268,7 +268,8 @@ static const struct file_operations fops = {
.open = open_debug_level,
.read = seq_read,
.llseek = seq_lseek,
-   .write = write_debug_level
+   .write = write_debug_level,
+   .release = single_release,
 };
 
 int __init ieee80211_debug_init(void)
-- 
1.8.0.2
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 1/2] Staging: rtl8192e: add missing single_release()

2013-07-31 Thread Li Zefan
The debug file is opened with single_open(), but there's no
single_release().

Signed-off-by: Li Zefan 
---
 drivers/staging/rtl8192e/rtllib_module.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/staging/rtl8192e/rtllib_module.c 
b/drivers/staging/rtl8192e/rtllib_module.c
index 84ea721..51d46e0 100644
--- a/drivers/staging/rtl8192e/rtllib_module.c
+++ b/drivers/staging/rtl8192e/rtllib_module.c
@@ -233,7 +233,8 @@ static const struct file_operations fops = {
.open = open_debug_level,
.read = seq_read,
.llseek = seq_lseek,
-   .write = write_debug_level
+   .write = write_debug_level,
+   .release = single_release,
 };
 
 int __init rtllib_init(void)
-- 
1.8.0.2
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 1/2] Staging: rtl8192e: add missing single_release()

2013-07-31 Thread Li Zefan
The debug file is opened with single_open(), but there's no
single_release().

Signed-off-by: Li Zefan lize...@huawei.com
---
 drivers/staging/rtl8192e/rtllib_module.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/staging/rtl8192e/rtllib_module.c 
b/drivers/staging/rtl8192e/rtllib_module.c
index 84ea721..51d46e0 100644
--- a/drivers/staging/rtl8192e/rtllib_module.c
+++ b/drivers/staging/rtl8192e/rtllib_module.c
@@ -233,7 +233,8 @@ static const struct file_operations fops = {
.open = open_debug_level,
.read = seq_read,
.llseek = seq_lseek,
-   .write = write_debug_level
+   .write = write_debug_level,
+   .release = single_release,
 };
 
 int __init rtllib_init(void)
-- 
1.8.0.2
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 2/2] Staging: rtl8192u/ieee80211: add missing single_release()

2013-07-31 Thread Li Zefan
The debug file is opened with single_open(), but there's no
single_release().

Signed-off-by: Li Zefan lize...@huawei.com
---
 drivers/staging/rtl8192u/ieee80211/ieee80211_module.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/staging/rtl8192u/ieee80211/ieee80211_module.c 
b/drivers/staging/rtl8192u/ieee80211/ieee80211_module.c
index e0870c0..434c431 100644
--- a/drivers/staging/rtl8192u/ieee80211/ieee80211_module.c
+++ b/drivers/staging/rtl8192u/ieee80211/ieee80211_module.c
@@ -268,7 +268,8 @@ static const struct file_operations fops = {
.open = open_debug_level,
.read = seq_read,
.llseek = seq_lseek,
-   .write = write_debug_level
+   .write = write_debug_level,
+   .release = single_release,
 };
 
 int __init ieee80211_debug_init(void)
-- 
1.8.0.2
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 2/7] cgroup: remove sparse tags from offline_css()

2013-07-31 Thread Li Zefan
This should have been removed in commit d7eeac1913ff
(cgroup: hold cgroup_mutex before calling css_offline).

While at it, update the comments.

Signed-off-by: Li Zefan lize...@huawei.com
---
 kernel/cgroup.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index e60eba2..916a699 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -4164,7 +4164,7 @@ static void init_cgroup_css(struct cgroup_subsys_state 
*css,
INIT_WORK(css-dput_work, css_dput_fn);
 }
 
-/* invoke -post_create() on a new CSS and mark it online if successful */
+/* invoke -css_online() on a new CSS and mark it online if successful */
 static int online_css(struct cgroup_subsys *ss, struct cgroup *cgrp)
 {
int ret = 0;
@@ -4178,9 +4178,8 @@ static int online_css(struct cgroup_subsys *ss, struct 
cgroup *cgrp)
return ret;
 }
 
-/* if the CSS is online, invoke -pre_destory() on it and mark it offline */
+/* if the CSS is online, invoke -css_offline() on it and mark it offline */
 static void offline_css(struct cgroup_subsys *ss, struct cgroup *cgrp)
-   __releases(cgroup_mutex) __acquires(cgroup_mutex)
 {
struct cgroup_subsys_state *css = cgrp-subsys[ss-subsys_id];
 
-- 
1.8.0.2
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 1/7] cgroup: fix a leak when percpu_ref_init() fails

2013-07-31 Thread Li Zefan
ss-css_free() is not called when perfcpu_ref_init() fails.

Signed-off-by: Li Zefan lize...@huawei.com
---
 kernel/cgroup.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index a64b7b8..e60eba2 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -4274,8 +4274,10 @@ static long cgroup_create(struct cgroup *parent, struct 
dentry *dentry,
}
 
err = percpu_ref_init(css-refcnt, css_release);
-   if (err)
+   if (err) {
+   ss-css_free(cgrp);
goto err_free_all;
+   }
 
init_cgroup_css(css, ss, cgrp);
}
-- 
1.8.0.2
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 4/7] cgroup: rename cgroup_pidlist-mutex

2013-07-31 Thread Li Zefan
It's a semaphore not a mutex.

Signed-off-by: Li Zefan lize...@huawei.com
---
 kernel/cgroup.c | 22 +++---
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index f0d7f5d..0d378b1 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -3417,7 +3417,7 @@ struct cgroup_pidlist {
/* pointer to the cgroup we belong to, for list removal purposes */
struct cgroup *owner;
/* protects the other fields */
-   struct rw_semaphore mutex;
+   struct rw_semaphore sem;
 };
 
 /*
@@ -3490,7 +3490,7 @@ static struct cgroup_pidlist *cgroup_pidlist_find(struct 
cgroup *cgrp,
struct pid_namespace *ns = task_active_pid_ns(current);
 
/*
-* We can't drop the pidlist_mutex before taking the l-mutex in case
+* We can't drop the pidlist_mutex before taking the l-sem in case
 * the last ref-holder is trying to remove l from the list at the same
 * time. Holding the pidlist_mutex precludes somebody taking whichever
 * list we find out from under us - compare release_pid_array().
@@ -3499,7 +3499,7 @@ static struct cgroup_pidlist *cgroup_pidlist_find(struct 
cgroup *cgrp,
list_for_each_entry(l, cgrp-pidlists, links) {
if (l-key.type == type  l-key.ns == ns) {
/* make sure l doesn't vanish out from under us */
-   down_write(l-mutex);
+   down_write(l-sem);
mutex_unlock(cgrp-pidlist_mutex);
return l;
}
@@ -3510,8 +3510,8 @@ static struct cgroup_pidlist *cgroup_pidlist_find(struct 
cgroup *cgrp,
mutex_unlock(cgrp-pidlist_mutex);
return l;
}
-   init_rwsem(l-mutex);
-   down_write(l-mutex);
+   init_rwsem(l-sem);
+   down_write(l-sem);
l-key.type = type;
l-key.ns = get_pid_ns(ns);
l-owner = cgrp;
@@ -3572,7 +3572,7 @@ static int pidlist_array_load(struct cgroup *cgrp, enum 
cgroup_filetype type,
l-list = array;
l-length = length;
l-use_count++;
-   up_write(l-mutex);
+   up_write(l-sem);
*lp = l;
return 0;
 }
@@ -3650,7 +3650,7 @@ static void *cgroup_pidlist_start(struct seq_file *s, 
loff_t *pos)
int index = 0, pid = *pos;
int *iter;
 
-   down_read(l-mutex);
+   down_read(l-sem);
if (pid) {
int end = l-length;
 
@@ -3677,7 +3677,7 @@ static void *cgroup_pidlist_start(struct seq_file *s, 
loff_t *pos)
 static void cgroup_pidlist_stop(struct seq_file *s, void *v)
 {
struct cgroup_pidlist *l = s-private;
-   up_read(l-mutex);
+   up_read(l-sem);
 }
 
 static void *cgroup_pidlist_next(struct seq_file *s, void *v, loff_t *pos)
@@ -3723,7 +3723,7 @@ static void cgroup_release_pid_array(struct 
cgroup_pidlist *l)
 * pidlist_mutex, we have to take pidlist_mutex first.
 */
mutex_lock(l-owner-pidlist_mutex);
-   down_write(l-mutex);
+   down_write(l-sem);
BUG_ON(!l-use_count);
if (!--l-use_count) {
/* we're the last user if refcount is 0; remove and free */
@@ -3731,12 +3731,12 @@ static void cgroup_release_pid_array(struct 
cgroup_pidlist *l)
mutex_unlock(l-owner-pidlist_mutex);
pidlist_free(l-list);
put_pid_ns(l-key.ns);
-   up_write(l-mutex);
+   up_write(l-sem);
kfree(l);
return;
}
mutex_unlock(l-owner-pidlist_mutex);
-   up_write(l-mutex);
+   up_write(l-sem);
 }
 
 static int cgroup_pidlist_release(struct inode *inode, struct file *file)
-- 
1.8.0.2
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 3/7] cgroup: restructure the failure path in cgroup_write_event_control()

2013-07-31 Thread Li Zefan
It uses a single label and checks the validity of each pointer. This
is err-prone, and actually we once had a bug because one of the check
was insufficient.

Use multi lables as we do in other places.

Signed-off-by: Li Zefan lize...@huawei.com
---
 kernel/cgroup.c | 33 +++--
 1 file changed, 15 insertions(+), 18 deletions(-)

diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 916a699..f0d7f5d 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -3945,31 +3945,31 @@ static int cgroup_write_event_control(struct cgroup 
*cgrp, struct cftype *cft,
efile = eventfd_fget(efd);
if (IS_ERR(efile)) {
ret = PTR_ERR(efile);
-   goto fail;
+   goto out_kfree;
}
 
event-eventfd = eventfd_ctx_fileget(efile);
if (IS_ERR(event-eventfd)) {
ret = PTR_ERR(event-eventfd);
-   goto fail;
+   goto out_put_efile;
}
 
cfile = fget(cfd);
if (!cfile) {
ret = -EBADF;
-   goto fail;
+   goto out_put_eventfd;
}
 
/* the process need read permission on control file */
/* AV: shouldn't we check that it's been opened for read instead? */
ret = inode_permission(file_inode(cfile), MAY_READ);
if (ret  0)
-   goto fail;
+   goto out_put_cfile;
 
event-cft = __file_cft(cfile);
if (IS_ERR(event-cft)) {
ret = PTR_ERR(event-cft);
-   goto fail;
+   goto out_put_cfile;
}
 
/*
@@ -3979,18 +3979,18 @@ static int cgroup_write_event_control(struct cgroup 
*cgrp, struct cftype *cft,
cgrp_cfile = __d_cgrp(cfile-f_dentry-d_parent);
if (cgrp_cfile != cgrp) {
ret = -EINVAL;
-   goto fail;
+   goto out_put_cfile;
}
 
if (!event-cft-register_event || !event-cft-unregister_event) {
ret = -EINVAL;
-   goto fail;
+   goto out_put_cfile;
}
 
ret = event-cft-register_event(cgrp, event-cft,
event-eventfd, buffer);
if (ret)
-   goto fail;
+   goto out_put_cfile;
 
efile-f_op-poll(efile, event-pt);
 
@@ -4010,16 +4010,13 @@ static int cgroup_write_event_control(struct cgroup 
*cgrp, struct cftype *cft,
 
return 0;
 
-fail:
-   if (cfile)
-   fput(cfile);
-
-   if (event  event-eventfd  !IS_ERR(event-eventfd))
-   eventfd_ctx_put(event-eventfd);
-
-   if (!IS_ERR_OR_NULL(efile))
-   fput(efile);
-
+out_put_cfile:
+   fput(cfile);
+out_put_eventfd:
+   eventfd_ctx_put(event-eventfd);
+out_put_efile:
+   fput(efile);
+out_kfree:
kfree(event);
 
return ret;
-- 
1.8.0.2
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 0/7] some pending cgroup patches

2013-07-31 Thread Li Zefan
The first one is a small bug fix, and the rest are pure cleanups.

Li Zefan (7):
  cgroup: fix a leak when percpu_ref_init() fails
  cgroup: remove sparse tags from offline_css()
  cgroup: restructure the failure path in cgroup_write_event_control()
  cgroup: rename cgroup_pidlist-mutex
  cgroup: remove struct cgroup_seqfile_state
  cgroup: rename current_css_set_cg_links
  cgroup: more naming cleanups
--
 include/linux/cgroup.h |   6 +--
 kernel/cgroup.c| 138 
---
 kernel/cpuset.c|  16 
 3 files changed, 72 insertions(+), 88 deletions(-)
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 6/7] cgroup: rename current_css_set_cg_links

2013-07-31 Thread Li Zefan
As we've renamed css_set-cg_links to css_set-cgrp_links (See commit
69d0206c79 cgroup: bring some sanity to naming around cg_cgroup_link),
it's better to also rename the debug file.

As the debug cgroup subsystem is for debug only and acts as a sample
on how to write a cgroup subsystem, it's fine to change the interface.

Signed-off-by: Li Zefan lize...@huawei.com
---
 kernel/cgroup.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index ac77d87..89d63b0 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -5312,9 +5312,9 @@ static u64 current_css_set_refcount_read(struct cgroup 
*cgrp,
return count;
 }
 
-static int current_css_set_cg_links_read(struct cgroup *cgrp,
-struct cftype *cft,
-struct seq_file *seq)
+static int current_css_set_cgrp_links_read(struct cgroup *cgrp,
+  struct cftype *cft,
+  struct seq_file *seq)
 {
struct cgrp_cset_link *link;
struct css_set *cset;
@@ -5387,8 +5387,8 @@ static struct cftype debug_files[] =  {
},
 
{
-   .name = current_css_set_cg_links,
-   .read_seq_string = current_css_set_cg_links_read,
+   .name = current_css_set_cgrp_links,
+   .read_seq_string = current_css_set_cgrp_links_read,
},
 
{
-- 
1.8.0.2
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 5/7] cgroup: remove struct cgroup_seqfile_state

2013-07-31 Thread Li Zefan
We can use struct struct cfent instead.

Signed-off-by: Li Zefan lize...@huawei.com
---
 kernel/cgroup.c | 38 --
 1 file changed, 12 insertions(+), 26 deletions(-)

diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 0d378b1..ac77d87 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2362,11 +2362,6 @@ static ssize_t cgroup_file_read(struct file *file, char 
__user *buf,
  * supports string-u64 maps, but can be extended in future.
  */
 
-struct cgroup_seqfile_state {
-   struct cftype *cft;
-   struct cgroup *cgroup;
-};
-
 static int cgroup_map_add(struct cgroup_map_cb *cb, const char *key, u64 value)
 {
struct seq_file *sf = cb-state;
@@ -2375,22 +2370,22 @@ static int cgroup_map_add(struct cgroup_map_cb *cb, 
const char *key, u64 value)
 
 static int cgroup_seqfile_show(struct seq_file *m, void *arg)
 {
-   struct cgroup_seqfile_state *state = m-private;
-   struct cftype *cft = state-cft;
+   struct cfent *cfe = m-private;
+   struct cftype *cft = cfe-type;
+   struct cgroup *cgrp = __d_cgrp(cfe-dentry-d_parent);
+
if (cft-read_map) {
struct cgroup_map_cb cb = {
.fill = cgroup_map_add,
.state = m,
};
-   return cft-read_map(state-cgroup, cft, cb);
+   return cft-read_map(cgrp, cft, cb);
}
-   return cft-read_seq_string(state-cgroup, cft, m);
+   return cft-read_seq_string(cgrp, cft, m);
 }
 
 static int cgroup_seqfile_release(struct inode *inode, struct file *file)
 {
-   struct seq_file *seq = file-private_data;
-   kfree(seq-private);
return single_release(inode, file);
 }
 
@@ -2404,30 +2399,21 @@ static const struct file_operations 
cgroup_seqfile_operations = {
 static int cgroup_file_open(struct inode *inode, struct file *file)
 {
int err;
+   struct cfent *cfe;
struct cftype *cft;
 
err = generic_file_open(inode, file);
if (err)
return err;
-   cft = __d_cft(file-f_dentry);
+   cfe = __d_cfe(file-f_dentry);
+   cft = cfe-type;
 
if (cft-read_map || cft-read_seq_string) {
-   struct cgroup_seqfile_state *state;
-
-   state = kzalloc(sizeof(*state), GFP_USER);
-   if (!state)
-   return -ENOMEM;
-
-   state-cft = cft;
-   state-cgroup = __d_cgrp(file-f_dentry-d_parent);
file-f_op = cgroup_seqfile_operations;
-   err = single_open(file, cgroup_seqfile_show, state);
-   if (err  0)
-   kfree(state);
-   } else if (cft-open)
+   err = single_open(file, cgroup_seqfile_show, cfe);
+   } else if (cft-open) {
err = cft-open(inode, file);
-   else
-   err = 0;
+   }
 
return err;
 }
-- 
1.8.0.2

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 7/7] cgroup: more naming cleanups

2013-07-31 Thread Li Zefan
Constantly use @cset for css_set varabbles and use @cgrp as cgroup
variables.

Signed-off-by: Li Zefan lize...@huawei.com
---
 include/linux/cgroup.h |  6 +++---
 kernel/cgroup.c| 26 +-
 kernel/cpuset.c| 16 
 3 files changed, 24 insertions(+), 24 deletions(-)

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index c07e175..ee048ba 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -400,8 +400,8 @@ struct cgroup_map_cb {
 
 /* cftype-flags */
 enum {
-   CFTYPE_ONLY_ON_ROOT = (1  0), /* only create on root cg */
-   CFTYPE_NOT_ON_ROOT  = (1  1), /* don't create on root cg */
+   CFTYPE_ONLY_ON_ROOT = (1  0), /* only create on root cgrp */
+   CFTYPE_NOT_ON_ROOT  = (1  1), /* don't create on root cgrp */
CFTYPE_INSANE   = (1  2), /* don't create if 
sane_behavior */
 };
 
@@ -519,7 +519,7 @@ struct cftype_set {
 };
 
 struct cgroup_scanner {
-   struct cgroup *cg;
+   struct cgroup *cgrp;
int (*test_task)(struct task_struct *p, struct cgroup_scanner *scan);
void (*process_task)(struct task_struct *p,
struct cgroup_scanner *scan);
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 89d63b0..3729f43 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -431,7 +431,7 @@ static inline void put_css_set_taskexit(struct css_set 
*cset)
  * @new_cgrp: cgroup that's being entered by the task
  * @template: desired set of css pointers in css_set (pre-calculated)
  *
- * Returns true if cg matches old_cg except for the hierarchy
+ * Returns true if cset matches old_cset except for the hierarchy
  * which new_cgrp belongs to, for which it should match new_cgrp.
  */
 static bool compare_css_sets(struct css_set *cset,
@@ -1804,7 +1804,7 @@ EXPORT_SYMBOL_GPL(task_cgroup_path_from_hierarchy);
 struct task_and_cgroup {
struct task_struct  *task;
struct cgroup   *cgrp;
-   struct css_set  *cg;
+   struct css_set  *cset;
 };
 
 struct cgroup_taskset {
@@ -2022,8 +2022,8 @@ static int cgroup_attach_task(struct cgroup *cgrp, struct 
task_struct *tsk,
 
tc = flex_array_get(group, i);
old_cset = task_css_set(tc-task);
-   tc-cg = find_css_set(old_cset, cgrp);
-   if (!tc-cg) {
+   tc-cset = find_css_set(old_cset, cgrp);
+   if (!tc-cset) {
retval = -ENOMEM;
goto out_put_css_set_refs;
}
@@ -2036,7 +2036,7 @@ static int cgroup_attach_task(struct cgroup *cgrp, struct 
task_struct *tsk,
 */
for (i = 0; i  group_size; i++) {
tc = flex_array_get(group, i);
-   cgroup_task_migrate(tc-cgrp, tc-task, tc-cg);
+   cgroup_task_migrate(tc-cgrp, tc-task, tc-cset);
}
/* nothing is sensitive to fork() after this point. */
 
@@ -2056,9 +2056,9 @@ out_put_css_set_refs:
if (retval) {
for (i = 0; i  group_size; i++) {
tc = flex_array_get(group, i);
-   if (!tc-cg)
+   if (!tc-cset)
break;
-   put_css_set(tc-cg);
+   put_css_set(tc-cset);
}
}
 out_cancel_attach:
@@ -2168,9 +2168,9 @@ int cgroup_attach_task_all(struct task_struct *from, 
struct task_struct *tsk)
 
mutex_lock(cgroup_mutex);
for_each_active_root(root) {
-   struct cgroup *from_cg = task_cgroup_from_root(from, root);
+   struct cgroup *from_cgrp = task_cgroup_from_root(from, root);
 
-   retval = cgroup_attach_task(from_cg, tsk, false);
+   retval = cgroup_attach_task(from_cgrp, tsk, false);
if (retval)
break;
}
@@ -3275,8 +3275,8 @@ int cgroup_scan_tasks(struct cgroup_scanner *scan)
 * guarantees forward progress and that we don't miss any tasks.
 */
heap-size = 0;
-   cgroup_iter_start(scan-cg, it);
-   while ((p = cgroup_iter_next(scan-cg, it))) {
+   cgroup_iter_start(scan-cgrp, it);
+   while ((p = cgroup_iter_next(scan-cgrp, it))) {
/*
 * Only affect tasks that qualify per the caller's callback,
 * if he provided one
@@ -3309,7 +3309,7 @@ int cgroup_scan_tasks(struct cgroup_scanner *scan)
 * the heap and wasn't inserted
 */
}
-   cgroup_iter_end(scan-cg, it);
+   cgroup_iter_end(scan-cgrp, it);
 
if (heap-size) {
for (i = 0; i  heap-size; i++) {
@@ -3355,7 +3355,7 @@ int cgroup_transfer_tasks(struct cgroup *to, struct 
cgroup *from)
 {
struct cgroup_scanner scan;
 
-   scan.cg = from;
+   scan.cgrp = from;
scan.test_task = NULL; /* select all tasks

Re: [PATCH 5/7] cgroup: remove struct cgroup_seqfile_state

2013-07-31 Thread Li Zefan
  static int cgroup_seqfile_release(struct inode *inode, struct file *file)
  {
 - struct seq_file *seq = file-private_data;
 - kfree(seq-private);
   return single_release(inode, file);
  }

Oh, I just realized now we can remove cgroup_seqfile_release().

Will send out an updated patch...

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v2 5/5] cgroup: remove struct cgroup_seqfile_state

2013-07-31 Thread Li Zefan
We can use struct cfent instead.

v2:
- remove cgroup_seqfile_release().

Signed-off-by: Li Zefan lize...@huawei.com
---
 kernel/cgroup.c | 45 +
 1 file changed, 13 insertions(+), 32 deletions(-)

diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 0d378b1..c836dff 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2362,11 +2362,6 @@ static ssize_t cgroup_file_read(struct file *file, char 
__user *buf,
  * supports string-u64 maps, but can be extended in future.
  */
 
-struct cgroup_seqfile_state {
-   struct cftype *cft;
-   struct cgroup *cgroup;
-};
-
 static int cgroup_map_add(struct cgroup_map_cb *cb, const char *key, u64 value)
 {
struct seq_file *sf = cb-state;
@@ -2375,59 +2370,45 @@ static int cgroup_map_add(struct cgroup_map_cb *cb, 
const char *key, u64 value)
 
 static int cgroup_seqfile_show(struct seq_file *m, void *arg)
 {
-   struct cgroup_seqfile_state *state = m-private;
-   struct cftype *cft = state-cft;
+   struct cfent *cfe = m-private;
+   struct cftype *cft = cfe-type;
+   struct cgroup *cgrp = __d_cgrp(cfe-dentry-d_parent);
+
if (cft-read_map) {
struct cgroup_map_cb cb = {
.fill = cgroup_map_add,
.state = m,
};
-   return cft-read_map(state-cgroup, cft, cb);
+   return cft-read_map(cgrp, cft, cb);
}
-   return cft-read_seq_string(state-cgroup, cft, m);
-}
-
-static int cgroup_seqfile_release(struct inode *inode, struct file *file)
-{
-   struct seq_file *seq = file-private_data;
-   kfree(seq-private);
-   return single_release(inode, file);
+   return cft-read_seq_string(cgrp, cft, m);
 }
 
 static const struct file_operations cgroup_seqfile_operations = {
.read = seq_read,
.write = cgroup_file_write,
.llseek = seq_lseek,
-   .release = cgroup_seqfile_release,
+   .release = single_release,
 };
 
 static int cgroup_file_open(struct inode *inode, struct file *file)
 {
int err;
+   struct cfent *cfe;
struct cftype *cft;
 
err = generic_file_open(inode, file);
if (err)
return err;
-   cft = __d_cft(file-f_dentry);
+   cfe = __d_cfe(file-f_dentry);
+   cft = cfe-type;
 
if (cft-read_map || cft-read_seq_string) {
-   struct cgroup_seqfile_state *state;
-
-   state = kzalloc(sizeof(*state), GFP_USER);
-   if (!state)
-   return -ENOMEM;
-
-   state-cft = cft;
-   state-cgroup = __d_cgrp(file-f_dentry-d_parent);
file-f_op = cgroup_seqfile_operations;
-   err = single_open(file, cgroup_seqfile_show, state);
-   if (err  0)
-   kfree(state);
-   } else if (cft-open)
+   err = single_open(file, cgroup_seqfile_show, cfe);
+   } else if (cft-open) {
err = cft-open(inode, file);
-   else
-   err = 0;
+   }
 
return err;
 }
-- 
1.8.0.2
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v2 5/5] cgroup: remove struct cgroup_seqfile_state

2013-07-31 Thread Li Zefan
On 2013/7/31 19:36, Tejun Heo wrote:
 On Wed, Jul 31, 2013 at 05:36:25PM +0800, Li Zefan wrote:
 We can use struct cfent instead.

 v2:
 - remove cgroup_seqfile_release().

 Signed-off-by: Li Zefan lize...@huawei.com
 
 Applied to libata/for-3.12.
 

libata? I hope you didn't. :)

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 6/7] cgroup: rename current_css_set_cg_links

2013-07-31 Thread Li Zefan
On 2013/7/31 19:38, Tejun Heo wrote:
 Hello,
 
 On Wed, Jul 31, 2013 at 04:18:07PM +0800, Li Zefan wrote:
 As we've renamed css_set-cg_links to css_set-cgrp_links (See commit
 69d0206c79 cgroup: bring some sanity to naming around cg_cgroup_link),
 it's better to also rename the debug file.

 As the debug cgroup subsystem is for debug only and acts as a sample
 on how to write a cgroup subsystem, it's fine to change the interface.
 
 I'd rather not do this.  It doesn't really buy us anything and stuff
 like cgrp_links isn't interesting to actual controllers anyway.
 

not a big deal

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 5/7] cgroup: remove struct cgroup_seqfile_state

2013-07-31 Thread Li Zefan
On 2013/7/31 19:35, Tejun Heo wrote:
 On Wed, Jul 31, 2013 at 05:27:39PM +0800, Li Zefan wrote:
  static int cgroup_seqfile_release(struct inode *inode, struct file *file)
  {
 -   struct seq_file *seq = file-private_data;
 -   kfree(seq-private);
 return single_release(inode, file);
  }

 Oh, I just realized now we can remove cgroup_seqfile_release().

 Will send out an updated patch...
 
 Applied the updated patch.  Can you please post updated patches as
 replies to the original path from next time?  That makes things easier
 to track on my side.
 

Sure.

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v2 4/7] cgroup: rename cgroup_pidlist-mutex

2013-07-31 Thread Li Zefan
It's a rw_semaphore not a mutex.

Signed-off-by: Li Zefan lize...@huawei.com
---
 kernel/cgroup.c | 22 +++---
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index ae905a5..6662811 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -3436,7 +3436,7 @@ struct cgroup_pidlist {
/* pointer to the cgroup we belong to, for list removal purposes */
struct cgroup *owner;
/* protects the other fields */
-   struct rw_semaphore mutex;
+   struct rw_semaphore rwsem;
 };
 
 /*
@@ -3509,7 +3509,7 @@ static struct cgroup_pidlist *cgroup_pidlist_find(struct 
cgroup *cgrp,
struct pid_namespace *ns = task_active_pid_ns(current);
 
/*
-* We can't drop the pidlist_mutex before taking the l-mutex in case
+* We can't drop the pidlist_mutex before taking the l-rwsem in case
 * the last ref-holder is trying to remove l from the list at the same
 * time. Holding the pidlist_mutex precludes somebody taking whichever
 * list we find out from under us - compare release_pid_array().
@@ -3518,7 +3518,7 @@ static struct cgroup_pidlist *cgroup_pidlist_find(struct 
cgroup *cgrp,
list_for_each_entry(l, cgrp-pidlists, links) {
if (l-key.type == type  l-key.ns == ns) {
/* make sure l doesn't vanish out from under us */
-   down_write(l-mutex);
+   down_write(l-rwsem);
mutex_unlock(cgrp-pidlist_mutex);
return l;
}
@@ -3529,8 +3529,8 @@ static struct cgroup_pidlist *cgroup_pidlist_find(struct 
cgroup *cgrp,
mutex_unlock(cgrp-pidlist_mutex);
return l;
}
-   init_rwsem(l-mutex);
-   down_write(l-mutex);
+   init_rwsem(l-rwsem);
+   down_write(l-rwsem);
l-key.type = type;
l-key.ns = get_pid_ns(ns);
l-owner = cgrp;
@@ -3591,7 +3591,7 @@ static int pidlist_array_load(struct cgroup *cgrp, enum 
cgroup_filetype type,
l-list = array;
l-length = length;
l-use_count++;
-   up_write(l-mutex);
+   up_write(l-rwsem);
*lp = l;
return 0;
 }
@@ -3669,7 +3669,7 @@ static void *cgroup_pidlist_start(struct seq_file *s, 
loff_t *pos)
int index = 0, pid = *pos;
int *iter;
 
-   down_read(l-mutex);
+   down_read(l-rwsem);
if (pid) {
int end = l-length;
 
@@ -3696,7 +3696,7 @@ static void *cgroup_pidlist_start(struct seq_file *s, 
loff_t *pos)
 static void cgroup_pidlist_stop(struct seq_file *s, void *v)
 {
struct cgroup_pidlist *l = s-private;
-   up_read(l-mutex);
+   up_read(l-rwsem);
 }
 
 static void *cgroup_pidlist_next(struct seq_file *s, void *v, loff_t *pos)
@@ -3742,7 +3742,7 @@ static void cgroup_release_pid_array(struct 
cgroup_pidlist *l)
 * pidlist_mutex, we have to take pidlist_mutex first.
 */
mutex_lock(l-owner-pidlist_mutex);
-   down_write(l-mutex);
+   down_write(l-rwsem);
BUG_ON(!l-use_count);
if (!--l-use_count) {
/* we're the last user if refcount is 0; remove and free */
@@ -3750,12 +3750,12 @@ static void cgroup_release_pid_array(struct 
cgroup_pidlist *l)
mutex_unlock(l-owner-pidlist_mutex);
pidlist_free(l-list);
put_pid_ns(l-key.ns);
-   up_write(l-mutex);
+   up_write(l-rwsem);
kfree(l);
return;
}
mutex_unlock(l-owner-pidlist_mutex);
-   up_write(l-mutex);
+   up_write(l-rwsem);
 }
 
 static int cgroup_pidlist_release(struct inode *inode, struct file *file)
-- 
1.8.0.2

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v2 3/7] cgroup: restructure the failure path in cgroup_write_event_control()

2013-07-31 Thread Li Zefan
It uses a single label and checks the validity of each pointer. This
is err-prone, and actually we had a bug because one of the check was
insufficient.

Use multi lables as we do in other places.

v2:
- drop initializations of local variables.

Signed-off-by: Li Zefan lize...@huawei.com
---
 kernel/cgroup.c | 39 ++-
 1 file changed, 18 insertions(+), 21 deletions(-)

diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index fb11569..ae905a5 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -3934,11 +3934,11 @@ static void cgroup_event_ptable_queue_proc(struct file 
*file,
 static int cgroup_write_event_control(struct cgroup *cgrp, struct cftype *cft,
  const char *buffer)
 {
-   struct cgroup_event *event = NULL;
+   struct cgroup_event *event;
struct cgroup *cgrp_cfile;
unsigned int efd, cfd;
-   struct file *efile = NULL;
-   struct file *cfile = NULL;
+   struct file *efile;
+   struct file *cfile;
char *endp;
int ret;
 
@@ -3964,31 +3964,31 @@ static int cgroup_write_event_control(struct cgroup 
*cgrp, struct cftype *cft,
efile = eventfd_fget(efd);
if (IS_ERR(efile)) {
ret = PTR_ERR(efile);
-   goto fail;
+   goto out_kfree;
}
 
event-eventfd = eventfd_ctx_fileget(efile);
if (IS_ERR(event-eventfd)) {
ret = PTR_ERR(event-eventfd);
-   goto fail;
+   goto out_put_efile;
}
 
cfile = fget(cfd);
if (!cfile) {
ret = -EBADF;
-   goto fail;
+   goto out_put_eventfd;
}
 
/* the process need read permission on control file */
/* AV: shouldn't we check that it's been opened for read instead? */
ret = inode_permission(file_inode(cfile), MAY_READ);
if (ret  0)
-   goto fail;
+   goto out_put_cfile;
 
event-cft = __file_cft(cfile);
if (IS_ERR(event-cft)) {
ret = PTR_ERR(event-cft);
-   goto fail;
+   goto out_put_cfile;
}
 
/*
@@ -3998,18 +3998,18 @@ static int cgroup_write_event_control(struct cgroup 
*cgrp, struct cftype *cft,
cgrp_cfile = __d_cgrp(cfile-f_dentry-d_parent);
if (cgrp_cfile != cgrp) {
ret = -EINVAL;
-   goto fail;
+   goto out_put_cfile;
}
 
if (!event-cft-register_event || !event-cft-unregister_event) {
ret = -EINVAL;
-   goto fail;
+   goto out_put_cfile;
}
 
ret = event-cft-register_event(cgrp, event-cft,
event-eventfd, buffer);
if (ret)
-   goto fail;
+   goto out_put_cfile;
 
efile-f_op-poll(efile, event-pt);
 
@@ -4029,16 +4029,13 @@ static int cgroup_write_event_control(struct cgroup 
*cgrp, struct cftype *cft,
 
return 0;
 
-fail:
-   if (cfile)
-   fput(cfile);
-
-   if (event  event-eventfd  !IS_ERR(event-eventfd))
-   eventfd_ctx_put(event-eventfd);
-
-   if (!IS_ERR_OR_NULL(efile))
-   fput(efile);
-
+out_put_cfile:
+   fput(cfile);
+out_put_eventfd:
+   eventfd_ctx_put(event-eventfd);
+out_put_efile:
+   fput(efile);
+out_kfree:
kfree(event);
 
return ret;
-- 
1.8.0.2
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v4 5/8] memcg: convert to use cgroup id

2013-07-30 Thread Li Zefan
Use cgroup id instead of css id. This is a preparation to kill css id.

Note, as memcg treat 0 as an invalid id, while cgroup id starts with 0,
we define memcg_id == cgroup_id + 1.

Signed-off-by: Li Zefan 
Acked-by: Michal Hocko 
---
 mm/memcontrol.c | 34 --
 1 file changed, 24 insertions(+), 10 deletions(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 626c426..62541f5 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -512,6 +512,25 @@ static inline bool mem_cgroup_is_root(struct mem_cgroup 
*memcg)
return (memcg == root_mem_cgroup);
 }
 
+static inline unsigned short mem_cgroup_id(struct mem_cgroup *memcg)
+{
+   /*
+* The ID of the root cgroup is 0, but memcg treat 0 as an
+* invalid ID, so we return (cgroup_id + 1).
+*/
+   return memcg->css.cgroup->id + 1;
+}
+
+static inline struct mem_cgroup *mem_cgroup_from_id(unsigned short id)
+{
+   struct cgroup *cgrp;
+
+   cgrp = cgroup_from_id(_cgroup_subsys, id - 1);
+   if (!cgrp)
+   return NULL;
+   return mem_cgroup_from_cont(cgrp);
+}
+
 /* Writing them here to avoid exposing memcg's inner layout */
 #if defined(CONFIG_INET) && defined(CONFIG_MEMCG_KMEM)
 
@@ -2821,15 +2840,10 @@ static void __mem_cgroup_cancel_local_charge(struct 
mem_cgroup *memcg,
  */
 static struct mem_cgroup *mem_cgroup_lookup(unsigned short id)
 {
-   struct cgroup_subsys_state *css;
-
/* ID 0 is unused ID */
if (!id)
return NULL;
-   css = css_lookup(_cgroup_subsys, id);
-   if (!css)
-   return NULL;
-   return mem_cgroup_from_css(css);
+   return mem_cgroup_from_id(id);
 }
 
 struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
@@ -4328,7 +4342,7 @@ mem_cgroup_uncharge_swapcache(struct page *page, 
swp_entry_t ent, bool swapout)
 * css_get() was called in uncharge().
 */
if (do_swap_account && swapout && memcg)
-   swap_cgroup_record(ent, css_id(>css));
+   swap_cgroup_record(ent, mem_cgroup_id(memcg));
 }
 #endif
 
@@ -4380,8 +4394,8 @@ static int mem_cgroup_move_swap_account(swp_entry_t entry,
 {
unsigned short old_id, new_id;
 
-   old_id = css_id(>css);
-   new_id = css_id(>css);
+   old_id = mem_cgroup_id(from);
+   new_id = mem_cgroup_id(to);
 
if (swap_cgroup_cmpxchg(entry, old_id, new_id) == old_id) {
mem_cgroup_swap_statistics(from, false);
@@ -6542,7 +6556,7 @@ static enum mc_target_type get_mctgt_type(struct 
vm_area_struct *vma,
}
/* There is a swap entry and a page doesn't exist or isn't charged */
if (ent.val && !ret &&
-   css_id(>css) == lookup_swap_cgroup_id(ent)) {
+   mem_cgroup_id(mc.from) == lookup_swap_cgroup_id(ent)) {
ret = MC_TARGET_SWAP;
if (target)
target->ent = ent;
-- 
1.8.0.2
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v4 8/8] cgroup: kill css_id

2013-07-30 Thread Li Zefan
The only user of css_id was memcg, and it has been convered to use
cgroup->id, so kill css_id.

Signed-off-by: Li Zefan 
Reviewed-by: Michal Hocko 
---
 include/linux/cgroup.h |  37 
 kernel/cgroup.c| 252 +
 2 files changed, 1 insertion(+), 288 deletions(-)

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 4ef8452..c07e175 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -597,11 +597,6 @@ struct cgroup_subsys {
int subsys_id;
int disabled;
int early_init;
-   /*
-* True if this subsys uses ID. ID is not available before cgroup_init()
-* (not available in early_init time.)
-*/
-   bool use_id;
 
/*
 * If %false, this subsystem is properly hierarchical -
@@ -627,9 +622,6 @@ struct cgroup_subsys {
 */
struct cgroupfs_root *root;
struct list_head sibling;
-   /* used when use_id == true */
-   struct idr idr;
-   spinlock_t id_lock;
 
/* list of cftype_sets */
struct list_head cftsets;
@@ -874,35 +866,6 @@ int cgroup_scan_tasks(struct cgroup_scanner *scan);
 int cgroup_attach_task_all(struct task_struct *from, struct task_struct *);
 int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from);
 
-/*
- * CSS ID is ID for cgroup_subsys_state structs under subsys. This only works
- * if cgroup_subsys.use_id == true. It can be used for looking up and scanning.
- * CSS ID is assigned at cgroup allocation (create) automatically
- * and removed when subsys calls free_css_id() function. This is because
- * the lifetime of cgroup_subsys_state is subsys's matter.
- *
- * Looking up and scanning function should be called under rcu_read_lock().
- * Taking cgroup_mutex is not necessary for following calls.
- * But the css returned by this routine can be "not populated yet" or "being
- * destroyed". The caller should check css and cgroup's status.
- */
-
-/*
- * Typically Called at ->destroy(), or somewhere the subsys frees
- * cgroup_subsys_state.
- */
-void free_css_id(struct cgroup_subsys *ss, struct cgroup_subsys_state *css);
-
-/* Find a cgroup_subsys_state which has given ID */
-
-struct cgroup_subsys_state *css_lookup(struct cgroup_subsys *ss, int id);
-
-/* Returns true if root is ancestor of cg */
-bool css_is_ancestor(struct cgroup_subsys_state *cg,
-const struct cgroup_subsys_state *root);
-
-/* Get id and depth of css */
-unsigned short css_id(struct cgroup_subsys_state *css);
 struct cgroup_subsys_state *cgroup_css_from_dir(struct file *f, int id);
 
 #else /* !CONFIG_CGROUPS */
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index a26e083..a64b7b8 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -123,38 +123,6 @@ struct cfent {
 };
 
 /*
- * CSS ID -- ID per subsys's Cgroup Subsys State(CSS). used only when
- * cgroup_subsys->use_id != 0.
- */
-#define CSS_ID_MAX (65535)
-struct css_id {
-   /*
-* The css to which this ID points. This pointer is set to valid value
-* after cgroup is populated. If cgroup is removed, this will be NULL.
-* This pointer is expected to be RCU-safe because destroy()
-* is called after synchronize_rcu(). But for safe use, css_tryget()
-* should be used for avoiding race.
-*/
-   struct cgroup_subsys_state __rcu *css;
-   /*
-* ID of this css.
-*/
-   unsigned short id;
-   /*
-* Depth in hierarchy which this ID belongs to.
-*/
-   unsigned short depth;
-   /*
-* ID is freed by RCU. (and lookup routine is RCU safe.)
-*/
-   struct rcu_head rcu_head;
-   /*
-* Hierarchy of CSS ID belongs to.
-*/
-   unsigned short stack[0]; /* Array of Length (depth+1) */
-};
-
-/*
  * cgroup_event represents events which userspace want to receive.
  */
 struct cgroup_event {
@@ -364,9 +332,6 @@ struct cgrp_cset_link {
 static struct css_set init_css_set;
 static struct cgrp_cset_link init_cgrp_cset_link;
 
-static int cgroup_init_idr(struct cgroup_subsys *ss,
-  struct cgroup_subsys_state *css);
-
 /* css_set_lock protects the list of css_set objects, and the
  * chain of tasks off each css_set.  Nests outside task->alloc_lock
  * due to cgroup_iter_start() */
@@ -815,9 +780,6 @@ static struct backing_dev_info cgroup_backing_dev_info = {
.capabilities   = BDI_CAP_NO_ACCT_AND_WRITEBACK,
 };
 
-static int alloc_css_id(struct cgroup_subsys *ss,
-   struct cgroup *parent, struct cgroup *child);
-
 static struct inode *cgroup_new_inode(umode_t mode, struct super_block *sb)
 {
struct inode *inode = new_inode(sb);
@@ -4160,20 +4122,6 @@ static int cgroup_populate_dir(struct cgroup *cgrp, 
unsigned long subsys_mask)
}
}
 
-   /* This cgroup is ready now */
-   for

[PATCH v4 3/8] cgroup: implement cgroup_from_id()

2013-07-30 Thread Li Zefan
This will be used as a replacement for css_lookup().

There's a difference with cgroup id and css id. cgroup id starts with 0,
while css id starts with 1.

v4:
- also check if cggroup_mutex is held.
- make it an inline function.

Signed-off-by: Li Zefan 
Reviewed-by: Michal Hocko 
---
 include/linux/cgroup.h | 18 ++
 1 file changed, 18 insertions(+)

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 8c107e9..4ef8452 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -720,6 +720,24 @@ static inline struct cgroup* task_cgroup(struct 
task_struct *task,
return task_subsys_state(task, subsys_id)->cgroup;
 }
 
+/**
+ * cgroup_from_id - lookup cgroup by id
+ * @ss: cgroup subsys to be looked into
+ * @id: the cgroup id
+ *
+ * Returns the cgroup if there's valid one with @id, otherwise returns NULL.
+ * Should be called under rcu_read_lock().
+ */
+static inline struct cgroup *cgroup_from_id(struct cgroup_subsys *ss, int id)
+{
+#ifdef CONFIG_PROVE_RCU
+   rcu_lockdep_assert(rcu_read_lock_held() ||
+  lockdep_is_held(_mutex),
+  "cgroup_from_id() needs proper protection");
+#endif
+   return idr_find(>root->cgroup_idr, id);
+}
+
 struct cgroup *cgroup_next_sibling(struct cgroup *pos);
 
 /**
-- 
1.8.0.2
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v4 6/8] memcg: fail to create cgroup if the cgroup id is too big

2013-07-30 Thread Li Zefan
memcg requires the cgroup id to be smaller than 65536.

This is a preparation to kill css id.

Signed-off-by: Li Zefan 
Acked-by: Michal Hocko 
---
 mm/memcontrol.c | 9 +
 1 file changed, 9 insertions(+)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 62541f5..ad7d7c9 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -512,6 +512,12 @@ static inline bool mem_cgroup_is_root(struct mem_cgroup 
*memcg)
return (memcg == root_mem_cgroup);
 }
 
+/*
+ * We restrict the id in the range of [1, 65535], so it can fit into
+ * an unsigned short.
+ */
+#define MEM_CGROUP_ID_MAX  USHRT_MAX
+
 static inline unsigned short mem_cgroup_id(struct mem_cgroup *memcg)
 {
/*
@@ -6248,6 +6254,9 @@ mem_cgroup_css_alloc(struct cgroup *cont)
long error = -ENOMEM;
int node;
 
+   if (cont->id > MEM_CGROUP_ID_MAX)
+   return ERR_PTR(-ENOSPC);
+
memcg = mem_cgroup_alloc();
if (!memcg)
return ERR_PTR(error);
-- 
1.8.0.2
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v4 7/8] memcg: stop using css id

2013-07-30 Thread Li Zefan
Now memcg uses cgroup id instead of css id. Update some comments and
set mem_cgroup_subsys->use_id to 0.

Signed-off-by: Li Zefan 
Acked-by: Michal Hocko 
---
 mm/memcontrol.c | 23 ---
 1 file changed, 8 insertions(+), 15 deletions(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index ad7d7c9..6b77ab6 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -608,16 +608,11 @@ static void disarm_sock_keys(struct mem_cgroup *memcg)
 #ifdef CONFIG_MEMCG_KMEM
 /*
  * This will be the memcg's index in each cache's ->memcg_params->memcg_caches.
- * There are two main reasons for not using the css_id for this:
- *  1) this works better in sparse environments, where we have a lot of memcgs,
- * but only a few kmem-limited. Or also, if we have, for instance, 200
- * memcgs, and none but the 200th is kmem-limited, we'd have to have a
- * 200 entry array for that.
- *
- *  2) In order not to violate the cgroup API, we would like to do all memory
- * allocation in ->create(). At that point, we haven't yet allocated the
- * css_id. Having a separate index prevents us from messing with the cgroup
- * core for this
+ * The main reason for not using cgroup id for this:
+ *  this works better in sparse environments, where we have a lot of memcgs,
+ *  but only a few kmem-limited. Or also, if we have, for instance, 200
+ *  memcgs, and none but the 200th is kmem-limited, we'd have to have a
+ *  200 entry array for that.
  *
  * The current size of the caches array is stored in
  * memcg_limited_groups_array_size.  It will double each time we have to
@@ -632,14 +627,14 @@ int memcg_limited_groups_array_size;
  * cgroups is a reasonable guess. In the future, it could be a parameter or
  * tunable, but that is strictly not necessary.
  *
- * MAX_SIZE should be as large as the number of css_ids. Ideally, we could get
+ * MAX_SIZE should be as large as the number of cgrp_ids. Ideally, we could get
  * this constant directly from cgroup, but it is understandable that this is
  * better kept as an internal representation in cgroup.c. In any case, the
- * css_id space is not getting any smaller, and we don't have to necessarily
+ * cgrp_id space is not getting any smaller, and we don't have to necessarily
  * increase ours as well if it increases.
  */
 #define MEMCG_CACHES_MIN_SIZE 4
-#define MEMCG_CACHES_MAX_SIZE 65535
+#define MEMCG_CACHES_MAX_SIZE MEM_CGROUP_ID_MAX
 
 /*
  * A lot of the calls to the cache allocation functions are expected to be
@@ -6188,7 +6183,6 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg)
size_t size = memcg_size();
 
mem_cgroup_remove_from_trees(memcg);
-   free_css_id(_cgroup_subsys, >css);
 
for_each_node(node)
free_mem_cgroup_per_zone_info(memcg, node);
@@ -6985,7 +6979,6 @@ struct cgroup_subsys mem_cgroup_subsys = {
.bind = mem_cgroup_bind,
.base_cftypes = mem_cgroup_files,
.early_init = 0,
-   .use_id = 1,
 };
 
 #ifdef CONFIG_MEMCG_SWAP
-- 
1.8.0.2

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v4 2/8] cgroup: document how cgroup IDs are assigned

2013-07-30 Thread Li Zefan
As cgroup id has been used in netprio cgroup and will be used in memcg,
it's important to make it clear how a cgroup id is allocated.

For example, in netprio cgroup, the id is used as index of anarray.

Signed-off-by: Li Zefan 
Reviewed-by: Michal Hocko 
---
 include/linux/cgroup.h | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 2bd052d..8c107e9 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -161,7 +161,13 @@ struct cgroup_name {
 struct cgroup {
unsigned long flags;/* "unsigned long" so bitops work */
 
-   int id; /* idr allocated in-hierarchy ID */
+   /*
+* idr allocated in-hierarchy ID.
+*
+* The ID of the root cgroup is always 0, and a new cgroup
+* will be assigned with a smallest available ID.
+*/
+   int id;
 
/*
 * We link our 'sibling' struct into our parent's 'children'.
-- 
1.8.0.2
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v4 4/8] memcg: convert to use cgroup_is_descendant()

2013-07-30 Thread Li Zefan
This is a preparation to kill css_id.

Signed-off-by: Li Zefan 
Acked-by: Michal Hocko 
---
 mm/memcontrol.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index d12ca6f..626c426 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1434,7 +1434,7 @@ bool __mem_cgroup_same_or_subtree(const struct mem_cgroup 
*root_memcg,
return true;
if (!root_memcg->use_hierarchy || !memcg)
return false;
-   return css_is_ancestor(>css, _memcg->css);
+   return cgroup_is_descendant(memcg->css.cgroup, root_memcg->css.cgroup);
 }
 
 static bool mem_cgroup_same_or_subtree(const struct mem_cgroup *root_memcg,
-- 
1.8.0.2

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v4 1/8] cgroup: convert cgroup_ida to cgroup_idr

2013-07-30 Thread Li Zefan
This enables us to lookup a cgroup by its id.

v4:
- add a comment for idr_remove() in cgroup_offline_fn().

v3:
- on success, idr_alloc() returns the id but not 0, so fix the BUG_ON()
  in cgroup_init().
- pass the right value to idr_alloc() so that the id for dummy cgroup is 0.

Signed-off-by: Li Zefan 
Reviewed-by: Michal Hocko 
---
 include/linux/cgroup.h |  4 ++--
 kernel/cgroup.c| 33 +++--
 2 files changed, 29 insertions(+), 8 deletions(-)

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 297462b..2bd052d 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -161,7 +161,7 @@ struct cgroup_name {
 struct cgroup {
unsigned long flags;/* "unsigned long" so bitops work */
 
-   int id; /* ida allocated in-hierarchy ID */
+   int id; /* idr allocated in-hierarchy ID */
 
/*
 * We link our 'sibling' struct into our parent's 'children'.
@@ -322,7 +322,7 @@ struct cgroupfs_root {
unsigned long flags;
 
/* IDs for cgroups in this hierarchy */
-   struct ida cgroup_ida;
+   struct idr cgroup_idr;
 
/* The path to use for release notifications. */
char release_agent_path[PATH_MAX];
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 345fac8..a26e083 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -866,8 +866,6 @@ static void cgroup_free_fn(struct work_struct *work)
 */
dput(cgrp->parent->dentry);
 
-   ida_simple_remove(>root->cgroup_ida, cgrp->id);
-
/*
 * Drop the active superblock reference that we took when we
 * created the cgroup. This will free cgrp->root, if we are
@@ -1379,6 +1377,7 @@ static void init_cgroup_root(struct cgroupfs_root *root)
cgrp->root = root;
RCU_INIT_POINTER(cgrp->name, _cgroup_name);
init_cgroup_housekeeping(cgrp);
+   idr_init(>cgroup_idr);
 }
 
 static int cgroup_init_root_id(struct cgroupfs_root *root, int start, int end)
@@ -1451,7 +1450,6 @@ static struct cgroupfs_root *cgroup_root_from_opts(struct 
cgroup_sb_opts *opts)
 */
root->subsys_mask = opts->subsys_mask;
root->flags = opts->flags;
-   ida_init(>cgroup_ida);
if (opts->release_agent)
strcpy(root->release_agent_path, opts->release_agent);
if (opts->name)
@@ -1467,7 +1465,7 @@ static void cgroup_free_root(struct cgroupfs_root *root)
/* hierarhcy ID shoulid already have been released */
WARN_ON_ONCE(root->hierarchy_id);
 
-   ida_destroy(>cgroup_ida);
+   idr_destroy(>cgroup_idr);
kfree(root);
}
 }
@@ -1582,6 +1580,11 @@ static struct dentry *cgroup_mount(struct 
file_system_type *fs_type,
mutex_lock(_mutex);
mutex_lock(_root_mutex);
 
+   root_cgrp->id = idr_alloc(>cgroup_idr, root_cgrp,
+  0, 1, GFP_KERNEL);
+   if (root_cgrp->id < 0)
+   goto unlock_drop;
+
/* Check for name clashes with existing mounts */
ret = -EBUSY;
if (strlen(root->name))
@@ -4273,7 +4276,11 @@ static long cgroup_create(struct cgroup *parent, struct 
dentry *dentry,
goto err_free_cgrp;
rcu_assign_pointer(cgrp->name, name);
 
-   cgrp->id = ida_simple_get(>cgroup_ida, 1, 0, GFP_KERNEL);
+   /*
+* Temporarily set the pointer to NULL, so idr_find() won't return
+* a half-baked cgroup.
+*/
+   cgrp->id = idr_alloc(>cgroup_idr, NULL, 1, 0, GFP_KERNEL);
if (cgrp->id < 0)
goto err_free_name;
 
@@ -4371,6 +4378,8 @@ static long cgroup_create(struct cgroup *parent, struct 
dentry *dentry,
}
}
 
+   idr_replace(>cgroup_idr, cgrp, cgrp->id);
+
err = cgroup_addrm_files(cgrp, NULL, cgroup_base_files, true);
if (err)
goto err_destroy;
@@ -4397,7 +4406,7 @@ err_free_all:
/* Release the reference count that we took on the superblock */
deactivate_super(sb);
 err_free_id:
-   ida_simple_remove(>cgroup_ida, cgrp->id);
+   idr_remove(>cgroup_idr, cgrp->id);
 err_free_name:
kfree(rcu_dereference_raw(cgrp->name));
 err_free_cgrp:
@@ -4590,6 +4599,14 @@ static void cgroup_offline_fn(struct work_struct *work)
/* delete this cgroup from parent->children */
list_del_rcu(>sibling);
 
+   /*
+* We should remove the cgroup object from idr before its grace
+* period starts, so we won't be looking up a cgroup while the
+* cgroup is being freed.
+*/
+   idr_remove(>root->cgroup_idr, cgrp->id);
+   cgrp->id = -1;
+
dput(d);

[PATCH v4 0/8] memcg, cgroup: kill css_id

2013-07-30 Thread Li Zefan
This patchset converts memcg to use cgroup->id, and then we can remove
cgroup css_id.

As we've removed memcg's own refcnt, converting memcg to use cgroup->id
is very straight-forward.

The patchset is based on Tejun's cgroup tree.

v4:
- make cgroup_from_id() inline and check if cgroup_mutex is held.
- add a comment for idr_remove() in cgroup_offline)fn().

v2->v3:
- some minor cleanups suggested by Michal.
- fixed the call to idr_alloc() in cgroup_init() in the first patch.

Li Zefan (8):
  cgroup: convert cgroup_ida to cgroup_idr
  cgroup: document how cgroup IDs are assigned
  cgroup: implement cgroup_from_id()
  memcg: convert to use cgroup_is_descendant()
  memcg: convert to use cgroup id
  memcg: fail to create cgroup if the cgroup id is too big
  memcg: stop using css id
  cgroup: kill css_id
--
 include/linux/cgroup.h |  65 ++
 kernel/cgroup.c| 285 
++-
 mm/memcontrol.c|  68 --
 3 files changed, 96 insertions(+), 322 deletions(-)
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: call_usermodehelper() returns -513 when ocfs2 umounting filesystems

2013-07-30 Thread Li Zefan
On 2013/7/30 16:51, Xue jiufei wrote:
> On 2013/7/30 15:30, Xue jiufei wrote:
>> Hi, 
>> We have encountered an error when umounting ocfs2 filesystems.
>> Function ocfs2_leave_group() calls call_usermodehelper() to stop
>> heartbeat thread, but it returns -513(ERESTARTNOINTR) in one test.
>> And after that error, every times umounting the filesystem, it
>> returns the same error.
>> And at the same time, there's another kworker thread which is
>> sending messages to other nodes always return return errno
>> -512(ERESTARTSYS). So I think these two threads may have
>> pending signals remain.
>> This error can not reproduced any longer. Has any ideas?

You should have made the question more clear, and that is:

How is it possible that a workqueue kernel thread has pending signals?

>>
>> The log is as follows:
>> [58463.684504] (kworker/u:1,17332,0):o2net_send_tcp_msg:1332 ERROR: sendmsg 
>> returned -512 instead of 104
>> [58915.213299] ocfs2: Error -513 running user helper "/usr/sbin/ocfs2_hb_ctl 
>> -K -u A5FD0ED8733D4F9C98D23B326AD7DE10"
>> [59406.443615] ocfs2: Error -513 running user helper "/usr/sbin/ocfs2_hb_ctl 
>> -K -u A5FD0ED8733D4F9C98D23B326AD7DE10"
>>

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: call_usermodehelper() returns -513 when ocfs2 umounting filesystems

2013-07-30 Thread Li Zefan
On 2013/7/30 16:51, Xue jiufei wrote:
 On 2013/7/30 15:30, Xue jiufei wrote:
 Hi, 
 We have encountered an error when umounting ocfs2 filesystems.
 Function ocfs2_leave_group() calls call_usermodehelper() to stop
 heartbeat thread, but it returns -513(ERESTARTNOINTR) in one test.
 And after that error, every times umounting the filesystem, it
 returns the same error.
 And at the same time, there's another kworker thread which is
 sending messages to other nodes always return return errno
 -512(ERESTARTSYS). So I think these two threads may have
 pending signals remain.
 This error can not reproduced any longer. Has any ideas?

You should have made the question more clear, and that is:

How is it possible that a workqueue kernel thread has pending signals?


 The log is as follows:
 [58463.684504] (kworker/u:1,17332,0):o2net_send_tcp_msg:1332 ERROR: sendmsg 
 returned -512 instead of 104
 [58915.213299] ocfs2: Error -513 running user helper /usr/sbin/ocfs2_hb_ctl 
 -K -u A5FD0ED8733D4F9C98D23B326AD7DE10
 [59406.443615] ocfs2: Error -513 running user helper /usr/sbin/ocfs2_hb_ctl 
 -K -u A5FD0ED8733D4F9C98D23B326AD7DE10


--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v4 1/8] cgroup: convert cgroup_ida to cgroup_idr

2013-07-30 Thread Li Zefan
This enables us to lookup a cgroup by its id.

v4:
- add a comment for idr_remove() in cgroup_offline_fn().

v3:
- on success, idr_alloc() returns the id but not 0, so fix the BUG_ON()
  in cgroup_init().
- pass the right value to idr_alloc() so that the id for dummy cgroup is 0.

Signed-off-by: Li Zefan lize...@huawei.com
Reviewed-by: Michal Hocko mho...@suse.cz
---
 include/linux/cgroup.h |  4 ++--
 kernel/cgroup.c| 33 +++--
 2 files changed, 29 insertions(+), 8 deletions(-)

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 297462b..2bd052d 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -161,7 +161,7 @@ struct cgroup_name {
 struct cgroup {
unsigned long flags;/* unsigned long so bitops work */
 
-   int id; /* ida allocated in-hierarchy ID */
+   int id; /* idr allocated in-hierarchy ID */
 
/*
 * We link our 'sibling' struct into our parent's 'children'.
@@ -322,7 +322,7 @@ struct cgroupfs_root {
unsigned long flags;
 
/* IDs for cgroups in this hierarchy */
-   struct ida cgroup_ida;
+   struct idr cgroup_idr;
 
/* The path to use for release notifications. */
char release_agent_path[PATH_MAX];
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 345fac8..a26e083 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -866,8 +866,6 @@ static void cgroup_free_fn(struct work_struct *work)
 */
dput(cgrp-parent-dentry);
 
-   ida_simple_remove(cgrp-root-cgroup_ida, cgrp-id);
-
/*
 * Drop the active superblock reference that we took when we
 * created the cgroup. This will free cgrp-root, if we are
@@ -1379,6 +1377,7 @@ static void init_cgroup_root(struct cgroupfs_root *root)
cgrp-root = root;
RCU_INIT_POINTER(cgrp-name, root_cgroup_name);
init_cgroup_housekeeping(cgrp);
+   idr_init(root-cgroup_idr);
 }
 
 static int cgroup_init_root_id(struct cgroupfs_root *root, int start, int end)
@@ -1451,7 +1450,6 @@ static struct cgroupfs_root *cgroup_root_from_opts(struct 
cgroup_sb_opts *opts)
 */
root-subsys_mask = opts-subsys_mask;
root-flags = opts-flags;
-   ida_init(root-cgroup_ida);
if (opts-release_agent)
strcpy(root-release_agent_path, opts-release_agent);
if (opts-name)
@@ -1467,7 +1465,7 @@ static void cgroup_free_root(struct cgroupfs_root *root)
/* hierarhcy ID shoulid already have been released */
WARN_ON_ONCE(root-hierarchy_id);
 
-   ida_destroy(root-cgroup_ida);
+   idr_destroy(root-cgroup_idr);
kfree(root);
}
 }
@@ -1582,6 +1580,11 @@ static struct dentry *cgroup_mount(struct 
file_system_type *fs_type,
mutex_lock(cgroup_mutex);
mutex_lock(cgroup_root_mutex);
 
+   root_cgrp-id = idr_alloc(root-cgroup_idr, root_cgrp,
+  0, 1, GFP_KERNEL);
+   if (root_cgrp-id  0)
+   goto unlock_drop;
+
/* Check for name clashes with existing mounts */
ret = -EBUSY;
if (strlen(root-name))
@@ -4273,7 +4276,11 @@ static long cgroup_create(struct cgroup *parent, struct 
dentry *dentry,
goto err_free_cgrp;
rcu_assign_pointer(cgrp-name, name);
 
-   cgrp-id = ida_simple_get(root-cgroup_ida, 1, 0, GFP_KERNEL);
+   /*
+* Temporarily set the pointer to NULL, so idr_find() won't return
+* a half-baked cgroup.
+*/
+   cgrp-id = idr_alloc(root-cgroup_idr, NULL, 1, 0, GFP_KERNEL);
if (cgrp-id  0)
goto err_free_name;
 
@@ -4371,6 +4378,8 @@ static long cgroup_create(struct cgroup *parent, struct 
dentry *dentry,
}
}
 
+   idr_replace(root-cgroup_idr, cgrp, cgrp-id);
+
err = cgroup_addrm_files(cgrp, NULL, cgroup_base_files, true);
if (err)
goto err_destroy;
@@ -4397,7 +4406,7 @@ err_free_all:
/* Release the reference count that we took on the superblock */
deactivate_super(sb);
 err_free_id:
-   ida_simple_remove(root-cgroup_ida, cgrp-id);
+   idr_remove(root-cgroup_idr, cgrp-id);
 err_free_name:
kfree(rcu_dereference_raw(cgrp-name));
 err_free_cgrp:
@@ -4590,6 +4599,14 @@ static void cgroup_offline_fn(struct work_struct *work)
/* delete this cgroup from parent-children */
list_del_rcu(cgrp-sibling);
 
+   /*
+* We should remove the cgroup object from idr before its grace
+* period starts, so we won't be looking up a cgroup while the
+* cgroup is being freed.
+*/
+   idr_remove(cgrp-root-cgroup_idr, cgrp-id);
+   cgrp-id = -1;
+
dput(d);
 
set_bit(CGRP_RELEASABLE, parent-flags);
@@ -4915,6 +4932,10 @@ int

[PATCH v4 0/8] memcg, cgroup: kill css_id

2013-07-30 Thread Li Zefan
This patchset converts memcg to use cgroup-id, and then we can remove
cgroup css_id.

As we've removed memcg's own refcnt, converting memcg to use cgroup-id
is very straight-forward.

The patchset is based on Tejun's cgroup tree.

v4:
- make cgroup_from_id() inline and check if cgroup_mutex is held.
- add a comment for idr_remove() in cgroup_offline)fn().

v2-v3:
- some minor cleanups suggested by Michal.
- fixed the call to idr_alloc() in cgroup_init() in the first patch.

Li Zefan (8):
  cgroup: convert cgroup_ida to cgroup_idr
  cgroup: document how cgroup IDs are assigned
  cgroup: implement cgroup_from_id()
  memcg: convert to use cgroup_is_descendant()
  memcg: convert to use cgroup id
  memcg: fail to create cgroup if the cgroup id is too big
  memcg: stop using css id
  cgroup: kill css_id
--
 include/linux/cgroup.h |  65 ++
 kernel/cgroup.c| 285 
++-
 mm/memcontrol.c|  68 --
 3 files changed, 96 insertions(+), 322 deletions(-)
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v4 4/8] memcg: convert to use cgroup_is_descendant()

2013-07-30 Thread Li Zefan
This is a preparation to kill css_id.

Signed-off-by: Li Zefan lize...@huawei.com
Acked-by: Michal Hocko mho...@suse.cz
---
 mm/memcontrol.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index d12ca6f..626c426 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1434,7 +1434,7 @@ bool __mem_cgroup_same_or_subtree(const struct mem_cgroup 
*root_memcg,
return true;
if (!root_memcg-use_hierarchy || !memcg)
return false;
-   return css_is_ancestor(memcg-css, root_memcg-css);
+   return cgroup_is_descendant(memcg-css.cgroup, root_memcg-css.cgroup);
 }
 
 static bool mem_cgroup_same_or_subtree(const struct mem_cgroup *root_memcg,
-- 
1.8.0.2

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v4 2/8] cgroup: document how cgroup IDs are assigned

2013-07-30 Thread Li Zefan
As cgroup id has been used in netprio cgroup and will be used in memcg,
it's important to make it clear how a cgroup id is allocated.

For example, in netprio cgroup, the id is used as index of anarray.

Signed-off-by: Li Zefan lize...@huwei.com
Reviewed-by: Michal Hocko mho...@suse.cz
---
 include/linux/cgroup.h | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 2bd052d..8c107e9 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -161,7 +161,13 @@ struct cgroup_name {
 struct cgroup {
unsigned long flags;/* unsigned long so bitops work */
 
-   int id; /* idr allocated in-hierarchy ID */
+   /*
+* idr allocated in-hierarchy ID.
+*
+* The ID of the root cgroup is always 0, and a new cgroup
+* will be assigned with a smallest available ID.
+*/
+   int id;
 
/*
 * We link our 'sibling' struct into our parent's 'children'.
-- 
1.8.0.2
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v4 6/8] memcg: fail to create cgroup if the cgroup id is too big

2013-07-30 Thread Li Zefan
memcg requires the cgroup id to be smaller than 65536.

This is a preparation to kill css id.

Signed-off-by: Li Zefan lize...@huawei.com
Acked-by: Michal Hocko mho...@suse.cz
---
 mm/memcontrol.c | 9 +
 1 file changed, 9 insertions(+)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 62541f5..ad7d7c9 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -512,6 +512,12 @@ static inline bool mem_cgroup_is_root(struct mem_cgroup 
*memcg)
return (memcg == root_mem_cgroup);
 }
 
+/*
+ * We restrict the id in the range of [1, 65535], so it can fit into
+ * an unsigned short.
+ */
+#define MEM_CGROUP_ID_MAX  USHRT_MAX
+
 static inline unsigned short mem_cgroup_id(struct mem_cgroup *memcg)
 {
/*
@@ -6248,6 +6254,9 @@ mem_cgroup_css_alloc(struct cgroup *cont)
long error = -ENOMEM;
int node;
 
+   if (cont-id  MEM_CGROUP_ID_MAX)
+   return ERR_PTR(-ENOSPC);
+
memcg = mem_cgroup_alloc();
if (!memcg)
return ERR_PTR(error);
-- 
1.8.0.2
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v4 7/8] memcg: stop using css id

2013-07-30 Thread Li Zefan
Now memcg uses cgroup id instead of css id. Update some comments and
set mem_cgroup_subsys-use_id to 0.

Signed-off-by: Li Zefan lize...@huawei.com
Acked-by: Michal Hocko mho...@suse.cz
---
 mm/memcontrol.c | 23 ---
 1 file changed, 8 insertions(+), 15 deletions(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index ad7d7c9..6b77ab6 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -608,16 +608,11 @@ static void disarm_sock_keys(struct mem_cgroup *memcg)
 #ifdef CONFIG_MEMCG_KMEM
 /*
  * This will be the memcg's index in each cache's -memcg_params-memcg_caches.
- * There are two main reasons for not using the css_id for this:
- *  1) this works better in sparse environments, where we have a lot of memcgs,
- * but only a few kmem-limited. Or also, if we have, for instance, 200
- * memcgs, and none but the 200th is kmem-limited, we'd have to have a
- * 200 entry array for that.
- *
- *  2) In order not to violate the cgroup API, we would like to do all memory
- * allocation in -create(). At that point, we haven't yet allocated the
- * css_id. Having a separate index prevents us from messing with the cgroup
- * core for this
+ * The main reason for not using cgroup id for this:
+ *  this works better in sparse environments, where we have a lot of memcgs,
+ *  but only a few kmem-limited. Or also, if we have, for instance, 200
+ *  memcgs, and none but the 200th is kmem-limited, we'd have to have a
+ *  200 entry array for that.
  *
  * The current size of the caches array is stored in
  * memcg_limited_groups_array_size.  It will double each time we have to
@@ -632,14 +627,14 @@ int memcg_limited_groups_array_size;
  * cgroups is a reasonable guess. In the future, it could be a parameter or
  * tunable, but that is strictly not necessary.
  *
- * MAX_SIZE should be as large as the number of css_ids. Ideally, we could get
+ * MAX_SIZE should be as large as the number of cgrp_ids. Ideally, we could get
  * this constant directly from cgroup, but it is understandable that this is
  * better kept as an internal representation in cgroup.c. In any case, the
- * css_id space is not getting any smaller, and we don't have to necessarily
+ * cgrp_id space is not getting any smaller, and we don't have to necessarily
  * increase ours as well if it increases.
  */
 #define MEMCG_CACHES_MIN_SIZE 4
-#define MEMCG_CACHES_MAX_SIZE 65535
+#define MEMCG_CACHES_MAX_SIZE MEM_CGROUP_ID_MAX
 
 /*
  * A lot of the calls to the cache allocation functions are expected to be
@@ -6188,7 +6183,6 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg)
size_t size = memcg_size();
 
mem_cgroup_remove_from_trees(memcg);
-   free_css_id(mem_cgroup_subsys, memcg-css);
 
for_each_node(node)
free_mem_cgroup_per_zone_info(memcg, node);
@@ -6985,7 +6979,6 @@ struct cgroup_subsys mem_cgroup_subsys = {
.bind = mem_cgroup_bind,
.base_cftypes = mem_cgroup_files,
.early_init = 0,
-   .use_id = 1,
 };
 
 #ifdef CONFIG_MEMCG_SWAP
-- 
1.8.0.2

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v4 3/8] cgroup: implement cgroup_from_id()

2013-07-30 Thread Li Zefan
This will be used as a replacement for css_lookup().

There's a difference with cgroup id and css id. cgroup id starts with 0,
while css id starts with 1.

v4:
- also check if cggroup_mutex is held.
- make it an inline function.

Signed-off-by: Li Zefan lize...@huawei.com
Reviewed-by: Michal Hocko mho...@suse.cz
---
 include/linux/cgroup.h | 18 ++
 1 file changed, 18 insertions(+)

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 8c107e9..4ef8452 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -720,6 +720,24 @@ static inline struct cgroup* task_cgroup(struct 
task_struct *task,
return task_subsys_state(task, subsys_id)-cgroup;
 }
 
+/**
+ * cgroup_from_id - lookup cgroup by id
+ * @ss: cgroup subsys to be looked into
+ * @id: the cgroup id
+ *
+ * Returns the cgroup if there's valid one with @id, otherwise returns NULL.
+ * Should be called under rcu_read_lock().
+ */
+static inline struct cgroup *cgroup_from_id(struct cgroup_subsys *ss, int id)
+{
+#ifdef CONFIG_PROVE_RCU
+   rcu_lockdep_assert(rcu_read_lock_held() ||
+  lockdep_is_held(cgroup_mutex),
+  cgroup_from_id() needs proper protection);
+#endif
+   return idr_find(ss-root-cgroup_idr, id);
+}
+
 struct cgroup *cgroup_next_sibling(struct cgroup *pos);
 
 /**
-- 
1.8.0.2
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v4 8/8] cgroup: kill css_id

2013-07-30 Thread Li Zefan
The only user of css_id was memcg, and it has been convered to use
cgroup-id, so kill css_id.

Signed-off-by: Li Zefan lize...@huwei.com
Reviewed-by: Michal Hocko mho...@suse.cz
---
 include/linux/cgroup.h |  37 
 kernel/cgroup.c| 252 +
 2 files changed, 1 insertion(+), 288 deletions(-)

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 4ef8452..c07e175 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -597,11 +597,6 @@ struct cgroup_subsys {
int subsys_id;
int disabled;
int early_init;
-   /*
-* True if this subsys uses ID. ID is not available before cgroup_init()
-* (not available in early_init time.)
-*/
-   bool use_id;
 
/*
 * If %false, this subsystem is properly hierarchical -
@@ -627,9 +622,6 @@ struct cgroup_subsys {
 */
struct cgroupfs_root *root;
struct list_head sibling;
-   /* used when use_id == true */
-   struct idr idr;
-   spinlock_t id_lock;
 
/* list of cftype_sets */
struct list_head cftsets;
@@ -874,35 +866,6 @@ int cgroup_scan_tasks(struct cgroup_scanner *scan);
 int cgroup_attach_task_all(struct task_struct *from, struct task_struct *);
 int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from);
 
-/*
- * CSS ID is ID for cgroup_subsys_state structs under subsys. This only works
- * if cgroup_subsys.use_id == true. It can be used for looking up and scanning.
- * CSS ID is assigned at cgroup allocation (create) automatically
- * and removed when subsys calls free_css_id() function. This is because
- * the lifetime of cgroup_subsys_state is subsys's matter.
- *
- * Looking up and scanning function should be called under rcu_read_lock().
- * Taking cgroup_mutex is not necessary for following calls.
- * But the css returned by this routine can be not populated yet or being
- * destroyed. The caller should check css and cgroup's status.
- */
-
-/*
- * Typically Called at -destroy(), or somewhere the subsys frees
- * cgroup_subsys_state.
- */
-void free_css_id(struct cgroup_subsys *ss, struct cgroup_subsys_state *css);
-
-/* Find a cgroup_subsys_state which has given ID */
-
-struct cgroup_subsys_state *css_lookup(struct cgroup_subsys *ss, int id);
-
-/* Returns true if root is ancestor of cg */
-bool css_is_ancestor(struct cgroup_subsys_state *cg,
-const struct cgroup_subsys_state *root);
-
-/* Get id and depth of css */
-unsigned short css_id(struct cgroup_subsys_state *css);
 struct cgroup_subsys_state *cgroup_css_from_dir(struct file *f, int id);
 
 #else /* !CONFIG_CGROUPS */
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index a26e083..a64b7b8 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -123,38 +123,6 @@ struct cfent {
 };
 
 /*
- * CSS ID -- ID per subsys's Cgroup Subsys State(CSS). used only when
- * cgroup_subsys-use_id != 0.
- */
-#define CSS_ID_MAX (65535)
-struct css_id {
-   /*
-* The css to which this ID points. This pointer is set to valid value
-* after cgroup is populated. If cgroup is removed, this will be NULL.
-* This pointer is expected to be RCU-safe because destroy()
-* is called after synchronize_rcu(). But for safe use, css_tryget()
-* should be used for avoiding race.
-*/
-   struct cgroup_subsys_state __rcu *css;
-   /*
-* ID of this css.
-*/
-   unsigned short id;
-   /*
-* Depth in hierarchy which this ID belongs to.
-*/
-   unsigned short depth;
-   /*
-* ID is freed by RCU. (and lookup routine is RCU safe.)
-*/
-   struct rcu_head rcu_head;
-   /*
-* Hierarchy of CSS ID belongs to.
-*/
-   unsigned short stack[0]; /* Array of Length (depth+1) */
-};
-
-/*
  * cgroup_event represents events which userspace want to receive.
  */
 struct cgroup_event {
@@ -364,9 +332,6 @@ struct cgrp_cset_link {
 static struct css_set init_css_set;
 static struct cgrp_cset_link init_cgrp_cset_link;
 
-static int cgroup_init_idr(struct cgroup_subsys *ss,
-  struct cgroup_subsys_state *css);
-
 /* css_set_lock protects the list of css_set objects, and the
  * chain of tasks off each css_set.  Nests outside task-alloc_lock
  * due to cgroup_iter_start() */
@@ -815,9 +780,6 @@ static struct backing_dev_info cgroup_backing_dev_info = {
.capabilities   = BDI_CAP_NO_ACCT_AND_WRITEBACK,
 };
 
-static int alloc_css_id(struct cgroup_subsys *ss,
-   struct cgroup *parent, struct cgroup *child);
-
 static struct inode *cgroup_new_inode(umode_t mode, struct super_block *sb)
 {
struct inode *inode = new_inode(sb);
@@ -4160,20 +4122,6 @@ static int cgroup_populate_dir(struct cgroup *cgrp, 
unsigned long subsys_mask)
}
}
 
-   /* This cgroup is ready now */
-   for_each_root_subsys(cgrp-root

[PATCH v4 5/8] memcg: convert to use cgroup id

2013-07-30 Thread Li Zefan
Use cgroup id instead of css id. This is a preparation to kill css id.

Note, as memcg treat 0 as an invalid id, while cgroup id starts with 0,
we define memcg_id == cgroup_id + 1.

Signed-off-by: Li Zefan lize...@huawei.com
Acked-by: Michal Hocko mho...@suse.cz
---
 mm/memcontrol.c | 34 --
 1 file changed, 24 insertions(+), 10 deletions(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 626c426..62541f5 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -512,6 +512,25 @@ static inline bool mem_cgroup_is_root(struct mem_cgroup 
*memcg)
return (memcg == root_mem_cgroup);
 }
 
+static inline unsigned short mem_cgroup_id(struct mem_cgroup *memcg)
+{
+   /*
+* The ID of the root cgroup is 0, but memcg treat 0 as an
+* invalid ID, so we return (cgroup_id + 1).
+*/
+   return memcg-css.cgroup-id + 1;
+}
+
+static inline struct mem_cgroup *mem_cgroup_from_id(unsigned short id)
+{
+   struct cgroup *cgrp;
+
+   cgrp = cgroup_from_id(mem_cgroup_subsys, id - 1);
+   if (!cgrp)
+   return NULL;
+   return mem_cgroup_from_cont(cgrp);
+}
+
 /* Writing them here to avoid exposing memcg's inner layout */
 #if defined(CONFIG_INET)  defined(CONFIG_MEMCG_KMEM)
 
@@ -2821,15 +2840,10 @@ static void __mem_cgroup_cancel_local_charge(struct 
mem_cgroup *memcg,
  */
 static struct mem_cgroup *mem_cgroup_lookup(unsigned short id)
 {
-   struct cgroup_subsys_state *css;
-
/* ID 0 is unused ID */
if (!id)
return NULL;
-   css = css_lookup(mem_cgroup_subsys, id);
-   if (!css)
-   return NULL;
-   return mem_cgroup_from_css(css);
+   return mem_cgroup_from_id(id);
 }
 
 struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
@@ -4328,7 +4342,7 @@ mem_cgroup_uncharge_swapcache(struct page *page, 
swp_entry_t ent, bool swapout)
 * css_get() was called in uncharge().
 */
if (do_swap_account  swapout  memcg)
-   swap_cgroup_record(ent, css_id(memcg-css));
+   swap_cgroup_record(ent, mem_cgroup_id(memcg));
 }
 #endif
 
@@ -4380,8 +4394,8 @@ static int mem_cgroup_move_swap_account(swp_entry_t entry,
 {
unsigned short old_id, new_id;
 
-   old_id = css_id(from-css);
-   new_id = css_id(to-css);
+   old_id = mem_cgroup_id(from);
+   new_id = mem_cgroup_id(to);
 
if (swap_cgroup_cmpxchg(entry, old_id, new_id) == old_id) {
mem_cgroup_swap_statistics(from, false);
@@ -6542,7 +6556,7 @@ static enum mc_target_type get_mctgt_type(struct 
vm_area_struct *vma,
}
/* There is a swap entry and a page doesn't exist or isn't charged */
if (ent.val  !ret 
-   css_id(mc.from-css) == lookup_swap_cgroup_id(ent)) {
+   mem_cgroup_id(mc.from) == lookup_swap_cgroup_id(ent)) {
ret = MC_TARGET_SWAP;
if (target)
target-ent = ent;
-- 
1.8.0.2
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: Checkpatch error on trace events macros

2013-07-29 Thread Li Zefan
On 2013/7/30 11:10, Joe Perches wrote:
> On Tue, 2013-07-30 at 11:04 +0800, Li Zefan wrote:
>> On 2013/7/30 10:36, Joe Perches wrote:
>>> On Tue, 2013-07-30 at 10:06 +0800, Li Zefan wrote:
>>>> On 2013/7/30 9:58, Joe Perches wrote:
>>>>> So what are these TRACE_ defines that need
>>>>> excluding from the "complex values" check?
>>>>>
>>>>> Anything other than 
>>>>>
>>>>> TRACE_SYSTEM
>>>>> TRACE_INCLUDE_FILE
>>>>> TRACE_INCLUDE_PATH
>>>>>
>>>>> ?
>>>>>
>>>>> samples/trace_events/trace-events-sample.h
>>>>> only has those 3.
>>>>>
>>>>
>>>> Try:
>>>>   # scripts/checkpatch.pl --file include/trace/events/*
>>>>
>>>> You'll see numerous errors. :)
>>>
>>> Nope, you'll see numerous whitespace defects, but no
>>> actual errors.
>>>
>>> If you run with:
>>>
>>> --ignore=spacing,long_line,code_indent,leading_space,printf_l,split_string,space_before_tab,trailing_whitespace,line_continuations
>>>
>>
>> Serious? I'd just not run checkpatch.pl. ;)
> 
> Your choice.  It's all whitespace and %Lx stuff.
> I think the reports are actual style defects.
> The line continuations and split strings uses are
> pretty poor there too.
> 
>> The "complex values" check complaints come from many places in 
>> include/trace/events/*,
>> and I'm not going to check where and why.
> 
> Nor I.
> You haven't answered my question either.
> 

Oh, I overlooked it.

TRACE_SYSTEM defines the directory name in /sys/kernel/debug/tracing/events. A 
trace
event belongs to a trace system:

  # cat /sys/kernel/debug/tracing/available_events
  ext3:ext3_free_inode
  ext3:ext3_request_inode
  ...

ext3 is the SYSTEM name.

TRACE_INCLUDE_FILE is needed if the .h filename is different than the SYSTEM 
name.

If the .h file is not in include/trace/events, then you must use 
TRACE_INCLUDE_PATH
to specify where the file is.

I'm not sure if there're any other maros need special treatment.

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: Checkpatch error on trace events macros

2013-07-29 Thread Li Zefan
On 2013/7/30 10:36, Joe Perches wrote:
> On Tue, 2013-07-30 at 10:06 +0800, Li Zefan wrote:
>> On 2013/7/30 9:58, Joe Perches wrote:
>>> So what are these TRACE_ defines that need
>>> excluding from the "complex values" check?
>>>
>>> Anything other than 
>>>
>>> TRACE_SYSTEM
>>> TRACE_INCLUDE_FILE
>>> TRACE_INCLUDE_PATH
>>>
>>> ?
>>>
>>> samples/trace_events/trace-events-sample.h
>>> only has those 3.
>>>
>>
>> Try:
>>   # scripts/checkpatch.pl --file include/trace/events/*
>>
>> You'll see numerous errors. :)
> 
> Nope, you'll see numerous whitespace defects, but no
> actual errors.
> 
> If you run with:
> 
> --ignore=spacing,long_line,code_indent,leading_space,printf_l,split_string,space_before_tab,trailing_whitespace,line_continuations
> 

Serious? I'd just not run checkpatch.pl. ;)

> it's flawless.
> 

The "complex values" check complaints come from many places in 
include/trace/events/*,
and I'm not going to check where and why.

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: Checkpatch error on trace events macros

2013-07-29 Thread Li Zefan
On 2013/7/30 9:58, Joe Perches wrote:
> On Tue, 2013-07-30 at 09:30 +0800, Li Zefan wrote:
>> On 2013/7/30 3:52, Sarah Sharp wrote:
>>> Hi Andy and Joe,
>>>
>>> Checkpatch is complaining when code adds new trace events macros:
>>>
>>> sarah@xanatos:~/git/kernels/xhci$ git am -s ~/Maildir.fetchmail/.to-apply
>>> Applying: xhci: add traces for debug messages in xhci_address_device()
>>> ERROR: Macros with complex values should be enclosed in parenthesis
>>> #86: FILE: drivers/usb/host/xhci-trace.h:15:
>>> +#define TRACE_SYSTEM xhci-hcd
>>>
>>> ERROR: Macros with complex values should be enclosed in parenthesis
>>> #115: FILE: drivers/usb/host/xhci-trace.h:44:
>>> +#define TRACE_INCLUDE_PATH .
>>>
>>> ERROR: Macros with complex values should be enclosed in parenthesis
>>> #118: FILE: drivers/usb/host/xhci-trace.h:47:
>>> +#define TRACE_INCLUDE_FILE xhci-trace
>>>
>>> total: 3 errors, 0 warnings, 169 lines checked
>>>
>>>
>>> The macros have to be defined that way for trace events to work.
>>
>> yeah, that's true, and we always just ignore chechpatch complaints
>> when it comes to TRACE_EVENT macros.
>>
>>> Can you fix checkpatch not to complain about trace event macros?
>>>
> 
> So what are these TRACE_ defines that need
> excluding from the "complex values" check?
> 
> Anything other than 
> 
> TRACE_SYSTEM
> TRACE_INCLUDE_FILE
> TRACE_INCLUDE_PATH
> 
> ?
> 
> samples/trace_events/trace-events-sample.h
> only has those 3.
> 

Try:
  # scripts/checkpatch.pl --file include/trace/events/*

You'll see numerous errors. :)

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: memcg creates an unkillable task in 3.2-rc2

2013-07-29 Thread Li Zefan
> I am also seeing what looks like a leak somewhere in the cgroup code as
> well.  After some runs of the same reproducer I get into a state where
> after everything is clean up.  All of the control groups have been
> removed and the cgroup filesystem is unmounted, I can mount a cgroup
> filesystem with that same combindation of subsystems, but I can't mount
> a cgroup filesystem with any of those subsystems in any other
> combination.  So I am guessing that the superblock is from the original
> mounting is still lingering for some reason.
> 

If this happens again, you can check /proc/cgroups, 

#subsys_namehierarchy   num_cgroups enabled
cpuset  0   1   1
debug   0   1   1
cpu 0   1   1
cpuacct 0   1   1
memory  0   1   1
devices 0   1   1
freezer 0   1   1
blkio   0   1   1

If "hierachy" is not 0, then it didn't really unmounted. If "num_cgroups"
is not 1, then there're some cgroups not really destroyed though they've
been rmdired.

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: Checkpatch error on trace events macros

2013-07-29 Thread Li Zefan
On 2013/7/30 3:52, Sarah Sharp wrote:
> Hi Andy and Joe,
> 
> Checkpatch is complaining when code adds new trace events macros:
> 
> sarah@xanatos:~/git/kernels/xhci$ git am -s ~/Maildir.fetchmail/.to-apply
> Applying: xhci: add traces for debug messages in xhci_address_device()
> ERROR: Macros with complex values should be enclosed in parenthesis
> #86: FILE: drivers/usb/host/xhci-trace.h:15:
> +#define TRACE_SYSTEM xhci-hcd
> 
> ERROR: Macros with complex values should be enclosed in parenthesis
> #115: FILE: drivers/usb/host/xhci-trace.h:44:
> +#define TRACE_INCLUDE_PATH .
> 
> ERROR: Macros with complex values should be enclosed in parenthesis
> #118: FILE: drivers/usb/host/xhci-trace.h:47:
> +#define TRACE_INCLUDE_FILE xhci-trace
> 
> total: 3 errors, 0 warnings, 169 lines checked
> 
> 
> The macros have to be defined that way for trace events to work.

yeah, that's true, and we always just ignore chechpatch complaints
when it comes to TRACE_EVENT macros.

> Can you fix checkpatch not to complain about trace event macros?
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v3 1/8] cgroup: convert cgroup_ida to cgroup_idr

2013-07-29 Thread Li Zefan
On 2013/7/30 2:28, Tejun Heo wrote:
> Hello,
> 
> On Mon, Jul 29, 2013 at 03:07:48PM +0800, Li Zefan wrote:
>> @@ -4590,6 +4599,9 @@ static void cgroup_offline_fn(struct work_struct *work)
>>  /* delete this cgroup from parent->children */
>>  list_del_rcu(>sibling);
>>  
>> +if (cgrp->id)
>> +idr_remove(>root->cgroup_idr, cgrp->id);
>> +
> 
> Yeap, if we're gonna allow lookups, removal should happen here but can
> we please add short comment explaining why that is?

sure

> Also, do we want to clear cgrp->id?
> 

Set cgrp->id to 0? No, 0 is a valid id. The if is here because at first
I called idr_alloc() very late in cgroup_create(), so cgroup_offline_fn()
can be called while cgrp->id hasn't been initialized. Now I can remove
this check.

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v3 2/8] cgroup: document how cgroup IDs are assigned

2013-07-29 Thread Li Zefan
On 2013/7/30 2:26, Tejun Heo wrote:
> On Mon, Jul 29, 2013 at 03:08:04PM +0800, Li Zefan wrote:
>> As cgroup id has been used in netprio cgroup and will be used in memcg,
>> it's important to make it clear how a cgroup id is allocated.
>>
>> For example, in netprio cgroup, the id is used as index of anarray.
>>
>> Signed-off-by: Li Zefan 
>> Reviewed-by: Michal Hocko 
> 
> We can merge this into the first patch?
> 

The first patch just changes ida to idr, it doesn't change how IDs are
allocated, so I prefer make this a standalone patch.

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v3 8/8] cgroup: kill css_id

2013-07-29 Thread Li Zefan
The only user of css_id was memcg, and it has been convered to use
cgroup->id, so kill css_id.

Signed-off-by: Li Zefan 
Reviewed-by: Michal Hocko 
---
 include/linux/cgroup.h |  37 
 kernel/cgroup.c| 252 +
 2 files changed, 1 insertion(+), 288 deletions(-)

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index e8eb361..ec8c380 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -599,11 +599,6 @@ struct cgroup_subsys {
int subsys_id;
int disabled;
int early_init;
-   /*
-* True if this subsys uses ID. ID is not available before cgroup_init()
-* (not available in early_init time.)
-*/
-   bool use_id;
 
/*
 * If %false, this subsystem is properly hierarchical -
@@ -629,9 +624,6 @@ struct cgroup_subsys {
 */
struct cgroupfs_root *root;
struct list_head sibling;
-   /* used when use_id == true */
-   struct idr idr;
-   spinlock_t id_lock;
 
/* list of cftype_sets */
struct list_head cftsets;
@@ -858,35 +850,6 @@ int cgroup_scan_tasks(struct cgroup_scanner *scan);
 int cgroup_attach_task_all(struct task_struct *from, struct task_struct *);
 int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from);
 
-/*
- * CSS ID is ID for cgroup_subsys_state structs under subsys. This only works
- * if cgroup_subsys.use_id == true. It can be used for looking up and scanning.
- * CSS ID is assigned at cgroup allocation (create) automatically
- * and removed when subsys calls free_css_id() function. This is because
- * the lifetime of cgroup_subsys_state is subsys's matter.
- *
- * Looking up and scanning function should be called under rcu_read_lock().
- * Taking cgroup_mutex is not necessary for following calls.
- * But the css returned by this routine can be "not populated yet" or "being
- * destroyed". The caller should check css and cgroup's status.
- */
-
-/*
- * Typically Called at ->destroy(), or somewhere the subsys frees
- * cgroup_subsys_state.
- */
-void free_css_id(struct cgroup_subsys *ss, struct cgroup_subsys_state *css);
-
-/* Find a cgroup_subsys_state which has given ID */
-
-struct cgroup_subsys_state *css_lookup(struct cgroup_subsys *ss, int id);
-
-/* Returns true if root is ancestor of cg */
-bool css_is_ancestor(struct cgroup_subsys_state *cg,
-const struct cgroup_subsys_state *root);
-
-/* Get id and depth of css */
-unsigned short css_id(struct cgroup_subsys_state *css);
 struct cgroup_subsys_state *cgroup_css_from_dir(struct file *f, int id);
 
 #else /* !CONFIG_CGROUPS */
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index dc4a749..26caaa1 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -123,38 +123,6 @@ struct cfent {
 };
 
 /*
- * CSS ID -- ID per subsys's Cgroup Subsys State(CSS). used only when
- * cgroup_subsys->use_id != 0.
- */
-#define CSS_ID_MAX (65535)
-struct css_id {
-   /*
-* The css to which this ID points. This pointer is set to valid value
-* after cgroup is populated. If cgroup is removed, this will be NULL.
-* This pointer is expected to be RCU-safe because destroy()
-* is called after synchronize_rcu(). But for safe use, css_tryget()
-* should be used for avoiding race.
-*/
-   struct cgroup_subsys_state __rcu *css;
-   /*
-* ID of this css.
-*/
-   unsigned short id;
-   /*
-* Depth in hierarchy which this ID belongs to.
-*/
-   unsigned short depth;
-   /*
-* ID is freed by RCU. (and lookup routine is RCU safe.)
-*/
-   struct rcu_head rcu_head;
-   /*
-* Hierarchy of CSS ID belongs to.
-*/
-   unsigned short stack[0]; /* Array of Length (depth+1) */
-};
-
-/*
  * cgroup_event represents events which userspace want to receive.
  */
 struct cgroup_event {
@@ -364,9 +332,6 @@ struct cgrp_cset_link {
 static struct css_set init_css_set;
 static struct cgrp_cset_link init_cgrp_cset_link;
 
-static int cgroup_init_idr(struct cgroup_subsys *ss,
-  struct cgroup_subsys_state *css);
-
 /* css_set_lock protects the list of css_set objects, and the
  * chain of tasks off each css_set.  Nests outside task->alloc_lock
  * due to cgroup_iter_start() */
@@ -815,9 +780,6 @@ static struct backing_dev_info cgroup_backing_dev_info = {
.capabilities   = BDI_CAP_NO_ACCT_AND_WRITEBACK,
 };
 
-static int alloc_css_id(struct cgroup_subsys *ss,
-   struct cgroup *parent, struct cgroup *child);
-
 static struct inode *cgroup_new_inode(umode_t mode, struct super_block *sb)
 {
struct inode *inode = new_inode(sb);
@@ -4160,20 +4122,6 @@ static int cgroup_populate_dir(struct cgroup *cgrp, 
unsigned long subsys_mask)
}
}
 
-   /* This cgroup is ready now */
-   for

[PATCH v3 2/8] cgroup: document how cgroup IDs are assigned

2013-07-29 Thread Li Zefan
As cgroup id has been used in netprio cgroup and will be used in memcg,
it's important to make it clear how a cgroup id is allocated.

For example, in netprio cgroup, the id is used as index of anarray.

Signed-off-by: Li Zefan 
Reviewed-by: Michal Hocko 
---
 include/linux/cgroup.h | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 2bd052d..8c107e9 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -161,7 +161,13 @@ struct cgroup_name {
 struct cgroup {
unsigned long flags;/* "unsigned long" so bitops work */
 
-   int id; /* idr allocated in-hierarchy ID */
+   /*
+* idr allocated in-hierarchy ID.
+*
+* The ID of the root cgroup is always 0, and a new cgroup
+* will be assigned with a smallest available ID.
+*/
+   int id;
 
/*
 * We link our 'sibling' struct into our parent's 'children'.
-- 
1.8.0.2
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v3 6/8] memcg: fail to create cgroup if the cgroup id is too big

2013-07-29 Thread Li Zefan
memcg requires the cgroup id to be smaller than 65536.

This is a preparation to kill css id.

Signed-off-by: Li Zefan 
Acked-by: Michal Hocko 
---
 mm/memcontrol.c | 9 +
 1 file changed, 9 insertions(+)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 62541f5..ad7d7c9 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -512,6 +512,12 @@ static inline bool mem_cgroup_is_root(struct mem_cgroup 
*memcg)
return (memcg == root_mem_cgroup);
 }
 
+/*
+ * We restrict the id in the range of [1, 65535], so it can fit into
+ * an unsigned short.
+ */
+#define MEM_CGROUP_ID_MAX  USHRT_MAX
+
 static inline unsigned short mem_cgroup_id(struct mem_cgroup *memcg)
 {
/*
@@ -6248,6 +6254,9 @@ mem_cgroup_css_alloc(struct cgroup *cont)
long error = -ENOMEM;
int node;
 
+   if (cont->id > MEM_CGROUP_ID_MAX)
+   return ERR_PTR(-ENOSPC);
+
memcg = mem_cgroup_alloc();
if (!memcg)
return ERR_PTR(error);
-- 
1.8.0.2

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


<    2   3   4   5   6   7   8   9   10   11   >