from:"aris"

[PATCH v4 2/9] devcg: reorder device exception functions

2013-01-30 Thread aris

In preparation for the next patch, reorder dev_exception_add() and
dev_exception_rm().

This patch doesn't introduce any functional changes.

Acked-by: Tejun Heo 
Cc: Tejun Heo 
Cc: Serge Hallyn 
Signed-off-by: Aristeu Rozanski 

---
 security/device_cgroup.c |   44 ++--
 1 file changed, 22 insertions(+), 22 deletions(-)

--- github.orig/security/device_cgroup.c2013-01-29 11:49:14.739657516 
-0500
+++ github/security/device_cgroup.c 2013-01-29 11:49:14.987661210 -0500
@@ -104,18 +104,14 @@ free_and_exit:
 /*
  * called under devcgroup_mutex
  */
-static int dev_exception_add(struct list_head *exceptions,
+static void dev_exception_rm(struct list_head *exceptions,
 struct dev_exception_item *ex)
 {
-   struct dev_exception_item *excopy, *walk;
+   struct dev_exception_item *walk, *tmp;
 
lockdep_assert_held(_mutex);
 
-   excopy = kmemdup(ex, sizeof(*ex), GFP_KERNEL);
-   if (!excopy)
-   return -ENOMEM;
-
-   list_for_each_entry(walk, exceptions, list) {
+   list_for_each_entry_safe(walk, tmp, exceptions, list) {
if (walk->type != ex->type)
continue;
if (walk->major != ex->major)
@@ -123,27 +119,29 @@ static int dev_exception_add(struct list
if (walk->minor != ex->minor)
continue;
 
-   walk->access |= ex->access;
-   kfree(excopy);
-   excopy = NULL;
+   walk->access &= ~ex->access;
+   if (!walk->access) {
+   list_del_rcu(>list);
+   kfree_rcu(walk, rcu);
+   }
}
-
-   if (excopy != NULL)
-   list_add_tail_rcu(>list, exceptions);
-   return 0;
 }
 
 /*
  * called under devcgroup_mutex
  */
-static void dev_exception_rm(struct list_head *exceptions,
+static int dev_exception_add(struct list_head *exceptions,
 struct dev_exception_item *ex)
 {
-   struct dev_exception_item *walk, *tmp;
+   struct dev_exception_item *excopy, *walk;
 
lockdep_assert_held(_mutex);
 
-   list_for_each_entry_safe(walk, tmp, exceptions, list) {
+   excopy = kmemdup(ex, sizeof(*ex), GFP_KERNEL);
+   if (!excopy)
+   return -ENOMEM;
+
+   list_for_each_entry(walk, exceptions, list) {
if (walk->type != ex->type)
continue;
if (walk->major != ex->major)
@@ -151,12 +149,14 @@ static void dev_exception_rm(struct list
if (walk->minor != ex->minor)
continue;
 
-   walk->access &= ~ex->access;
-   if (!walk->access) {
-   list_del_rcu(>list);
-   kfree_rcu(walk, rcu);
-   }
+   walk->access |= ex->access;
+   kfree(excopy);
+   excopy = NULL;
}
+
+   if (excopy != NULL)
+   list_add_tail_rcu(>list, exceptions);
+   return 0;
 }
 
 static void __dev_exception_clean(struct dev_cgroup *dev_cgroup)

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v4 8/9] devcg: refactor dev_exception_clean()

2013-01-30 Thread aris

This patch is in preparation for hierarchy support.

This patch doesn't introduce any functional changes.

Acked-by: Tejun Heo 
Cc: Tejun Heo 
Cc: Serge Hallyn 
Signed-off-by: Aristeu Rozanski 

---
 security/device_cgroup.c |   34 --
 1 file changed, 16 insertions(+), 18 deletions(-)

--- github.orig/security/device_cgroup.c2013-01-29 11:49:16.374681863 
-0500
+++ github/security/device_cgroup.c 2013-01-29 11:49:16.653686016 -0500
@@ -212,32 +212,33 @@ static int dev_exception_add(struct dev_
return rc;
 }
 
-static void __dev_exception_clean(struct dev_cgroup *dev_cgroup)
+static void dev_exception_clean(struct list_head *exceptions)
 {
struct dev_exception_item *ex, *tmp;
 
-   list_for_each_entry_safe(ex, tmp, _cgroup->exceptions, list) {
-   list_del_rcu(>list);
-   kfree_rcu(ex, rcu);
-   }
-   list_for_each_entry_safe(ex, tmp, _cgroup->local.exceptions,
-list) {
+   list_for_each_entry_safe(ex, tmp, exceptions, list) {
list_del_rcu(>list);
kfree_rcu(ex, rcu);
}
 }
 
+static void __dev_exception_clean_all(struct dev_cgroup *dev_cgroup)
+{
+   dev_exception_clean(_cgroup->exceptions);
+   dev_exception_clean(_cgroup->local.exceptions);
+}
+
 /**
- * dev_exception_clean - frees all entries of the exception list
+ * dev_exception_clean_all - frees all entries of the exception list
  * @dev_cgroup: dev_cgroup with the exception list to be cleaned
  *
  * called under devcgroup_mutex
  */
-static void dev_exception_clean(struct dev_cgroup *dev_cgroup)
+static void dev_exception_clean_all(struct dev_cgroup *dev_cgroup)
 {
lockdep_assert_held(_mutex);
 
-   __dev_exception_clean(dev_cgroup);
+   __dev_exception_clean_all(dev_cgroup);
 }
 
 /**
@@ -303,7 +304,7 @@ static void devcgroup_css_free(struct cg
struct dev_cgroup *dev_cgroup;
 
dev_cgroup = cgroup_to_devcgroup(cgroup);
-   __dev_exception_clean(dev_cgroup);
+   __dev_exception_clean_all(dev_cgroup);
kfree(dev_cgroup);
 }
 
@@ -508,25 +509,22 @@   memset(, 0, sizeof(ex));
case DEVCG_ALLOW:
if (!may_allow_all(parent))
return -EPERM;
-   dev_exception_clean(devcgroup);
+   dev_exception_clean_all(devcgroup);
if (parent)
rc = dev_exceptions_copy(>exceptions,
 >exceptions);
devcgroup->behavior = DEVCG_DEFAULT_ALLOW;
devcgroup->local.behavior = DEVCG_DEFAULT_ALLOW;
-
-   if (rc)
-   return rc;
break;
case DEVCG_DENY:
-   dev_exception_clean(devcgroup);
+   dev_exception_clean_all(devcgroup);
devcgroup->behavior = DEVCG_DEFAULT_DENY;
devcgroup->local.behavior = DEVCG_DEFAULT_DENY;
break;
default:
-   return -EINVAL;
+   rc = -EINVAL;
}
-   return 0;
+   return rc;
case 'b':
ex.type = DEV_BLOCK;
break;

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v4 3/9] device_cgroup: keep track of local group settings

2013-01-30 Thread aris

In preparation for better hierarchy support, the local settings need to be
retained so that they can be reapplied after a propagated change if they're
still valid.

v2: split this patch in two, one to just move dev_exception_rm() before
dev_exception_add() while keeping functional changes in this patch as
requested by Tejun.

Acked-by: Tejun Heo 
Cc: Tejun Heo 
Cc: Serge Hallyn 
Signed-off-by: Aristeu Rozanski 
 
---
 security/device_cgroup.c |   83 +--
 1 file changed, 67 insertions(+), 16 deletions(-)

--- github.orig/security/device_cgroup.c2013-01-29 11:49:14.987661210 
-0500
+++ github/security/device_cgroup.c 2013-01-29 11:49:15.244665037 -0500
@@ -39,13 +39,27 @@ struct dev_exception_item {
struct rcu_head rcu;
 };
 
+enum devcg_behavior {
+   DEVCG_DEFAULT_NONE,
+   DEVCG_DEFAULT_ALLOW,
+   DEVCG_DEFAULT_DENY,
+};
+
 struct dev_cgroup {
struct cgroup_subsys_state css;
+
+   /* result of merging the parent's rules with local ones */
struct list_head exceptions;
-   enum {
-   DEVCG_DEFAULT_ALLOW,
-   DEVCG_DEFAULT_DENY,
-   } behavior;
+   enum devcg_behavior behavior;
+
+   /*
+* local set rules, saved so when a parent propagates new rules, the
+* local preferences can be preserved
+*/
+   struct {
+   struct list_head exceptions;
+   enum devcg_behavior behavior;
+   } local;
 };
 
 static inline struct dev_cgroup *css_to_devcgroup(struct cgroup_subsys_state 
*s)
@@ -104,8 +118,8 @@ free_and_exit:
 /*
  * called under devcgroup_mutex
  */
-static void dev_exception_rm(struct list_head *exceptions,
-struct dev_exception_item *ex)
+static void __dev_exception_rm(struct list_head *exceptions,
+  struct dev_exception_item *ex)
 {
struct dev_exception_item *walk, *tmp;
 
@@ -127,11 +141,18 @@ static void dev_exception_rm(struct list
}
 }
 
+static void dev_exception_rm(struct dev_cgroup *devcgroup,
+struct dev_exception_item *ex)
+{
+   __dev_exception_rm(>local.exceptions, ex);
+   __dev_exception_rm(>exceptions, ex);
+}
+
 /*
  * called under devcgroup_mutex
  */
-static int dev_exception_add(struct list_head *exceptions,
-struct dev_exception_item *ex)
+static int __dev_exception_add(struct list_head *exceptions,
+  struct dev_exception_item *ex)
 {
struct dev_exception_item *excopy, *walk;
 
@@ -159,6 +180,28 @@ static int dev_exception_add(struct list
return 0;
 }
 
+static int dev_exception_add(struct dev_cgroup *devcgroup,
+struct dev_exception_item *ex)
+{
+   int rc;
+
+   lockdep_assert_held(_mutex);
+
+   /*
+* we add to the local list so we can preserve local preferences if
+* the parent propagates down new rules
+*/
+   rc = __dev_exception_add(>local.exceptions, ex);
+   if (rc)
+   return rc;
+
+   rc = __dev_exception_add(>exceptions, ex);
+   if (rc)
+   __dev_exception_rm(>local.exceptions, ex);
+
+   return rc;
+}
+
 static void __dev_exception_clean(struct dev_cgroup *dev_cgroup)
 {
struct dev_exception_item *ex, *tmp;
@@ -167,6 +210,11 @@ static void __dev_exception_clean(struct
list_del_rcu(>list);
kfree_rcu(ex, rcu);
}
+   list_for_each_entry_safe(ex, tmp, _cgroup->local.exceptions,
+list) {
+   list_del_rcu(>list);
+   kfree_rcu(ex, rcu);
+   }
 }
 
 /**
@@ -195,6 +243,8 @@ static struct cgroup_subsys_state *devcg
if (!dev_cgroup)
return ERR_PTR(-ENOMEM);
INIT_LIST_HEAD(_cgroup->exceptions);
+   INIT_LIST_HEAD(_cgroup->local.exceptions);
+   dev_cgroup->local.behavior = DEVCG_DEFAULT_NONE;
parent_cgroup = cgroup->parent;
 
if (parent_cgroup == NULL)
@@ -413,18 +463,19 @@   memset(, 0, sizeof(ex));
if (!may_allow_all(parent))
return -EPERM;
dev_exception_clean(devcgroup);
+   if (parent)
+   rc = dev_exceptions_copy(>exceptions,
+>exceptions);
devcgroup->behavior = DEVCG_DEFAULT_ALLOW;
-   if (!parent)
-   break;
+   devcgroup->local.behavior = DEVCG_DEFAULT_ALLOW;
 
-   rc = dev_exceptions_copy(>exceptions,
->exceptions);
if (rc)
return rc;
break;
case DEVCG_DENY:

[PATCH v4 7/9] devcg: split single exception copy from dev_exceptions_copy()

2013-01-30 Thread aris

This patch is in preparation for hierarchy support

This patch doesn't introduce any functional changes.

Acked-by: Tejun Heo 
Cc: Tejun Heo 
Cc: Serge Hallyn 
Signed-off-by: Aristeu Rozanski 

---
 security/device_cgroup.c |   18 ++
 1 file changed, 14 insertions(+), 4 deletions(-)

--- github.orig/security/device_cgroup.c2013-01-29 11:49:16.076677425 
-0500
+++ github/security/device_cgroup.c 2013-01-29 11:49:16.374681863 -0500
@@ -89,20 +89,30 @@ static int devcgroup_can_attach(struct c
return 0;
 }
 
+static int dev_exception_copy(struct list_head *dest,
+ struct dev_exception_item *ex)
+{
+   struct dev_exception_item *new;
+
+   new = kmemdup(ex, sizeof(*ex), GFP_KERNEL);
+   if (!new)
+   return -ENOMEM;
+   list_add_tail(>list, dest);
+   return 0;
+}
+
 /*
  * called under devcgroup_mutex
  */
 static int dev_exceptions_copy(struct list_head *dest, struct list_head *orig)
 {
-   struct dev_exception_item *ex, *tmp, *new;
+   struct dev_exception_item *ex, *tmp;
 
lockdep_assert_held(_mutex);
 
list_for_each_entry(ex, orig, list) {
-   new = kmemdup(ex, sizeof(*ex), GFP_KERNEL);
-   if (!new)
+   if (dev_exception_copy(dest, ex))
goto free_and_exit;
-   list_add_tail(>list, dest);
}
 
return 0;

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v4 1/9] device_cgroup: prepare exception list handling functions for two lists

2013-01-30 Thread aris

In the following patches, device_cgroup structure will have two sets of
behavior and exceptions list (actual one, another with the local settings)
so rework the functions to use exception list, not a device_cgroup.

Acked-by: Tejun Heo 
Cc: Tejun Heo 
Cc: Serge Hallyn 
Signed-off-by: Aristeu Rozanski 

---
 security/device_cgroup.c |   18 +-
 1 file changed, 9 insertions(+), 9 deletions(-)

--- github.orig/security/device_cgroup.c2013-01-29 11:48:50.603298122 
-0500
+++ github/security/device_cgroup.c 2013-01-29 11:49:14.739657516 -0500
@@ -104,7 +104,7 @@ free_and_exit:
 /*
  * called under devcgroup_mutex
  */
-static int dev_exception_add(struct dev_cgroup *dev_cgroup,
+static int dev_exception_add(struct list_head *exceptions,
 struct dev_exception_item *ex)
 {
struct dev_exception_item *excopy, *walk;
@@ -115,7 +115,7 @@ static int dev_exception_add(struct dev_
if (!excopy)
return -ENOMEM;
 
-   list_for_each_entry(walk, _cgroup->exceptions, list) {
+   list_for_each_entry(walk, exceptions, list) {
if (walk->type != ex->type)
continue;
if (walk->major != ex->major)
@@ -129,21 +129,21 @@ static int dev_exception_add(struct dev_
}
 
if (excopy != NULL)
-   list_add_tail_rcu(>list, _cgroup->exceptions);
+   list_add_tail_rcu(>list, exceptions);
return 0;
 }
 
 /*
  * called under devcgroup_mutex
  */
-static void dev_exception_rm(struct dev_cgroup *dev_cgroup,
+static void dev_exception_rm(struct list_head *exceptions,
 struct dev_exception_item *ex)
 {
struct dev_exception_item *walk, *tmp;
 
lockdep_assert_held(_mutex);
 
-   list_for_each_entry_safe(walk, tmp, _cgroup->exceptions, list) {
+   list_for_each_entry_safe(walk, tmp, exceptions, list) {
if (walk->type != ex->type)
continue;
if (walk->major != ex->major)
@@ -514,10 +514,10 @@   case '\0':
 * don't want to break compatibility
 */
if (devcgroup->behavior == DEVCG_DEFAULT_ALLOW) {
-   dev_exception_rm(devcgroup, );
+   dev_exception_rm(>exceptions, );
return 0;
}
-   return dev_exception_add(devcgroup, );
+   return dev_exception_add(>exceptions, );
case DEVCG_DENY:
/*
 * If the default policy is to deny by default, try to remove
@@ -525,10 +525,10 @@   return 0;
 * don't want to break compatibility
 */
if (devcgroup->behavior == DEVCG_DEFAULT_DENY) {
-   dev_exception_rm(devcgroup, );
+   dev_exception_rm(>exceptions, );
return 0;
}
-   return dev_exception_add(devcgroup, );
+   return dev_exception_add(>exceptions, );
default:
return -EINVAL;
}

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v4 5/9] devcg: prepare may_access() for hierarchy support

2013-01-30 Thread aris

Currently may_access() is only able to verify if an exception is valid for the
current cgroup, which has the same behavior. With hierarchy, it'll be also used
to verify if a cgroup local exception is valid towards its cgroup parent, which
might have different behavior.

v2:
- updated patch description
- rebased on top of a new patch to expand the may_access() logic to make it
  more clear
- fixed argument description order in may_access()

Acked-by: Tejun Heo 
Cc: Tejun Heo 
Cc: Serge Hallyn 
Signed-off-by: Aristeu Rozanski 

---
 security/device_cgroup.c |   38 --
 1 file changed, 24 insertions(+), 14 deletions(-)

--- github.orig/security/device_cgroup.c2013-01-30 08:58:02.0 
-0500
+++ github/security/device_cgroup.c 2013-01-30 09:00:09.435351867 -0500
@@ -354,9 +354,11 @@return 0;
  * verify if a certain access is allowed.
  * @dev_cgroup: dev cgroup to be tested against
  * @refex: new exception
+ * @behavior: behavior of the exception
  */
 static bool may_access(struct dev_cgroup *dev_cgroup,
-  struct dev_exception_item *refex)
+  struct dev_exception_item *refex,
+  enum devcg_behavior behavior)
 {
struct dev_exception_item *ex;
bool match = false;
@@ -380,19 +382,27 @@   if (ex->minor != ~0 && ex->minor != re
break;
}
 
-   /*
-* In two cases we'll consider this new exception valid:
-* - the dev cgroup has its default policy to deny + exception list:
-*   the new exception *should* match the exceptions
-* - the dev cgroup has its default policy to allow + exception list:
-*   the new exception should *not* match any of the exceptions
-*/
-   if (dev_cgroup->behavior == DEVCG_DEFAULT_DENY) {
-   if (match)
+   if (dev_cgroup->behavior == DEVCG_DEFAULT_ALLOW) {
+   if (behavior == DEVCG_DEFAULT_ALLOW) {
+   /* the exception will deny access to certain devices */
return true;
+   } else {
+   /* the exception will allow access to certain devices */
+   if (match)
+   /*
+* a new exception allowing access shouldn't
+* match an parent's exception
+*/
+   return false;
+   return true;
+   }
} else {
-   if (!match)
+   /* only behavior == DEVCG_DEFAULT_DENY allowed here */
+   if (match)
+   /* parent has an exception that matches the proposed */
return true;
+   else
+   return false;
}
return false;
 }
@@ -411,7 +421,7 @@ static int parent_has_perm(struct dev_cg
if (!pcg)
return 1;
parent = cgroup_to_devcgroup(pcg);
-   return may_access(parent, ex);
+   return may_access(parent, ex, childcg->behavior);
 }
 
 /**
@@ -445,7 +455,7 @@ static int devcgroup_update_access(struc
 {
const char *b;
char temp[12];  /* 11 + 1 characters needed for a u32 */
-   int count, rc;
+   int count, rc = 0;
struct dev_exception_item ex;
struct cgroup *p = devcgroup->css.cgroup;
struct dev_cgroup *parent = NULL;
@@ -663,7 +673,7 @@ memset(, 0, sizeof(ex));
 
rcu_read_lock();
dev_cgroup = task_devcgroup(current);
-   rc = may_access(dev_cgroup, );
+   rc = may_access(dev_cgroup, , dev_cgroup->behavior);
rcu_read_unlock();
 
if (!rc)

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v4 0/9] devcg: introduce proper hierarchy support

2013-01-30 Thread aris

This patchset implements device cgroup hierarchy. Behaviors and exceptions
will be propagated down the tree, and local preferences will be re-evaluated
every time a change in the parent occurs, reapplying them if it's still
possible.

git://github.com/aristeu/linux-2.6.git
branch: devcg_hierarchy_review

v4:
- minor fixes pointed out by Tejun

v3:
- update documentation
- move css_online/css_offline changes to a new patch
- use cgroup_for_each_descendant_pre() instead of own descendant walk
- move exception_copy rework to a separate patch
- move exception_clean rework to a separate patch
- new patch to just move dev_exception_rm() before dev_exception_add()
  as requested by Tejun.
- updated patch description for may_access() changes
- new patch to expand the may_access() logic before changing it
- fixed argument description order in may_access()

v2:
- rebase on top of "device_cgroup: don't grab mutex in rcu callback"
- in case parent changes behavior or exceptions and the local exceptions won't
  apply anymore, remove them instead of keeping them around.

Cc: Tejun Heo  
  
Cc: Serge Hallyn

Signed-off-by: Aristeu Rozanski


-- 
Aristeu
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v4 4/9] devcg: expand may_access() logic

2013-01-30 Thread aris

In order to make the next patch more clear, expand may_access() logic.

v2: may_access() returns bool now

Acked-by: Tejun Heo 
Cc: Tejun Heo 
Cc: Serge Hallyn 
Signed-off-by: Aristeu Rozanski 

---
 security/device_cgroup.c |   21 -
 1 file changed, 12 insertions(+), 9 deletions(-)

--- github.orig/security/device_cgroup.c2013-01-30 08:56:29.532063723 
-0500
+++ github/security/device_cgroup.c 2013-01-30 08:58:02.934460404 -0500
@@ -355,8 +355,8 @@ return 0;
  * @dev_cgroup: dev cgroup to be tested against
  * @refex: new exception
  */
-static int may_access(struct dev_cgroup *dev_cgroup,
- struct dev_exception_item *refex)
+static bool may_access(struct dev_cgroup *dev_cgroup,
+  struct dev_exception_item *refex)
 {
struct dev_exception_item *ex;
bool match = false;
@@ -382,16 +382,19 @@   if (ex->minor != ~0 && ex->minor != re
 
/*
 * In two cases we'll consider this new exception valid:
-* - the dev cgroup has its default policy to allow + exception list:
-*   the new exception should *not* match any of the exceptions
-*   (behavior == DEVCG_DEFAULT_ALLOW, !match)
 * - the dev cgroup has its default policy to deny + exception list:
 *   the new exception *should* match the exceptions
-*   (behavior == DEVCG_DEFAULT_DENY, match)
+* - the dev cgroup has its default policy to allow + exception list:
+*   the new exception should *not* match any of the exceptions
 */
-   if ((dev_cgroup->behavior == DEVCG_DEFAULT_DENY) == match)
-   return 1;
-   return 0;
+   if (dev_cgroup->behavior == DEVCG_DEFAULT_DENY) {
+   if (match)
+   return true;
+   } else {
+   if (!match)
+   return true;
+   }
+   return false;
 }
 
 /*

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v4 9/9] devcg: propagate local changes down the hierarchy

2013-01-30 Thread aris

This patch makes all changes propagate down the hierarchy, respecting local
configurations when possible.

Behavior changes will clean up exceptions in all the children except when the
parent changes the behavior from allow to deny and the child's behavior was
already deny, in which case the local exceptions will be reused. The inverse
is not possible: you can't have a parent with behavior deny and a child with
behavior allow.

New exceptions allowing additional access to devices won't be propagated, but
it'll be possible to add an exception to access all or part of the newly
allowed device(s).

New exceptions disallowing access to devices will be propagated down and the
local group's exceptions will be revalidated for the new situation.
Example:
  A
 / \
B

group   behavior   exceptions
A       allow      "b 8:* rwm", "c 116:1 rw"
B       deny       "c 1:3 rwm", "c 116:2 rwm", "b 3:* rwm"

If a new exception is added to group A:
# echo "c 116:* r" > A/devices.deny
it'll propagate down and after revalidating B's local exceptions, the exception
"c 116:2 rwm" will be removed.

In case parent behavior or exceptions change and local settings are not
allowed anymore, they'll be deleted.

v4:
- separated function to walk the tree and collect valid propagation targets

v3:
- update documentation
- move css_online/css_offline changes to a new patch
- use cgroup_for_each_descendant_pre() instead of own descendant walk
- move exception_copy rework to a separate patch
- move exception_clean rework to a separate patch

v2:
- instead of keeping the local settings that won't apply anymore, remove them

Acked-by: Tejun Heo 
Cc: Tejun Heo 
Cc: Serge Hallyn 
Signed-off-by: Aristeu Rozanski 

---
 Documentation/cgroups/devices.txt |   66 +
 security/device_cgroup.c  |  186 --
 2 files changed, 246 insertions(+), 6 deletions(-)

--- github.orig/security/device_cgroup.c2013-01-30 10:03:16.943873992 
-0500
+++ github/security/device_cgroup.c 2013-01-30 10:44:23.693586209 -0500
@@ -60,6 +60,9 @@ struct dev_cgroup {
struct list_head exceptions;
enum devcg_behavior behavior;
} local;
+
+   /* temporary list for pending propagation operations */
+   struct list_head propagate_pending;
 };
 
 static inline struct dev_cgroup *css_to_devcgroup(struct cgroup_subsys_state 
*s)
@@ -241,6 +244,11 @@ static void dev_exception_clean_all(stru
__dev_exception_clean_all(dev_cgroup);
 }
 
+static inline bool is_devcg_online(const struct dev_cgroup *devcg)
+{
+   return (devcg->behavior != DEVCG_DEFAULT_NONE);
+}
+
 /**
  * devcgroup_online - initializes devcgroup's behavior and exceptions based on
  *   parent's
@@ -292,6 +300,7 @@ static struct cgroup_subsys_state *devcg
return ERR_PTR(-ENOMEM);
INIT_LIST_HEAD(_cgroup->exceptions);
INIT_LIST_HEAD(_cgroup->local.exceptions);
+   INIT_LIST_HEAD(_cgroup->propagate_pending);
dev_cgroup->local.behavior = DEVCG_DEFAULT_NONE;
dev_cgroup->behavior = DEVCG_DEFAULT_NONE;
parent_cgroup = cgroup->parent;
@@ -471,6 +480,155 @@ static inline int may_allow_all(struct d
return parent->behavior == DEVCG_DEFAULT_ALLOW;
 }
 
+/**
+ * revalidate_exceptions - walks through the exception list and revalidates
+ *  the exceptions based on parents' behavior and
+ *  exceptions. Called with devcgroup_mutex held.
+ * @devcg: cgroup which exceptions will be checked
+ *
+ * returns: 0 in success, -ENOMEM in case of out of memory
+ *
+ * This is one of the two key functions for hierarchy implementation.
+ * This function is responsible for re-evaluating all the cgroup's locally
+ * set exceptions due to a parent's behavior or exception change.
+ * Refer to Documentation/cgroups/devices.txt for more details.
+ */
+static int revalidate_exceptions(struct dev_cgroup *devcg)
+{
+   struct dev_exception_item *ex;
+   struct list_head *this, *tmp;
+
+   list_for_each_safe(this, tmp, >local.exceptions) {
+   ex = container_of(this, struct dev_exception_item, list);
+   if (parent_has_perm(devcg, ex)) {
+   if (dev_exception_copy(>exceptions, ex))
+   goto error;
+   } else
+   __dev_exception_rm(>local.exceptions, ex);
+   }
+   return 0;
+
+error:
+   dev_exception_clean(>exceptions);
+   return -ENOMEM;
+}
+
+/**
+ * get_online_devcg - walks the cgroup tree and fills a list with the online
+ *   groups
+ * @root: cgroup used as starting point
+ * @online: list that will be filled with online groups
+ *
+ * Must be called with devcgroup_mutex held. Grabs RCU lock.
+ * Because devcgroup_mutex is held, no devcg will become online or offline
+ * during the

[PATCH v4 6/9] devcg: use css_online and css_offline

2013-01-30 Thread aris

Allocate resources and change behavior only when online. This is needed in
order to determine if a node is suitable for hierarchy propagation or if it's
being removed.

Locking:
Both functions take devcgroup_mutex to make changes to device_cgroup structure.
Hierarchy propagation will also take devcgroup_mutex before walking the
tree while walking the tree itself is protected by rcu lock.

Acked-by: Tejun Heo 
Cc: Tejun Heo 
Cc: Serge Hallyn 
Signed-off-by: Aristeu Rozanski 

---
 security/device_cgroup.c |   59 +--
 1 file changed, 42 insertions(+), 17 deletions(-)

--- github.orig/security/device_cgroup.c2013-01-30 09:00:09.435351867 
-0500
+++ github/security/device_cgroup.c 2013-01-30 09:09:12.572464122 -0500
@@ -230,14 +230,51 @@ static void dev_exception_clean(struct d
__dev_exception_clean(dev_cgroup);
 }
 
+/**
+ * devcgroup_online - initializes devcgroup's behavior and exceptions based on
+ *   parent's
+ * @cgroup: cgroup getting online
+ * returns 0 in case of success, error code otherwise
+ */
+static int devcgroup_online(struct cgroup *cgroup)
+{
+   struct dev_cgroup *dev_cgroup, *parent_dev_cgroup = NULL;
+   int ret = 0;
+
+   mutex_lock(_mutex);
+   dev_cgroup = cgroup_to_devcgroup(cgroup);
+   if (cgroup->parent)
+   parent_dev_cgroup = cgroup_to_devcgroup(cgroup->parent);
+
+   if (parent_dev_cgroup == NULL)
+   dev_cgroup->behavior = DEVCG_DEFAULT_ALLOW;
+   else {
+   ret = dev_exceptions_copy(_cgroup->exceptions,
+ _dev_cgroup->exceptions);
+   if (!ret)
+   dev_cgroup->behavior = parent_dev_cgroup->behavior;
+   }
+   mutex_unlock(_mutex);
+
+   return ret;
+}
+
+static void devcgroup_offline(struct cgroup *cgroup)
+{
+   struct dev_cgroup *dev_cgroup = cgroup_to_devcgroup(cgroup);
+
+   mutex_lock(_mutex);
+   dev_cgroup->behavior = DEVCG_DEFAULT_NONE;
+   mutex_unlock(_mutex);
+}
+
 /*
  * called from kernel/cgroup.c with cgroup_lock() held.
  */
 static struct cgroup_subsys_state *devcgroup_css_alloc(struct cgroup *cgroup)
 {
-   struct dev_cgroup *dev_cgroup, *parent_dev_cgroup;
+   struct dev_cgroup *dev_cgroup;
struct cgroup *parent_cgroup;
-   int ret;
 
dev_cgroup = kzalloc(sizeof(*dev_cgroup), GFP_KERNEL);
if (!dev_cgroup)
@@ -245,23 +282,9 @@ static struct cgroup_subsys_state *devcg
INIT_LIST_HEAD(_cgroup->exceptions);
INIT_LIST_HEAD(_cgroup->local.exceptions);
dev_cgroup->local.behavior = DEVCG_DEFAULT_NONE;
+   dev_cgroup->behavior = DEVCG_DEFAULT_NONE;
parent_cgroup = cgroup->parent;
 
-   if (parent_cgroup == NULL)
-   dev_cgroup->behavior = DEVCG_DEFAULT_ALLOW;
-   else {
-   parent_dev_cgroup = cgroup_to_devcgroup(parent_cgroup);
-   mutex_lock(_mutex);
-   ret = dev_exceptions_copy(_cgroup->exceptions,
- _dev_cgroup->exceptions);
-   dev_cgroup->behavior = parent_dev_cgroup->behavior;
-   mutex_unlock(_mutex);
-   if (ret) {
-   kfree(dev_cgroup);
-   return ERR_PTR(ret);
-   }
-   }
-
return _cgroup->css;
 }
 
@@ -635,6 +658,8 @@ struct cgroup_subsys devices_subsys = {
.can_attach = devcgroup_can_attach,
.css_alloc = devcgroup_css_alloc,
.css_free = devcgroup_css_free,
+   .css_online = devcgroup_online,
+   .css_offline = devcgroup_offline,
.subsys_id = devices_subsys_id,
.base_cftypes = dev_cgroup_files,
 

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v4 6/9] devcg: use css_online and css_offline

2013-01-30 Thread aris

Allocate resources and change behavior only when online. This is needed in
order to determine if a node is suitable for hierarchy propagation or if it's
being removed.

Locking:
Both functions take devcgroup_mutex to make changes to device_cgroup structure.
Hierarchy propagation will also take devcgroup_mutex before walking the
tree while walking the tree itself is protected by rcu lock.

Acked-by: Tejun Heo t...@kernel.org
Cc: Tejun Heo t...@kernel.org
Cc: Serge Hallyn serge.hal...@canonical.com
Signed-off-by: Aristeu Rozanski a...@redhat.com

---
 security/device_cgroup.c |   59 +--
 1 file changed, 42 insertions(+), 17 deletions(-)

--- github.orig/security/device_cgroup.c2013-01-30 09:00:09.435351867 
-0500
+++ github/security/device_cgroup.c 2013-01-30 09:09:12.572464122 -0500
@@ -230,14 +230,51 @@ static void dev_exception_clean(struct d
__dev_exception_clean(dev_cgroup);
 }
 
+/**
+ * devcgroup_online - initializes devcgroup's behavior and exceptions based on
+ *   parent's
+ * @cgroup: cgroup getting online
+ * returns 0 in case of success, error code otherwise
+ */
+static int devcgroup_online(struct cgroup *cgroup)
+{
+   struct dev_cgroup *dev_cgroup, *parent_dev_cgroup = NULL;
+   int ret = 0;
+
+   mutex_lock(devcgroup_mutex);
+   dev_cgroup = cgroup_to_devcgroup(cgroup);
+   if (cgroup-parent)
+   parent_dev_cgroup = cgroup_to_devcgroup(cgroup-parent);
+
+   if (parent_dev_cgroup == NULL)
+   dev_cgroup-behavior = DEVCG_DEFAULT_ALLOW;
+   else {
+   ret = dev_exceptions_copy(dev_cgroup-exceptions,
+ parent_dev_cgroup-exceptions);
+   if (!ret)
+   dev_cgroup-behavior = parent_dev_cgroup-behavior;
+   }
+   mutex_unlock(devcgroup_mutex);
+
+   return ret;
+}
+
+static void devcgroup_offline(struct cgroup *cgroup)
+{
+   struct dev_cgroup *dev_cgroup = cgroup_to_devcgroup(cgroup);
+
+   mutex_lock(devcgroup_mutex);
+   dev_cgroup-behavior = DEVCG_DEFAULT_NONE;
+   mutex_unlock(devcgroup_mutex);
+}
+
 /*
  * called from kernel/cgroup.c with cgroup_lock() held.
  */
 static struct cgroup_subsys_state *devcgroup_css_alloc(struct cgroup *cgroup)
 {
-   struct dev_cgroup *dev_cgroup, *parent_dev_cgroup;
+   struct dev_cgroup *dev_cgroup;
struct cgroup *parent_cgroup;
-   int ret;
 
dev_cgroup = kzalloc(sizeof(*dev_cgroup), GFP_KERNEL);
if (!dev_cgroup)
@@ -245,23 +282,9 @@ static struct cgroup_subsys_state *devcg
INIT_LIST_HEAD(dev_cgroup-exceptions);
INIT_LIST_HEAD(dev_cgroup-local.exceptions);
dev_cgroup-local.behavior = DEVCG_DEFAULT_NONE;
+   dev_cgroup-behavior = DEVCG_DEFAULT_NONE;
parent_cgroup = cgroup-parent;
 
-   if (parent_cgroup == NULL)
-   dev_cgroup-behavior = DEVCG_DEFAULT_ALLOW;
-   else {
-   parent_dev_cgroup = cgroup_to_devcgroup(parent_cgroup);
-   mutex_lock(devcgroup_mutex);
-   ret = dev_exceptions_copy(dev_cgroup-exceptions,
- parent_dev_cgroup-exceptions);
-   dev_cgroup-behavior = parent_dev_cgroup-behavior;
-   mutex_unlock(devcgroup_mutex);
-   if (ret) {
-   kfree(dev_cgroup);
-   return ERR_PTR(ret);
-   }
-   }
-
return dev_cgroup-css;
 }
 
@@ -635,6 +658,8 @@ struct cgroup_subsys devices_subsys = {
.can_attach = devcgroup_can_attach,
.css_alloc = devcgroup_css_alloc,
.css_free = devcgroup_css_free,
+   .css_online = devcgroup_online,
+   .css_offline = devcgroup_offline,
.subsys_id = devices_subsys_id,
.base_cftypes = dev_cgroup_files,
 

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v4 9/9] devcg: propagate local changes down the hierarchy

2013-01-30 Thread aris

This patch makes all changes propagate down the hierarchy, respecting local
configurations when possible.

Behavior changes will clean up exceptions in all the children except when the
parent changes the behavior from allow to deny and the child's behavior was
already deny, in which case the local exceptions will be reused. The inverse
is not possible: you can't have a parent with behavior deny and a child with
behavior accept.

New exceptions allowing additional access to devices won't be propagated, but
it'll be possible to add an exception to access all or part of the newly
allowed device(s).

New exceptions disallowing access to devices will be propagated down and the
local group's exceptions will be revalidated for the new situation.
Example:
      A
     / \
        B

    group        behavior        exceptions
    A            allow           "b 8:* rwm", "c 116:1 rw"
    B            deny            "c 1:3 rwm", "c 116:2 rwm", "b 3:* rwm"

If a new exception is added to group A:
# echo "c 116:* r" > A/devices.deny
it'll propagate down and after revalidating B's local exceptions, the exception
"c 116:2 rwm" will be removed.

In case parent behavior or exceptions change and local settings are not
allowed anymore, they'll be deleted.

v4:
- separated function to walk the tree and collect valid propagation targets

v3:
- update documentation
- move css_online/css_offline changes to a new patch
- use cgroup_for_each_descendant_pre() instead of own descendant walk
- move exception_copy rework to a separate patch
- move exception_clean rework to a separated patch

v2:
- instead of keeping the local settings that won't apply anymore, remove them

Acked-by: Tejun Heo t...@kernel.org
Cc: Tejun Heo t...@kernel.org
Cc: Serge Hallyn serge.hal...@canonical.com
Signed-off-by: Aristeu Rozanski a...@redhat.com

---
 Documentation/cgroups/devices.txt |   66 +
 security/device_cgroup.c  |  186 --
 2 files changed, 246 insertions(+), 6 deletions(-)

--- github.orig/security/device_cgroup.c2013-01-30 10:03:16.943873992 
-0500
+++ github/security/device_cgroup.c 2013-01-30 10:44:23.693586209 -0500
@@ -60,6 +60,9 @@ struct dev_cgroup {
struct list_head exceptions;
enum devcg_behavior behavior;
} local;
+
+   /* temporary list for pending propagation operations */
+   struct list_head propagate_pending;
 };
 
 static inline struct dev_cgroup *css_to_devcgroup(struct cgroup_subsys_state 
*s)
@@ -241,6 +244,11 @@ static void dev_exception_clean_all(stru
__dev_exception_clean_all(dev_cgroup);
 }
 
+static inline bool is_devcg_online(const struct dev_cgroup *devcg)
+{
+   return (devcg-behavior != DEVCG_DEFAULT_NONE);
+}
+
 /**
  * devcgroup_online - initializes devcgroup's behavior and exceptions based on
  *   parent's
@@ -292,6 +300,7 @@ static struct cgroup_subsys_state *devcg
return ERR_PTR(-ENOMEM);
INIT_LIST_HEAD(dev_cgroup-exceptions);
INIT_LIST_HEAD(dev_cgroup-local.exceptions);
+   INIT_LIST_HEAD(dev_cgroup-propagate_pending);
dev_cgroup-local.behavior = DEVCG_DEFAULT_NONE;
dev_cgroup-behavior = DEVCG_DEFAULT_NONE;
parent_cgroup = cgroup-parent;
@@ -471,6 +480,155 @@ static inline int may_allow_all(struct d
return parent-behavior == DEVCG_DEFAULT_ALLOW;
 }
 
+/**
+ * revalidate_exceptions - walks through the exception list and revalidates
+ *  the exceptions based on parents' behavior and
+ *  exceptions. Called with devcgroup_mutex held.
+ * @devcg: cgroup which exceptions will be checked
+ *
+ * returns: 0 in success, -ENOMEM in case of out of memory
+ *
+ * This is one of the two key functions for hierarchy implementation.
+ * This function is responsible for re-evaluating all the cgroup's locally
+ * set exceptions due to a parent's behavior or exception change.
+ * Refer to Documentation/cgroups/devices.txt for more details.
+ */
+static int revalidate_exceptions(struct dev_cgroup *devcg)
+{
+   struct dev_exception_item *ex;
+   struct list_head *this, *tmp;
+
+   list_for_each_safe(this, tmp, devcg-local.exceptions) {
+   ex = container_of(this, struct dev_exception_item, list);
+   if (parent_has_perm(devcg, ex)) {
+   if (dev_exception_copy(devcg-exceptions, ex))
+   goto error;
+   } else
+   __dev_exception_rm(devcg-local.exceptions, ex);
+   }
+   return 0;
+
+error:
+   dev_exception_clean(devcg-exceptions);
+   return -ENOMEM;
+}
+
+/**
+ * get_online_devcg - walks the cgroup tree and fills a list with the online
+ *   groups
+ * @root: cgroup used as starting point
+ * @online: list that will be filled with online groups
+ *
+ * Must be called with devcgroup_mutex held. Grabs RCU lock.
+ * Because

[PATCH v4 0/9] devcg: introduce proper hierarchy support

2013-01-30 Thread aris

This patchset implements device cgroup hierarchy. Behaviors and exceptions
will be propagated down in the tree and local preferences will be re-evaluated
every time a change in its parent occurs, reapplying them if it's still
possible.

git://github.com/aristeu/linux-2.6.git
branch: devcg_hierarchy_review

v4:
- minor fixes pointed out by Tejun

v3:
- update documentation
- move css_online/css_offline changes to a new patch
- use cgroup_for_each_descendant_pre() instead of own descendant walk
- move exception_copy rework to a separate patch
- move exception_clean rework to a separated patch
- new patch to just move dev_exception_rm() before dev_exception_add()
  as requested by Tejun.
- updated patch description for may_access() changes
- new patch to expand the may_access() logic before changing it
- fixed argument description order in may_access()

v2:
- rebase on top of "device_cgroup: don't grab mutex in rcu callback"
- in case parent changes behavior or exceptions and the local exceptions won't
  apply anymore, remove them instead of keeping them around.

Cc: Tejun Heo t...@kernel.org 
  
Cc: Serge Hallyn serge.hal...@canonical.com   

Signed-off-by: Aristeu Rozanski a...@redhat.com   


-- 
Aristeu
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v4 4/9] devcg: expand may_access() logic

2013-01-30 Thread aris

In order to make the next patch clearer, expand may_access() logic.

v2: may_access() returns bool now

Acked-by: Tejun Heo t...@kernel.org
Cc: Tejun Heo t...@kernel.org
Cc: Serge Hallyn serge.hal...@canonical.com
Signed-off-by: Aristeu Rozanski a...@redhat.com

---
 security/device_cgroup.c |   21 -
 1 file changed, 12 insertions(+), 9 deletions(-)

--- github.orig/security/device_cgroup.c2013-01-30 08:56:29.532063723 
-0500
+++ github/security/device_cgroup.c 2013-01-30 08:58:02.934460404 -0500
@@ -355,8 +355,8 @@ return 0;
  * @dev_cgroup: dev cgroup to be tested against
  * @refex: new exception
  */
-static int may_access(struct dev_cgroup *dev_cgroup,
- struct dev_exception_item *refex)
+static bool may_access(struct dev_cgroup *dev_cgroup,
+  struct dev_exception_item *refex)
 {
struct dev_exception_item *ex;
bool match = false;
@@ -382,16 +382,19 @@   if (ex-minor != ~0  ex-minor != re
 
/*
 * In two cases we'll consider this new exception valid:
-* - the dev cgroup has its default policy to allow + exception list:
-*   the new exception should *not* match any of the exceptions
-*   (behavior == DEVCG_DEFAULT_ALLOW, !match)
 * - the dev cgroup has its default policy to deny + exception list:
 *   the new exception *should* match the exceptions
-*   (behavior == DEVCG_DEFAULT_DENY, match)
+* - the dev cgroup has its default policy to allow + exception list:
+*   the new exception should *not* match any of the exceptions
 */
-   if ((dev_cgroup-behavior == DEVCG_DEFAULT_DENY) == match)
-   return 1;
-   return 0;
+   if (dev_cgroup-behavior == DEVCG_DEFAULT_DENY) {
+   if (match)
+   return true;
+   } else {
+   if (!match)
+   return true;
+   }
+   return false;
 }
 
 /*

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v4 5/9] devcg: prepare may_access() for hierarchy support

2013-01-30 Thread aris

Currently may_access() is only able to verify if an exception is valid for the
current cgroup, which has the same behavior. With hierarchy, it'll be also used
to verify if a cgroup local exception is valid towards its cgroup parent, which
might have different behavior.

v2:
- updated patch description
- rebased on top of a new patch to expand the may_access() logic to make it
  more clear
- fixed argument description order in may_access()

Acked-by: Tejun Heo t...@kernel.org
Cc: Tejun Heo t...@kernel.org
Cc: Serge Hallyn serge.hal...@canonical.com
Signed-off-by: Aristeu Rozanski a...@redhat.com

---
 security/device_cgroup.c |   38 --
 1 file changed, 24 insertions(+), 14 deletions(-)

--- github.orig/security/device_cgroup.c2013-01-30 08:58:02.0 
-0500
+++ github/security/device_cgroup.c 2013-01-30 09:00:09.435351867 -0500
@@ -354,9 +354,11 @@return 0;
  * verify if a certain access is allowed.
  * @dev_cgroup: dev cgroup to be tested against
  * @refex: new exception
+ * @behavior: behavior of the exception
  */
 static bool may_access(struct dev_cgroup *dev_cgroup,
-  struct dev_exception_item *refex)
+  struct dev_exception_item *refex,
+  enum devcg_behavior behavior)
 {
struct dev_exception_item *ex;
bool match = false;
@@ -380,19 +382,27 @@   if (ex-minor != ~0  ex-minor != re
break;
}
 
-   /*
-* In two cases we'll consider this new exception valid:
-* - the dev cgroup has its default policy to deny + exception list:
-*   the new exception *should* match the exceptions
-* - the dev cgroup has its default policy to allow + exception list:
-*   the new exception should *not* match any of the exceptions
-*/
-   if (dev_cgroup-behavior == DEVCG_DEFAULT_DENY) {
-   if (match)
+   if (dev_cgroup-behavior == DEVCG_DEFAULT_ALLOW) {
+   if (behavior == DEVCG_DEFAULT_ALLOW) {
+   /* the exception will deny access to certain devices */
return true;
+   } else {
+   /* the exception will allow access to certain devices */
+   if (match)
+   /*
+* a new exception allowing access shouldn't
+* match an parent's exception
+*/
+   return false;
+   return true;
+   }
} else {
-   if (!match)
+   /* only behavior == DEVCG_DEFAULT_DENY allowed here */
+   if (match)
+   /* parent has an exception that matches the proposed */
return true;
+   else
+   return false;
}
return false;
 }
@@ -411,7 +421,7 @@ static int parent_has_perm(struct dev_cg
if (!pcg)
return 1;
parent = cgroup_to_devcgroup(pcg);
-   return may_access(parent, ex);
+   return may_access(parent, ex, childcg-behavior);
 }
 
 /**
@@ -445,7 +455,7 @@ static int devcgroup_update_access(struc
 {
const char *b;
char temp[12];  /* 11 + 1 characters needed for a u32 */
-   int count, rc;
+   int count, rc = 0;
struct dev_exception_item ex;
struct cgroup *p = devcgroup-css.cgroup;
struct dev_cgroup *parent = NULL;
@@ -663,7 +673,7 @@ memset(ex, 0, sizeof(ex));
 
rcu_read_lock();
dev_cgroup = task_devcgroup(current);
-   rc = may_access(dev_cgroup, ex);
+   rc = may_access(dev_cgroup, ex, dev_cgroup-behavior);
rcu_read_unlock();
 
if (!rc)

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v4 7/9] devcg: split single exception copy from dev_exceptions_copy()

2013-01-30 Thread aris

This patch is in preparation for hierarchy support

This patch doesn't introduce any functional changes.

Acked-by: Tejun Heo t...@kernel.org
Cc: Tejun Heo t...@kernel.org
Cc: Serge Hallyn serge.hal...@canonical.com
Signed-off-by: Aristeu Rozanski a...@redhat.com

---
 security/device_cgroup.c |   18 ++
 1 file changed, 14 insertions(+), 4 deletions(-)

--- github.orig/security/device_cgroup.c2013-01-29 11:49:16.076677425 
-0500
+++ github/security/device_cgroup.c 2013-01-29 11:49:16.374681863 -0500
@@ -89,20 +89,30 @@ static int devcgroup_can_attach(struct c
return 0;
 }
 
+static int dev_exception_copy(struct list_head *dest,
+ struct dev_exception_item *ex)
+{
+   struct dev_exception_item *new;
+
+   new = kmemdup(ex, sizeof(*ex), GFP_KERNEL);
+   if (!new)
+   return -ENOMEM;
+   list_add_tail(new-list, dest);
+   return 0;
+}
+
 /*
  * called under devcgroup_mutex
  */
 static int dev_exceptions_copy(struct list_head *dest, struct list_head *orig)
 {
-   struct dev_exception_item *ex, *tmp, *new;
+   struct dev_exception_item *ex, *tmp;
 
lockdep_assert_held(devcgroup_mutex);
 
list_for_each_entry(ex, orig, list) {
-   new = kmemdup(ex, sizeof(*ex), GFP_KERNEL);
-   if (!new)
+   if (dev_exception_copy(dest, ex))
goto free_and_exit;
-   list_add_tail(new-list, dest);
}
 
return 0;

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v4 1/9] device_cgroup: prepare exception list handling functions for two lists

2013-01-30 Thread aris

In the following patches, device_cgroup structure will have two sets of
behavior and exceptions list (actual one, another with the local settings)
so rework the functions to use exception list, not a device_cgroup.

Acked-by: Tejun Heo t...@kernel.org
Cc: Tejun Heo t...@kernel.org
Cc: Serge Hallyn serge.hal...@canonical.com
Signed-off-by: Aristeu Rozanski a...@redhat.com

---
 security/device_cgroup.c |   18 +-
 1 file changed, 9 insertions(+), 9 deletions(-)

--- github.orig/security/device_cgroup.c2013-01-29 11:48:50.603298122 
-0500
+++ github/security/device_cgroup.c 2013-01-29 11:49:14.739657516 -0500
@@ -104,7 +104,7 @@ free_and_exit:
 /*
  * called under devcgroup_mutex
  */
-static int dev_exception_add(struct dev_cgroup *dev_cgroup,
+static int dev_exception_add(struct list_head *exceptions,
 struct dev_exception_item *ex)
 {
struct dev_exception_item *excopy, *walk;
@@ -115,7 +115,7 @@ static int dev_exception_add(struct dev_
if (!excopy)
return -ENOMEM;
 
-   list_for_each_entry(walk, dev_cgroup-exceptions, list) {
+   list_for_each_entry(walk, exceptions, list) {
if (walk-type != ex-type)
continue;
if (walk-major != ex-major)
@@ -129,21 +129,21 @@ static int dev_exception_add(struct dev_
}
 
if (excopy != NULL)
-   list_add_tail_rcu(excopy-list, dev_cgroup-exceptions);
+   list_add_tail_rcu(excopy-list, exceptions);
return 0;
 }
 
 /*
  * called under devcgroup_mutex
  */
-static void dev_exception_rm(struct dev_cgroup *dev_cgroup,
+static void dev_exception_rm(struct list_head *exceptions,
 struct dev_exception_item *ex)
 {
struct dev_exception_item *walk, *tmp;
 
lockdep_assert_held(devcgroup_mutex);
 
-   list_for_each_entry_safe(walk, tmp, dev_cgroup-exceptions, list) {
+   list_for_each_entry_safe(walk, tmp, exceptions, list) {
if (walk-type != ex-type)
continue;
if (walk-major != ex-major)
@@ -514,10 +514,10 @@   case '\0':
 * don't want to break compatibility
 */
if (devcgroup-behavior == DEVCG_DEFAULT_ALLOW) {
-   dev_exception_rm(devcgroup, ex);
+   dev_exception_rm(devcgroup-exceptions, ex);
return 0;
}
-   return dev_exception_add(devcgroup, ex);
+   return dev_exception_add(devcgroup-exceptions, ex);
case DEVCG_DENY:
/*
 * If the default policy is to deny by default, try to remove
@@ -525,10 +525,10 @@   return 0;
 * don't want to break compatibility
 */
if (devcgroup-behavior == DEVCG_DEFAULT_DENY) {
-   dev_exception_rm(devcgroup, ex);
+   dev_exception_rm(devcgroup-exceptions, ex);
return 0;
}
-   return dev_exception_add(devcgroup, ex);
+   return dev_exception_add(devcgroup-exceptions, ex);
default:
return -EINVAL;
}

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v4 8/9] devcg: refactor dev_exception_clean()

2013-01-30 Thread aris

This patch is in preparation for hierarchy support.

This patch doesn't introduce any functional changes.

Acked-by: Tejun Heo t...@kernel.org
Cc: Tejun Heo t...@kernel.org
Cc: Serge Hallyn serge.hal...@canonical.com
Signed-off-by: Aristeu Rozanski a...@redhat.com

---
 security/device_cgroup.c |   34 --
 1 file changed, 16 insertions(+), 18 deletions(-)

--- github.orig/security/device_cgroup.c2013-01-29 11:49:16.374681863 
-0500
+++ github/security/device_cgroup.c 2013-01-29 11:49:16.653686016 -0500
@@ -212,32 +212,33 @@ static int dev_exception_add(struct dev_
return rc;
 }
 
-static void __dev_exception_clean(struct dev_cgroup *dev_cgroup)
+static void dev_exception_clean(struct list_head *exceptions)
 {
struct dev_exception_item *ex, *tmp;
 
-   list_for_each_entry_safe(ex, tmp, dev_cgroup-exceptions, list) {
-   list_del_rcu(ex-list);
-   kfree_rcu(ex, rcu);
-   }
-   list_for_each_entry_safe(ex, tmp, dev_cgroup-local.exceptions,
-list) {
+   list_for_each_entry_safe(ex, tmp, exceptions, list) {
list_del_rcu(ex-list);
kfree_rcu(ex, rcu);
}
 }
 
+static void __dev_exception_clean_all(struct dev_cgroup *dev_cgroup)
+{
+   dev_exception_clean(dev_cgroup-exceptions);
+   dev_exception_clean(dev_cgroup-local.exceptions);
+}
+
 /**
- * dev_exception_clean - frees all entries of the exception list
+ * dev_exception_clean_all - frees all entries of the exception list
  * @dev_cgroup: dev_cgroup with the exception list to be cleaned
  *
  * called under devcgroup_mutex
  */
-static void dev_exception_clean(struct dev_cgroup *dev_cgroup)
+static void dev_exception_clean_all(struct dev_cgroup *dev_cgroup)
 {
lockdep_assert_held(devcgroup_mutex);
 
-   __dev_exception_clean(dev_cgroup);
+   __dev_exception_clean_all(dev_cgroup);
 }
 
 /**
@@ -303,7 +304,7 @@ static void devcgroup_css_free(struct cg
struct dev_cgroup *dev_cgroup;
 
dev_cgroup = cgroup_to_devcgroup(cgroup);
-   __dev_exception_clean(dev_cgroup);
+   __dev_exception_clean_all(dev_cgroup);
kfree(dev_cgroup);
 }
 
@@ -508,25 +509,22 @@   memset(ex, 0, sizeof(ex));
case DEVCG_ALLOW:
if (!may_allow_all(parent))
return -EPERM;
-   dev_exception_clean(devcgroup);
+   dev_exception_clean_all(devcgroup);
if (parent)
rc = dev_exceptions_copy(devcgroup-exceptions,
 parent-exceptions);
devcgroup-behavior = DEVCG_DEFAULT_ALLOW;
devcgroup-local.behavior = DEVCG_DEFAULT_ALLOW;
-
-   if (rc)
-   return rc;
break;
case DEVCG_DENY:
-   dev_exception_clean(devcgroup);
+   dev_exception_clean_all(devcgroup);
devcgroup-behavior = DEVCG_DEFAULT_DENY;
devcgroup-local.behavior = DEVCG_DEFAULT_DENY;
break;
default:
-   return -EINVAL;
+   rc = -EINVAL;
}
-   return 0;
+   return rc;
case 'b':
ex.type = DEV_BLOCK;
break;

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v4 3/9] device_cgroup: keep track of local group settings

2013-01-30 Thread aris

In preparation for better hierarchy support, it's needed to retain the local
settings in order to try to reapply them after a propagated change if they're
still valid.

v2: split this patch in two, one to just move dev_exception_rm() before
dev_exception_add() while keeping functional changes in this patch as
requested by Tejun.

Acked-by: Tejun Heo t...@kernel.org
Cc: Tejun Heo t...@kernel.org
Cc: Serge Hallyn serge.hal...@canonical.com
Signed-off-by: Aristeu Rozanski a...@redhat.com
 
---
 security/device_cgroup.c |   83 +--
 1 file changed, 67 insertions(+), 16 deletions(-)

--- github.orig/security/device_cgroup.c2013-01-29 11:49:14.987661210 
-0500
+++ github/security/device_cgroup.c 2013-01-29 11:49:15.244665037 -0500
@@ -39,13 +39,27 @@ struct dev_exception_item {
struct rcu_head rcu;
 };
 
+enum devcg_behavior {
+   DEVCG_DEFAULT_NONE,
+   DEVCG_DEFAULT_ALLOW,
+   DEVCG_DEFAULT_DENY,
+};
+
 struct dev_cgroup {
struct cgroup_subsys_state css;
+
+   /* result of merging the parent's rules with local ones */
struct list_head exceptions;
-   enum {
-   DEVCG_DEFAULT_ALLOW,
-   DEVCG_DEFAULT_DENY,
-   } behavior;
+   enum devcg_behavior behavior;
+
+   /*
+* local set rules, saved so when a parent propagates new rules, the
+* local preferences can be preserved
+*/
+   struct {
+   struct list_head exceptions;
+   enum devcg_behavior behavior;
+   } local;
 };
 
 static inline struct dev_cgroup *css_to_devcgroup(struct cgroup_subsys_state 
*s)
@@ -104,8 +118,8 @@ free_and_exit:
 /*
  * called under devcgroup_mutex
  */
-static void dev_exception_rm(struct list_head *exceptions,
-struct dev_exception_item *ex)
+static void __dev_exception_rm(struct list_head *exceptions,
+  struct dev_exception_item *ex)
 {
struct dev_exception_item *walk, *tmp;
 
@@ -127,11 +141,18 @@ static void dev_exception_rm(struct list
}
 }
 
+static void dev_exception_rm(struct dev_cgroup *devcgroup,
+struct dev_exception_item *ex)
+{
+   __dev_exception_rm(devcgroup-local.exceptions, ex);
+   __dev_exception_rm(devcgroup-exceptions, ex);
+}
+
 /*
  * called under devcgroup_mutex
  */
-static int dev_exception_add(struct list_head *exceptions,
-struct dev_exception_item *ex)
+static int __dev_exception_add(struct list_head *exceptions,
+  struct dev_exception_item *ex)
 {
struct dev_exception_item *excopy, *walk;
 
@@ -159,6 +180,28 @@ static int dev_exception_add(struct list
return 0;
 }
 
+static int dev_exception_add(struct dev_cgroup *devcgroup,
+struct dev_exception_item *ex)
+{
+   int rc;
+
+   lockdep_assert_held(devcgroup_mutex);
+
+   /*
+* we add to the local list so we can preserve local preferences if
+* the parent propagates down new rules
+*/
+   rc = __dev_exception_add(devcgroup-local.exceptions, ex);
+   if (rc)
+   return rc;
+
+   rc = __dev_exception_add(devcgroup-exceptions, ex);
+   if (rc)
+   __dev_exception_rm(devcgroup-local.exceptions, ex);
+
+   return rc;
+}
+
 static void __dev_exception_clean(struct dev_cgroup *dev_cgroup)
 {
struct dev_exception_item *ex, *tmp;
@@ -167,6 +210,11 @@ static void __dev_exception_clean(struct
list_del_rcu(ex-list);
kfree_rcu(ex, rcu);
}
+   list_for_each_entry_safe(ex, tmp, dev_cgroup-local.exceptions,
+list) {
+   list_del_rcu(ex-list);
+   kfree_rcu(ex, rcu);
+   }
 }
 
 /**
@@ -195,6 +243,8 @@ static struct cgroup_subsys_state *devcg
if (!dev_cgroup)
return ERR_PTR(-ENOMEM);
INIT_LIST_HEAD(dev_cgroup-exceptions);
+   INIT_LIST_HEAD(dev_cgroup-local.exceptions);
+   dev_cgroup-local.behavior = DEVCG_DEFAULT_NONE;
parent_cgroup = cgroup-parent;
 
if (parent_cgroup == NULL)
@@ -413,18 +463,19 @@   memset(ex, 0, sizeof(ex));
if (!may_allow_all(parent))
return -EPERM;
dev_exception_clean(devcgroup);
+   if (parent)
+   rc = dev_exceptions_copy(devcgroup-exceptions,
+parent-exceptions);
devcgroup-behavior = DEVCG_DEFAULT_ALLOW;
-   if (!parent)
-   break;
+   devcgroup-local.behavior = DEVCG_DEFAULT_ALLOW;
 
-   rc = dev_exceptions_copy(devcgroup-exceptions,
-parent-exceptions);

[PATCH v4 2/9] devcg: reorder device exception functions

2013-01-30 Thread aris

In preparation for the next patch, reorder dev_exception_add() and
dev_exception_rm().

This patch doesn't introduce any functional changes.

Acked-by: Tejun Heo t...@kernel.org
Cc: Tejun Heo t...@kernel.org
Cc: Serge Hallyn serge.hal...@canonical.com
Signed-off-by: Aristeu Rozanski a...@redhat.com

---
 security/device_cgroup.c |   44 ++--
 1 file changed, 22 insertions(+), 22 deletions(-)

--- github.orig/security/device_cgroup.c2013-01-29 11:49:14.739657516 
-0500
+++ github/security/device_cgroup.c 2013-01-29 11:49:14.987661210 -0500
@@ -104,18 +104,14 @@ free_and_exit:
 /*
  * called under devcgroup_mutex
  */
-static int dev_exception_add(struct list_head *exceptions,
+static void dev_exception_rm(struct list_head *exceptions,
 struct dev_exception_item *ex)
 {
-   struct dev_exception_item *excopy, *walk;
+   struct dev_exception_item *walk, *tmp;
 
lockdep_assert_held(devcgroup_mutex);
 
-   excopy = kmemdup(ex, sizeof(*ex), GFP_KERNEL);
-   if (!excopy)
-   return -ENOMEM;
-
-   list_for_each_entry(walk, exceptions, list) {
+   list_for_each_entry_safe(walk, tmp, exceptions, list) {
if (walk-type != ex-type)
continue;
if (walk-major != ex-major)
@@ -123,27 +119,29 @@ static int dev_exception_add(struct list
if (walk-minor != ex-minor)
continue;
 
-   walk-access |= ex-access;
-   kfree(excopy);
-   excopy = NULL;
+   walk-access = ~ex-access;
+   if (!walk-access) {
+   list_del_rcu(walk-list);
+   kfree_rcu(walk, rcu);
+   }
}
-
-   if (excopy != NULL)
-   list_add_tail_rcu(excopy-list, exceptions);
-   return 0;
 }
 
 /*
  * called under devcgroup_mutex
  */
-static void dev_exception_rm(struct list_head *exceptions,
+static int dev_exception_add(struct list_head *exceptions,
 struct dev_exception_item *ex)
 {
-   struct dev_exception_item *walk, *tmp;
+   struct dev_exception_item *excopy, *walk;
 
lockdep_assert_held(devcgroup_mutex);
 
-   list_for_each_entry_safe(walk, tmp, exceptions, list) {
+   excopy = kmemdup(ex, sizeof(*ex), GFP_KERNEL);
+   if (!excopy)
+   return -ENOMEM;
+
+   list_for_each_entry(walk, exceptions, list) {
if (walk-type != ex-type)
continue;
if (walk-major != ex-major)
@@ -151,12 +149,14 @@ static void dev_exception_rm(struct list
if (walk-minor != ex-minor)
continue;
 
-   walk-access = ~ex-access;
-   if (!walk-access) {
-   list_del_rcu(walk-list);
-   kfree_rcu(walk, rcu);
-   }
+   walk-access |= ex-access;
+   kfree(excopy);
+   excopy = NULL;
}
+
+   if (excopy != NULL)
+   list_add_tail_rcu(excopy-list, exceptions);
+   return 0;
 }
 
 static void __dev_exception_clean(struct dev_cgroup *dev_cgroup)

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v3 9/9] devcg: propagate local changes down the hierarchy

2013-01-29 Thread aris

This patch makes all changes propagate down the hierarchy, respecting local
configurations when possible.

Behavior changes will clean up exceptions in all the children except when the
parent changes the behavior from allow to deny and the child's behavior was
already deny, in which case the local exceptions will be reused. The inverse
is not possible: you can't have a parent with behavior deny and a child with
behavior accept.

New exceptions allowing additional access to devices won't be propagated, but
it'll be possible to add an exception to access all or part of the newly
allowed device(s).

New exceptions disallowing access to devices will be propagated down and the
local group's exceptions will be revalidated for the new situation.
Example:
      A
     / \
        B

    group        behavior        exceptions
    A            allow           "b 8:* rwm", "c 116:1 rw"
    B            deny            "c 1:3 rwm", "c 116:2 rwm", "b 3:* rwm"

If a new exception is added to group A:
# echo "c 116:* r" > A/devices.deny
it'll propagate down and after revalidating B's local exceptions, the exception
"c 116:2 rwm" will be removed.

In case parent behavior or exceptions change and local settings are not
allowed anymore, they'll be deleted.

v3:
- update documentation
- move css_online/css_offline changes to a new patch
- use cgroup_for_each_descendant_pre() instead of own descendant walk
- move exception_copy rework to a separate patch
- move exception_clean rework to a separated patch

v2:
- instead of keeping the local settings that won't apply anymore, remove them


Cc: Tejun Heo 
Cc: Serge Hallyn 
Signed-off-by: Aristeu Rozanski 

---
 Documentation/cgroups/devices.txt |   66 ++
 security/device_cgroup.c  |  170 --
 2 files changed, 230 insertions(+), 6 deletions(-)

--- github.orig/security/device_cgroup.c2013-01-29 11:49:16.653686016 
-0500
+++ github/security/device_cgroup.c 2013-01-29 13:58:07.537099962 -0500
@@ -60,6 +60,9 @@ struct dev_cgroup {
struct list_head exceptions;
enum devcg_behavior behavior;
} local;
+
+   /* temporary list for pending propagation operations */
+   struct list_head propagate_pending;
 };
 
 static inline struct dev_cgroup *css_to_devcgroup(struct cgroup_subsys_state 
*s)
@@ -241,6 +244,11 @@ static void dev_exception_clean_all(stru
__dev_exception_clean_all(dev_cgroup);
 }
 
+static inline bool is_devcg_online(const struct dev_cgroup *devcg)
+{
+   return (devcg->behavior != DEVCG_DEFAULT_NONE);
+}
+
 /**
  * devcgroup_online - initializes devcgroup's behavior and exceptions based on
  *   parent's
@@ -292,6 +300,7 @@ static struct cgroup_subsys_state *devcg
return ERR_PTR(-ENOMEM);
INIT_LIST_HEAD(_cgroup->exceptions);
INIT_LIST_HEAD(_cgroup->local.exceptions);
+   INIT_LIST_HEAD(_cgroup->propagate_pending);
dev_cgroup->local.behavior = DEVCG_DEFAULT_NONE;
dev_cgroup->behavior = DEVCG_DEFAULT_NONE;
parent_cgroup = cgroup->parent;
@@ -471,6 +480,139 @@ static inline int may_allow_all(struct d
return parent->behavior == DEVCG_DEFAULT_ALLOW;
 }
 
+/**
+ * revalidate_exceptions - walks through the exception list and revalidates
+ *  the exceptions based on parents' behavior and
+ *  exceptions. Called with devcgroup_mutex held.
+ * @devcg: cgroup which exceptions will be checked
+ *
+ * returns: 0 in success, -ENOMEM in case of out of memory
+ *
+ * This is one of the two key functions for hierarchy implementation.
+ * This function is responsible for re-evaluating all the cgroup's locally
+ * set exceptions due to a parent's behavior or exception change.
+ * Refer to Documentation/cgroups/devices.txt for more details.
+ */
+static int revalidate_exceptions(struct dev_cgroup *devcg)
+{
+   struct dev_exception_item *ex;
+   struct list_head *this, *tmp;
+
+   list_for_each_safe(this, tmp, >local.exceptions) {
+   ex = container_of(this, struct dev_exception_item, list);
+   if (parent_has_perm(devcg, ex)) {
+   if (dev_exception_copy(>exceptions, ex))
+   goto error;
+   } else
+   __dev_exception_rm(>local.exceptions, ex);
+   }
+   return 0;
+
+error:
+   dev_exception_clean(>exceptions);
+   return -ENOMEM;
+}
+
+/**
+ * propagate_behavior - propagates a change in the behavior down in hierarchy
+ * @devcg_root: device cgroup that changed behavior
+ *
+ * returns: 0 in case of success, != 0 in case of error
+ *
+ * This is one of the two key functions for hierarchy implementation.
+ * All cgroup's children recursively will have the behavior changed and
+ * exceptions copied from the parent then its local behavior and exceptions
+ * re-evaluated and applied if they're still

[PATCH v3 2/9] devcg: reorder device exception functions

2013-01-29 Thread aris

In preparation for the next patch, reorder dev_exception_add() and
dev_exception_rm().

Cc: Tejun Heo 
Cc: Serge Hallyn 
Signed-off-by: Aristeu Rozanski 

---
 security/device_cgroup.c |   44 ++--
 1 file changed, 22 insertions(+), 22 deletions(-)

--- github.orig/security/device_cgroup.c2013-01-29 11:49:14.739657516 
-0500
+++ github/security/device_cgroup.c 2013-01-29 11:49:14.987661210 -0500
@@ -104,18 +104,14 @@ free_and_exit:
 /*
  * called under devcgroup_mutex
  */
-static int dev_exception_add(struct list_head *exceptions,
+static void dev_exception_rm(struct list_head *exceptions,
 struct dev_exception_item *ex)
 {
-   struct dev_exception_item *excopy, *walk;
+   struct dev_exception_item *walk, *tmp;
 
lockdep_assert_held(_mutex);
 
-   excopy = kmemdup(ex, sizeof(*ex), GFP_KERNEL);
-   if (!excopy)
-   return -ENOMEM;
-
-   list_for_each_entry(walk, exceptions, list) {
+   list_for_each_entry_safe(walk, tmp, exceptions, list) {
if (walk->type != ex->type)
continue;
if (walk->major != ex->major)
@@ -123,27 +119,29 @@ static int dev_exception_add(struct list
if (walk->minor != ex->minor)
continue;
 
-   walk->access |= ex->access;
-   kfree(excopy);
-   excopy = NULL;
+   walk->access &= ~ex->access;
+   if (!walk->access) {
+   list_del_rcu(>list);
+   kfree_rcu(walk, rcu);
+   }
}
-
-   if (excopy != NULL)
-   list_add_tail_rcu(>list, exceptions);
-   return 0;
 }
 
 /*
  * called under devcgroup_mutex
  */
-static void dev_exception_rm(struct list_head *exceptions,
+static int dev_exception_add(struct list_head *exceptions,
 struct dev_exception_item *ex)
 {
-   struct dev_exception_item *walk, *tmp;
+   struct dev_exception_item *excopy, *walk;
 
lockdep_assert_held(_mutex);
 
-   list_for_each_entry_safe(walk, tmp, exceptions, list) {
+   excopy = kmemdup(ex, sizeof(*ex), GFP_KERNEL);
+   if (!excopy)
+   return -ENOMEM;
+
+   list_for_each_entry(walk, exceptions, list) {
if (walk->type != ex->type)
continue;
if (walk->major != ex->major)
@@ -151,12 +149,14 @@ static void dev_exception_rm(struct list
if (walk->minor != ex->minor)
continue;
 
-   walk->access &= ~ex->access;
-   if (!walk->access) {
-   list_del_rcu(>list);
-   kfree_rcu(walk, rcu);
-   }
+   walk->access |= ex->access;
+   kfree(excopy);
+   excopy = NULL;
}
+
+   if (excopy != NULL)
+   list_add_tail_rcu(>list, exceptions);
+   return 0;
 }
 
 static void __dev_exception_clean(struct dev_cgroup *dev_cgroup)

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v3 8/9] devcg: refactor dev_exception_clean()

2013-01-29 Thread aris

This patch is in preparation for hierarchy support.

Cc: Tejun Heo 
Cc: Serge Hallyn 
Signed-off-by: Aristeu Rozanski 

---
 security/device_cgroup.c |   34 --
 1 file changed, 16 insertions(+), 18 deletions(-)

--- github.orig/security/device_cgroup.c2013-01-29 11:49:16.374681863 
-0500
+++ github/security/device_cgroup.c 2013-01-29 11:49:16.653686016 -0500
@@ -212,32 +212,33 @@ static int dev_exception_add(struct dev_
return rc;
 }
 
-static void __dev_exception_clean(struct dev_cgroup *dev_cgroup)
+static void dev_exception_clean(struct list_head *exceptions)
 {
struct dev_exception_item *ex, *tmp;
 
-   list_for_each_entry_safe(ex, tmp, _cgroup->exceptions, list) {
-   list_del_rcu(>list);
-   kfree_rcu(ex, rcu);
-   }
-   list_for_each_entry_safe(ex, tmp, _cgroup->local.exceptions,
-list) {
+   list_for_each_entry_safe(ex, tmp, exceptions, list) {
list_del_rcu(>list);
kfree_rcu(ex, rcu);
}
 }
 
+static void __dev_exception_clean_all(struct dev_cgroup *dev_cgroup)
+{
+   dev_exception_clean(_cgroup->exceptions);
+   dev_exception_clean(_cgroup->local.exceptions);
+}
+
 /**
- * dev_exception_clean - frees all entries of the exception list
+ * dev_exception_clean_all - frees all entries of the exception list
  * @dev_cgroup: dev_cgroup with the exception list to be cleaned
  *
  * called under devcgroup_mutex
  */
-static void dev_exception_clean(struct dev_cgroup *dev_cgroup)
+static void dev_exception_clean_all(struct dev_cgroup *dev_cgroup)
 {
lockdep_assert_held(_mutex);
 
-   __dev_exception_clean(dev_cgroup);
+   __dev_exception_clean_all(dev_cgroup);
 }
 
 /**
@@ -303,7 +304,7 @@ static void devcgroup_css_free(struct cg
struct dev_cgroup *dev_cgroup;
 
dev_cgroup = cgroup_to_devcgroup(cgroup);
-   __dev_exception_clean(dev_cgroup);
+   __dev_exception_clean_all(dev_cgroup);
kfree(dev_cgroup);
 }
 
@@ -508,25 +509,22 @@   memset(, 0, sizeof(ex));
case DEVCG_ALLOW:
if (!may_allow_all(parent))
return -EPERM;
-   dev_exception_clean(devcgroup);
+   dev_exception_clean_all(devcgroup);
if (parent)
rc = dev_exceptions_copy(>exceptions,
 >exceptions);
devcgroup->behavior = DEVCG_DEFAULT_ALLOW;
devcgroup->local.behavior = DEVCG_DEFAULT_ALLOW;
-
-   if (rc)
-   return rc;
break;
case DEVCG_DENY:
-   dev_exception_clean(devcgroup);
+   dev_exception_clean_all(devcgroup);
devcgroup->behavior = DEVCG_DEFAULT_DENY;
devcgroup->local.behavior = DEVCG_DEFAULT_DENY;
break;
default:
-   return -EINVAL;
+   rc = -EINVAL;
}
-   return 0;
+   return rc;
case 'b':
ex.type = DEV_BLOCK;
break;

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v3 1/9] device_cgroup: prepare exception list handling functions for two lists

2013-01-29 Thread aris

In the following patches, the device_cgroup structure will have two sets of
behavior and exception lists (the actual one and another with the local
settings), so rework the functions to take an exception list, not a device_cgroup.

Cc: Tejun Heo 
Cc: Serge Hallyn 
Signed-off-by: Aristeu Rozanski 

---
 security/device_cgroup.c |   18 +-
 1 file changed, 9 insertions(+), 9 deletions(-)

--- github.orig/security/device_cgroup.c2013-01-29 11:48:50.603298122 
-0500
+++ github/security/device_cgroup.c 2013-01-29 11:49:14.739657516 -0500
@@ -104,7 +104,7 @@ free_and_exit:
 /*
  * called under devcgroup_mutex
  */
-static int dev_exception_add(struct dev_cgroup *dev_cgroup,
+static int dev_exception_add(struct list_head *exceptions,
 struct dev_exception_item *ex)
 {
struct dev_exception_item *excopy, *walk;
@@ -115,7 +115,7 @@ static int dev_exception_add(struct dev_
if (!excopy)
return -ENOMEM;
 
-   list_for_each_entry(walk, _cgroup->exceptions, list) {
+   list_for_each_entry(walk, exceptions, list) {
if (walk->type != ex->type)
continue;
if (walk->major != ex->major)
@@ -129,21 +129,21 @@ static int dev_exception_add(struct dev_
}
 
if (excopy != NULL)
-   list_add_tail_rcu(>list, _cgroup->exceptions);
+   list_add_tail_rcu(>list, exceptions);
return 0;
 }
 
 /*
  * called under devcgroup_mutex
  */
-static void dev_exception_rm(struct dev_cgroup *dev_cgroup,
+static void dev_exception_rm(struct list_head *exceptions,
 struct dev_exception_item *ex)
 {
struct dev_exception_item *walk, *tmp;
 
lockdep_assert_held(_mutex);
 
-   list_for_each_entry_safe(walk, tmp, _cgroup->exceptions, list) {
+   list_for_each_entry_safe(walk, tmp, exceptions, list) {
if (walk->type != ex->type)
continue;
if (walk->major != ex->major)
@@ -514,10 +514,10 @@   case '\0':
 * don't want to break compatibility
 */
if (devcgroup->behavior == DEVCG_DEFAULT_ALLOW) {
-   dev_exception_rm(devcgroup, );
+   dev_exception_rm(>exceptions, );
return 0;
}
-   return dev_exception_add(devcgroup, );
+   return dev_exception_add(>exceptions, );
case DEVCG_DENY:
/*
 * If the default policy is to deny by default, try to remove
@@ -525,10 +525,10 @@   return 0;
 * don't want to break compatibility
 */
if (devcgroup->behavior == DEVCG_DEFAULT_DENY) {
-   dev_exception_rm(devcgroup, );
+   dev_exception_rm(>exceptions, );
return 0;
}
-   return dev_exception_add(devcgroup, );
+   return dev_exception_add(>exceptions, );
default:
return -EINVAL;
}

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v3 3/9] device_cgroup: keep track of local group settings

2013-01-29 Thread aris

In preparation for better hierarchy support, the local settings need to be
retained so that they can be reapplied after a propagated change if they're
still valid.

v2: split this patch in two, one to just move dev_exception_rm() before
dev_exception_add() while keeping functional changes in this patch as
requested by Tejun.

Cc: Tejun Heo 
Cc: Serge Hallyn 
Signed-off-by: Aristeu Rozanski 
 
---
 security/device_cgroup.c |   83 +--
 1 file changed, 67 insertions(+), 16 deletions(-)

--- github.orig/security/device_cgroup.c2013-01-29 11:49:14.987661210 
-0500
+++ github/security/device_cgroup.c 2013-01-29 11:49:15.244665037 -0500
@@ -39,13 +39,27 @@ struct dev_exception_item {
struct rcu_head rcu;
 };
 
+enum devcg_behavior {
+   DEVCG_DEFAULT_NONE,
+   DEVCG_DEFAULT_ALLOW,
+   DEVCG_DEFAULT_DENY,
+};
+
 struct dev_cgroup {
struct cgroup_subsys_state css;
+
+   /* result of merging the parent's rules with local ones */
struct list_head exceptions;
-   enum {
-   DEVCG_DEFAULT_ALLOW,
-   DEVCG_DEFAULT_DENY,
-   } behavior;
+   enum devcg_behavior behavior;
+
+   /*
+* local set rules, saved so when a parent propagates new rules, the
+* local preferences can be preserved
+*/
+   struct {
+   struct list_head exceptions;
+   enum devcg_behavior behavior;
+   } local;
 };
 
 static inline struct dev_cgroup *css_to_devcgroup(struct cgroup_subsys_state 
*s)
@@ -104,8 +118,8 @@ free_and_exit:
 /*
  * called under devcgroup_mutex
  */
-static void dev_exception_rm(struct list_head *exceptions,
-struct dev_exception_item *ex)
+static void __dev_exception_rm(struct list_head *exceptions,
+  struct dev_exception_item *ex)
 {
struct dev_exception_item *walk, *tmp;
 
@@ -127,11 +141,18 @@ static void dev_exception_rm(struct list
}
 }
 
+static void dev_exception_rm(struct dev_cgroup *devcgroup,
+struct dev_exception_item *ex)
+{
+   __dev_exception_rm(>local.exceptions, ex);
+   __dev_exception_rm(>exceptions, ex);
+}
+
 /*
  * called under devcgroup_mutex
  */
-static int dev_exception_add(struct list_head *exceptions,
-struct dev_exception_item *ex)
+static int __dev_exception_add(struct list_head *exceptions,
+  struct dev_exception_item *ex)
 {
struct dev_exception_item *excopy, *walk;
 
@@ -159,6 +180,28 @@ static int dev_exception_add(struct list
return 0;
 }
 
+static int dev_exception_add(struct dev_cgroup *devcgroup,
+struct dev_exception_item *ex)
+{
+   int rc;
+
+   lockdep_assert_held(_mutex);
+
+   /*
+* we add to the local list so we can preserve local preferences if
+* the parent propagates down new rules
+*/
+   rc = __dev_exception_add(>local.exceptions, ex);
+   if (rc)
+   return rc;
+
+   rc = __dev_exception_add(>exceptions, ex);
+   if (rc)
+   __dev_exception_rm(>local.exceptions, ex);
+
+   return rc;
+}
+
 static void __dev_exception_clean(struct dev_cgroup *dev_cgroup)
 {
struct dev_exception_item *ex, *tmp;
@@ -167,6 +210,11 @@ static void __dev_exception_clean(struct
list_del_rcu(>list);
kfree_rcu(ex, rcu);
}
+   list_for_each_entry_safe(ex, tmp, _cgroup->local.exceptions,
+list) {
+   list_del_rcu(>list);
+   kfree_rcu(ex, rcu);
+   }
 }
 
 /**
@@ -195,6 +243,8 @@ static struct cgroup_subsys_state *devcg
if (!dev_cgroup)
return ERR_PTR(-ENOMEM);
INIT_LIST_HEAD(_cgroup->exceptions);
+   INIT_LIST_HEAD(_cgroup->local.exceptions);
+   dev_cgroup->local.behavior = DEVCG_DEFAULT_NONE;
parent_cgroup = cgroup->parent;
 
if (parent_cgroup == NULL)
@@ -413,18 +463,19 @@   memset(, 0, sizeof(ex));
if (!may_allow_all(parent))
return -EPERM;
dev_exception_clean(devcgroup);
+   if (parent)
+   rc = dev_exceptions_copy(>exceptions,
+>exceptions);
devcgroup->behavior = DEVCG_DEFAULT_ALLOW;
-   if (!parent)
-   break;
+   devcgroup->local.behavior = DEVCG_DEFAULT_ALLOW;
 
-   rc = dev_exceptions_copy(>exceptions,
->exceptions);
if (rc)
return rc;
break;
case DEVCG_DENY:
dev_exception_clean(devcgroup);

[PATCH v3 0/9] devcg: introduce proper hierarchy support

2013-01-29 Thread aris

This patchset implements device cgroup hierarchy. Behaviors and exceptions
will be propagated down the tree, and local preferences will be re-evaluated
every time a change in the parent occurs, reapplying them if that is still
possible.

git://github.com/aristeu/linux-2.6.git
branch: devcg_hierarchy_review

v3:
- update documentation
- move css_online/css_offline changes to a new patch
- use cgroup_for_each_descendant_pre() instead of own descendant walk
- move exception_copy rework to a separated patch
- move exception_clean rework to a separated patch
- new patch to just move dev_exception_rm() before dev_exception_add()
  as requested by Tejun.
- updated patch description for may_access() changes
- new patch to expand the may_access() logic before changing it
- fixed argument description order in may_access()

v2:
- rebase on top of "device_cgroup: don't grab mutex in rcu callback"
- in case parent changes behavior or exceptions and the local exceptions won't
  apply anymore, remove them instead of keeping them around.

Cc: Tejun Heo  
  
Cc: Serge Hallyn

Signed-off-by: Aristeu Rozanski


-- 
Aristeu
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v3 7/9] devcg: split single exception copy from dev_exceptions_copy()

2013-01-29 Thread aris

This patch is in preparation for hierarchy support

Cc: Tejun Heo 
Cc: Serge Hallyn 
Signed-off-by: Aristeu Rozanski 

---
 security/device_cgroup.c |   18 ++
 1 file changed, 14 insertions(+), 4 deletions(-)

--- github.orig/security/device_cgroup.c2013-01-29 11:49:16.076677425 
-0500
+++ github/security/device_cgroup.c 2013-01-29 11:49:16.374681863 -0500
@@ -89,20 +89,30 @@ static int devcgroup_can_attach(struct c
return 0;
 }
 
+static int dev_exception_copy(struct list_head *dest,
+ struct dev_exception_item *ex)
+{
+   struct dev_exception_item *new;
+
+   new = kmemdup(ex, sizeof(*ex), GFP_KERNEL);
+   if (!new)
+   return -ENOMEM;
+   list_add_tail(>list, dest);
+   return 0;
+}
+
 /*
  * called under devcgroup_mutex
  */
 static int dev_exceptions_copy(struct list_head *dest, struct list_head *orig)
 {
-   struct dev_exception_item *ex, *tmp, *new;
+   struct dev_exception_item *ex, *tmp;
 
lockdep_assert_held(_mutex);
 
list_for_each_entry(ex, orig, list) {
-   new = kmemdup(ex, sizeof(*ex), GFP_KERNEL);
-   if (!new)
+   if (dev_exception_copy(dest, ex))
goto free_and_exit;
-   list_add_tail(>list, dest);
}
 
return 0;

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v3 6/9] devcg: use css_online and css_offline

2013-01-29 Thread aris

Allocate resources and change behavior only when online.
This patch is in preparation for hierarchy support.

Cc: Tejun Heo 
Cc: Serge Hallyn 
Signed-off-by: Aristeu Rozanski 

---
 security/device_cgroup.c |   59 +--
 1 file changed, 42 insertions(+), 17 deletions(-)

--- github.orig/security/device_cgroup.c2013-01-29 11:49:15.795673240 
-0500
+++ github/security/device_cgroup.c 2013-01-29 11:49:16.076677425 -0500
@@ -230,14 +230,51 @@ static void dev_exception_clean(struct d
__dev_exception_clean(dev_cgroup);
 }
 
+/**
+ * devcgroup_online - initializes devcgroup's behavior and exceptions based on
+ *   parent's
+ * @cgroup: cgroup getting online
+ * returns 0 in case of success, error code otherwise
+ */
+static int devcgroup_online(struct cgroup *cgroup)
+{
+   struct dev_cgroup *dev_cgroup, *parent_dev_cgroup = NULL;
+   int ret = 0;
+
+   mutex_lock(_mutex);
+   dev_cgroup = cgroup_to_devcgroup(cgroup);
+   if (cgroup->parent)
+   parent_dev_cgroup = cgroup_to_devcgroup(cgroup->parent);
+
+   if (parent_dev_cgroup == NULL)
+   dev_cgroup->behavior = DEVCG_DEFAULT_ALLOW;
+   else {
+   ret = dev_exceptions_copy(_cgroup->exceptions,
+ _dev_cgroup->exceptions);
+   if (!ret)
+   dev_cgroup->behavior = parent_dev_cgroup->behavior;
+   }
+   mutex_unlock(_mutex);
+
+   return ret;
+}
+
+static void devcgroup_offline(struct cgroup *cgroup)
+{
+   struct dev_cgroup *dev_cgroup = cgroup_to_devcgroup(cgroup);
+
+   mutex_lock(_mutex);
+   dev_cgroup->behavior = DEVCG_DEFAULT_NONE;
+   mutex_unlock(_mutex);
+}
+
 /*
  * called from kernel/cgroup.c with cgroup_lock() held.
  */
 static struct cgroup_subsys_state *devcgroup_css_alloc(struct cgroup *cgroup)
 {
-   struct dev_cgroup *dev_cgroup, *parent_dev_cgroup;
+   struct dev_cgroup *dev_cgroup;
struct cgroup *parent_cgroup;
-   int ret;
 
dev_cgroup = kzalloc(sizeof(*dev_cgroup), GFP_KERNEL);
if (!dev_cgroup)
@@ -245,23 +282,9 @@ static struct cgroup_subsys_state *devcg
INIT_LIST_HEAD(_cgroup->exceptions);
INIT_LIST_HEAD(_cgroup->local.exceptions);
dev_cgroup->local.behavior = DEVCG_DEFAULT_NONE;
+   dev_cgroup->behavior = DEVCG_DEFAULT_NONE;
parent_cgroup = cgroup->parent;
 
-   if (parent_cgroup == NULL)
-   dev_cgroup->behavior = DEVCG_DEFAULT_ALLOW;
-   else {
-   parent_dev_cgroup = cgroup_to_devcgroup(parent_cgroup);
-   mutex_lock(_mutex);
-   ret = dev_exceptions_copy(_cgroup->exceptions,
- _dev_cgroup->exceptions);
-   dev_cgroup->behavior = parent_dev_cgroup->behavior;
-   mutex_unlock(_mutex);
-   if (ret) {
-   kfree(dev_cgroup);
-   return ERR_PTR(ret);
-   }
-   }
-
return _cgroup->css;
 }
 
@@ -635,6 +658,8 @@ struct cgroup_subsys devices_subsys = {
.can_attach = devcgroup_can_attach,
.css_alloc = devcgroup_css_alloc,
.css_free = devcgroup_css_free,
+   .css_online = devcgroup_online,
+   .css_offline = devcgroup_offline,
.subsys_id = devices_subsys_id,
.base_cftypes = dev_cgroup_files,
 

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v3 5/9] devcg: prepare may_access() for hierarchy support

2013-01-29 Thread aris

Currently may_access() is only able to verify if an exception is valid for the
current cgroup, which has the same behavior. With hierarchy, it will also be
used to verify if a cgroup's local exception is valid with respect to its parent
cgroup, which might have a different behavior.

v2:
- updated patch description
- rebased on top of a new patch to expand the may_access() logic to make it
  more clear
- fixed argument description order in may_access()

Cc: Tejun Heo 
Cc: Serge Hallyn 
Signed-off-by: Aristeu Rozanski 

---
 security/device_cgroup.c |   44 +++-
 1 file changed, 27 insertions(+), 17 deletions(-)

--- github.orig/security/device_cgroup.c2013-01-29 11:49:15.514669057 
-0500
+++ github/security/device_cgroup.c 2013-01-29 11:49:15.795673240 -0500
@@ -354,9 +354,11 @@return 0;
  * verify if a certain access is allowed.
  * @dev_cgroup: dev cgroup to be tested against
  * @refex: new exception
+ * @behavior: behavior of the exception
  */
-static int may_access(struct dev_cgroup *dev_cgroup,
- struct dev_exception_item *refex)
+static bool may_access(struct dev_cgroup *dev_cgroup,
+  struct dev_exception_item *refex,
+  enum devcg_behavior behavior)
 {
struct dev_exception_item *ex;
bool match = false;
@@ -380,19 +382,27 @@   if (ex->minor != ~0 && ex->minor != re
break;
}
 
-   /*
-* In two cases we'll consider this new exception valid:
-* - the dev cgroup has its default policy to deny + exception list:
-*   the new exception *should* match the exceptions
-* - the dev cgroup has its default policy to allow + exception list:
-*   the new exception should *not* match any of the exceptions
-*/
-   if (dev_cgroup->behavior == DEVCG_DEFAULT_DENY) {
-   if (match)
-   return 1;
+   if (dev_cgroup->behavior == DEVCG_DEFAULT_ALLOW) {
+   if (behavior == DEVCG_DEFAULT_ALLOW) {
+   /* the exception will deny access to certain devices */
+   return true;
+   } else {
+   /* the exception will allow access to certain devices */
+   if (match)
+   /*
+* a new exception allowing access shouldn't
+* match an parent's exception
+*/
+   return false;
+   return true;
+   }
} else {
-   if (!match)
-   return 1;
+   /* only behavior == DEVCG_DEFAULT_DENY allowed here */
+   if (match)
+   /* parent has an exception that matches the proposed */
+   return true;
+   else
+   return false;
}
return 0;
 }
@@ -411,7 +421,7 @@ static int parent_has_perm(struct dev_cg
if (!pcg)
return 1;
parent = cgroup_to_devcgroup(pcg);
-   return may_access(parent, ex);
+   return may_access(parent, ex, childcg->behavior);
 }
 
 /**
@@ -445,7 +455,7 @@ static int devcgroup_update_access(struc
 {
const char *b;
char temp[12];  /* 11 + 1 characters needed for a u32 */
-   int count, rc;
+   int count, rc = 0;
struct dev_exception_item ex;
struct cgroup *p = devcgroup->css.cgroup;
struct dev_cgroup *parent = NULL;
@@ -663,7 +673,7 @@ memset(, 0, sizeof(ex));
 
rcu_read_lock();
dev_cgroup = task_devcgroup(current);
-   rc = may_access(dev_cgroup, );
+   rc = may_access(dev_cgroup, , dev_cgroup->behavior);
rcu_read_unlock();
 
if (!rc)

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v3 4/9] devcg: expand may_access() logic

2013-01-29 Thread aris

In order to make the next patch more clear, expand may_access() logic.

Cc: Tejun Heo 
Cc: Serge Hallyn 
Signed-off-by: Aristeu Rozanski 

---
 security/device_cgroup.c |   15 +--
 1 file changed, 9 insertions(+), 6 deletions(-)

--- github.orig/security/device_cgroup.c2013-01-29 11:49:15.244665037 
-0500
+++ github/security/device_cgroup.c 2013-01-29 11:49:15.514669057 -0500
@@ -382,15 +382,18 @@   if (ex->minor != ~0 && ex->minor != re
 
/*
 * In two cases we'll consider this new exception valid:
-* - the dev cgroup has its default policy to allow + exception list:
-*   the new exception should *not* match any of the exceptions
-*   (behavior == DEVCG_DEFAULT_ALLOW, !match)
 * - the dev cgroup has its default policy to deny + exception list:
 *   the new exception *should* match the exceptions
-*   (behavior == DEVCG_DEFAULT_DENY, match)
+* - the dev cgroup has its default policy to allow + exception list:
+*   the new exception should *not* match any of the exceptions
 */
-   if ((dev_cgroup->behavior == DEVCG_DEFAULT_DENY) == match)
-   return 1;
+   if (dev_cgroup->behavior == DEVCG_DEFAULT_DENY) {
+   if (match)
+   return 1;
+   } else {
+   if (!match)
+   return 1;
+   }
return 0;
 }
 

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v3 4/9] devcg: expand may_access() logic

2013-01-29 Thread aris

In order to make the next patch more clear, expand may_access() logic.

Cc: Tejun Heo t...@kernel.org
Cc: Serge Hallyn serge.hal...@canonical.com
Signed-off-by: Aristeu Rozanski a...@redhat.com

---
 security/device_cgroup.c |   15 +--
 1 file changed, 9 insertions(+), 6 deletions(-)

--- github.orig/security/device_cgroup.c2013-01-29 11:49:15.244665037 
-0500
+++ github/security/device_cgroup.c 2013-01-29 11:49:15.514669057 -0500
@@ -382,15 +382,18 @@   if (ex-minor != ~0  ex-minor != re
 
/*
 * In two cases we'll consider this new exception valid:
-* - the dev cgroup has its default policy to allow + exception list:
-*   the new exception should *not* match any of the exceptions
-*   (behavior == DEVCG_DEFAULT_ALLOW, !match)
 * - the dev cgroup has its default policy to deny + exception list:
 *   the new exception *should* match the exceptions
-*   (behavior == DEVCG_DEFAULT_DENY, match)
+* - the dev cgroup has its default policy to allow + exception list:
+*   the new exception should *not* match any of the exceptions
 */
-   if ((dev_cgroup-behavior == DEVCG_DEFAULT_DENY) == match)
-   return 1;
+   if (dev_cgroup-behavior == DEVCG_DEFAULT_DENY) {
+   if (match)
+   return 1;
+   } else {
+   if (!match)
+   return 1;
+   }
return 0;
 }
 

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v3 5/9] devcg: prepare may_access() for hierarchy support

2013-01-29 Thread aris

Currently may_access() is only able to verify if an exception is valid for the
current cgroup, which has the same behavior. With hierarchy, it will also be
used to verify if a cgroup's local exception is valid with respect to its parent
cgroup, which might have a different behavior.

v2:
- updated patch description
- rebased on top of a new patch to expand the may_access() logic to make it
  more clear
- fixed argument description order in may_access()

Cc: Tejun Heo t...@kernel.org
Cc: Serge Hallyn serge.hal...@canonical.com
Signed-off-by: Aristeu Rozanski a...@redhat.com

---
 security/device_cgroup.c |   44 +++-
 1 file changed, 27 insertions(+), 17 deletions(-)

--- github.orig/security/device_cgroup.c2013-01-29 11:49:15.514669057 
-0500
+++ github/security/device_cgroup.c 2013-01-29 11:49:15.795673240 -0500
@@ -354,9 +354,11 @@return 0;
  * verify if a certain access is allowed.
  * @dev_cgroup: dev cgroup to be tested against
  * @refex: new exception
+ * @behavior: behavior of the exception
  */
-static int may_access(struct dev_cgroup *dev_cgroup,
- struct dev_exception_item *refex)
+static bool may_access(struct dev_cgroup *dev_cgroup,
+  struct dev_exception_item *refex,
+  enum devcg_behavior behavior)
 {
struct dev_exception_item *ex;
bool match = false;
@@ -380,19 +382,27 @@   if (ex-minor != ~0  ex-minor != re
break;
}
 
-   /*
-* In two cases we'll consider this new exception valid:
-* - the dev cgroup has its default policy to deny + exception list:
-*   the new exception *should* match the exceptions
-* - the dev cgroup has its default policy to allow + exception list:
-*   the new exception should *not* match any of the exceptions
-*/
-   if (dev_cgroup-behavior == DEVCG_DEFAULT_DENY) {
-   if (match)
-   return 1;
+   if (dev_cgroup-behavior == DEVCG_DEFAULT_ALLOW) {
+   if (behavior == DEVCG_DEFAULT_ALLOW) {
+   /* the exception will deny access to certain devices */
+   return true;
+   } else {
+   /* the exception will allow access to certain devices */
+   if (match)
+   /*
+* a new exception allowing access shouldn't
+* match an parent's exception
+*/
+   return false;
+   return true;
+   }
} else {
-   if (!match)
-   return 1;
+   /* only behavior == DEVCG_DEFAULT_DENY allowed here */
+   if (match)
+   /* parent has an exception that matches the proposed */
+   return true;
+   else
+   return false;
}
return 0;
 }
@@ -411,7 +421,7 @@ static int parent_has_perm(struct dev_cg
if (!pcg)
return 1;
parent = cgroup_to_devcgroup(pcg);
-   return may_access(parent, ex);
+   return may_access(parent, ex, childcg-behavior);
 }
 
 /**
@@ -445,7 +455,7 @@ static int devcgroup_update_access(struc
 {
const char *b;
char temp[12];  /* 11 + 1 characters needed for a u32 */
-   int count, rc;
+   int count, rc = 0;
struct dev_exception_item ex;
struct cgroup *p = devcgroup-css.cgroup;
struct dev_cgroup *parent = NULL;
@@ -663,7 +673,7 @@ memset(ex, 0, sizeof(ex));
 
rcu_read_lock();
dev_cgroup = task_devcgroup(current);
-   rc = may_access(dev_cgroup, ex);
+   rc = may_access(dev_cgroup, ex, dev_cgroup-behavior);
rcu_read_unlock();
 
if (!rc)

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v3 6/9] devcg: use css_online and css_offline

2013-01-29 Thread aris

Allocate resources and change behavior only when online.
This patch is in preparation for hierarchy support.

Cc: Tejun Heo t...@kernel.org
Cc: Serge Hallyn serge.hal...@canonical.com
Signed-off-by: Aristeu Rozanski a...@redhat.com

---
 security/device_cgroup.c |   59 +--
 1 file changed, 42 insertions(+), 17 deletions(-)

--- github.orig/security/device_cgroup.c2013-01-29 11:49:15.795673240 
-0500
+++ github/security/device_cgroup.c 2013-01-29 11:49:16.076677425 -0500
@@ -230,14 +230,51 @@ static void dev_exception_clean(struct d
__dev_exception_clean(dev_cgroup);
 }
 
+/**
+ * devcgroup_online - initializes devcgroup's behavior and exceptions based on
+ *   parent's
+ * @cgroup: cgroup getting online
+ * returns 0 in case of success, error code otherwise
+ */
+static int devcgroup_online(struct cgroup *cgroup)
+{
+   struct dev_cgroup *dev_cgroup, *parent_dev_cgroup = NULL;
+   int ret = 0;
+
+   mutex_lock(devcgroup_mutex);
+   dev_cgroup = cgroup_to_devcgroup(cgroup);
+   if (cgroup-parent)
+   parent_dev_cgroup = cgroup_to_devcgroup(cgroup-parent);
+
+   if (parent_dev_cgroup == NULL)
+   dev_cgroup-behavior = DEVCG_DEFAULT_ALLOW;
+   else {
+   ret = dev_exceptions_copy(dev_cgroup-exceptions,
+ parent_dev_cgroup-exceptions);
+   if (!ret)
+   dev_cgroup-behavior = parent_dev_cgroup-behavior;
+   }
+   mutex_unlock(devcgroup_mutex);
+
+   return ret;
+}
+
+static void devcgroup_offline(struct cgroup *cgroup)
+{
+   struct dev_cgroup *dev_cgroup = cgroup_to_devcgroup(cgroup);
+
+   mutex_lock(devcgroup_mutex);
+   dev_cgroup-behavior = DEVCG_DEFAULT_NONE;
+   mutex_unlock(devcgroup_mutex);
+}
+
 /*
  * called from kernel/cgroup.c with cgroup_lock() held.
  */
 static struct cgroup_subsys_state *devcgroup_css_alloc(struct cgroup *cgroup)
 {
-   struct dev_cgroup *dev_cgroup, *parent_dev_cgroup;
+   struct dev_cgroup *dev_cgroup;
struct cgroup *parent_cgroup;
-   int ret;
 
dev_cgroup = kzalloc(sizeof(*dev_cgroup), GFP_KERNEL);
if (!dev_cgroup)
@@ -245,23 +282,9 @@ static struct cgroup_subsys_state *devcg
INIT_LIST_HEAD(dev_cgroup-exceptions);
INIT_LIST_HEAD(dev_cgroup-local.exceptions);
dev_cgroup-local.behavior = DEVCG_DEFAULT_NONE;
+   dev_cgroup-behavior = DEVCG_DEFAULT_NONE;
parent_cgroup = cgroup-parent;
 
-   if (parent_cgroup == NULL)
-   dev_cgroup-behavior = DEVCG_DEFAULT_ALLOW;
-   else {
-   parent_dev_cgroup = cgroup_to_devcgroup(parent_cgroup);
-   mutex_lock(devcgroup_mutex);
-   ret = dev_exceptions_copy(dev_cgroup-exceptions,
- parent_dev_cgroup-exceptions);
-   dev_cgroup-behavior = parent_dev_cgroup-behavior;
-   mutex_unlock(devcgroup_mutex);
-   if (ret) {
-   kfree(dev_cgroup);
-   return ERR_PTR(ret);
-   }
-   }
-
return dev_cgroup-css;
 }
 
@@ -635,6 +658,8 @@ struct cgroup_subsys devices_subsys = {
.can_attach = devcgroup_can_attach,
.css_alloc = devcgroup_css_alloc,
.css_free = devcgroup_css_free,
+   .css_online = devcgroup_online,
+   .css_offline = devcgroup_offline,
.subsys_id = devices_subsys_id,
.base_cftypes = dev_cgroup_files,
 

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v3 7/9] devcg: split single exception copy from dev_exceptions_copy()

2013-01-29 Thread aris

This patch is in preparation for hierarchy support

Cc: Tejun Heo t...@kernel.org
Cc: Serge Hallyn serge.hal...@canonical.com
Signed-off-by: Aristeu Rozanski a...@redhat.com

---
 security/device_cgroup.c |   18 ++
 1 file changed, 14 insertions(+), 4 deletions(-)

--- github.orig/security/device_cgroup.c2013-01-29 11:49:16.076677425 
-0500
+++ github/security/device_cgroup.c 2013-01-29 11:49:16.374681863 -0500
@@ -89,20 +89,30 @@ static int devcgroup_can_attach(struct c
return 0;
 }
 
+static int dev_exception_copy(struct list_head *dest,
+ struct dev_exception_item *ex)
+{
+   struct dev_exception_item *new;
+
+   new = kmemdup(ex, sizeof(*ex), GFP_KERNEL);
+   if (!new)
+   return -ENOMEM;
+   list_add_tail(new-list, dest);
+   return 0;
+}
+
 /*
  * called under devcgroup_mutex
  */
 static int dev_exceptions_copy(struct list_head *dest, struct list_head *orig)
 {
-   struct dev_exception_item *ex, *tmp, *new;
+   struct dev_exception_item *ex, *tmp;
 
lockdep_assert_held(devcgroup_mutex);
 
list_for_each_entry(ex, orig, list) {
-   new = kmemdup(ex, sizeof(*ex), GFP_KERNEL);
-   if (!new)
+   if (dev_exception_copy(dest, ex))
goto free_and_exit;
-   list_add_tail(new-list, dest);
}
 
return 0;

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v3 0/9] devcg: introduce proper hierarchy support

2013-01-29 Thread aris

This patchset implements device cgroup hierarchy. Behaviors and exceptions
will be propagated down the tree, and local preferences will be re-evaluated
every time a change in a parent occurs, reapplying them if that is still
possible.

git://github.com/aristeu/linux-2.6.git
branch: devcg_hierarchy_review

v3:
- update documentation
- move css_online/css_offline changes to a new patch
- use cgroup_for_each_descendant_pre() instead of own descendant walk
- move exception_copy rework to a separate patch
- move exception_clean rework to a separate patch
- new patch to just move dev_exception_rm() before dev_exception_add()
  as requested by Tejun.
- updated patch description for may_access() changes
- new patch to expand the may_access() logic before changing it
- fixed argument description order in may_access()

v2:
- rebase on top of "device_cgroup: don't grab mutex in rcu callback"
- in case parent changes behavior or exceptions and the local exceptions won't
  apply anymore, remove them instead of keeping them around.

Cc: Tejun Heo t...@kernel.org 
  
Cc: Serge Hallyn serge.hal...@canonical.com   

Signed-off-by: Aristeu Rozanski a...@redhat.com   


-- 
Aristeu
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v3 3/9] device_cgroup: keep track of local group settings

2013-01-29 Thread aris

In preparation for better hierarchy support, it's needed to retain the local
settings in order to try to reapply them after a propagated change if they're
still valid.

v2: split this patch in two, one to just move dev_exception_rm() before
dev_exception_add() while keeping functional changes in this patch as
requested by Tejun.

Cc: Tejun Heo t...@kernel.org
Cc: Serge Hallyn serge.hal...@canonical.com
Signed-off-by: Aristeu Rozanski a...@redhat.com
 
---
 security/device_cgroup.c |   83 +--
 1 file changed, 67 insertions(+), 16 deletions(-)

--- github.orig/security/device_cgroup.c2013-01-29 11:49:14.987661210 
-0500
+++ github/security/device_cgroup.c 2013-01-29 11:49:15.244665037 -0500
@@ -39,13 +39,27 @@ struct dev_exception_item {
struct rcu_head rcu;
 };
 
+enum devcg_behavior {
+   DEVCG_DEFAULT_NONE,
+   DEVCG_DEFAULT_ALLOW,
+   DEVCG_DEFAULT_DENY,
+};
+
 struct dev_cgroup {
struct cgroup_subsys_state css;
+
+   /* result of merging the parent's rules with local ones */
struct list_head exceptions;
-   enum {
-   DEVCG_DEFAULT_ALLOW,
-   DEVCG_DEFAULT_DENY,
-   } behavior;
+   enum devcg_behavior behavior;
+
+   /*
+* local set rules, saved so when a parent propagates new rules, the
+* local preferences can be preserved
+*/
+   struct {
+   struct list_head exceptions;
+   enum devcg_behavior behavior;
+   } local;
 };
 
 static inline struct dev_cgroup *css_to_devcgroup(struct cgroup_subsys_state 
*s)
@@ -104,8 +118,8 @@ free_and_exit:
 /*
  * called under devcgroup_mutex
  */
-static void dev_exception_rm(struct list_head *exceptions,
-struct dev_exception_item *ex)
+static void __dev_exception_rm(struct list_head *exceptions,
+  struct dev_exception_item *ex)
 {
struct dev_exception_item *walk, *tmp;
 
@@ -127,11 +141,18 @@ static void dev_exception_rm(struct list
}
 }
 
+static void dev_exception_rm(struct dev_cgroup *devcgroup,
+struct dev_exception_item *ex)
+{
+   __dev_exception_rm(devcgroup-local.exceptions, ex);
+   __dev_exception_rm(devcgroup-exceptions, ex);
+}
+
 /*
  * called under devcgroup_mutex
  */
-static int dev_exception_add(struct list_head *exceptions,
-struct dev_exception_item *ex)
+static int __dev_exception_add(struct list_head *exceptions,
+  struct dev_exception_item *ex)
 {
struct dev_exception_item *excopy, *walk;
 
@@ -159,6 +180,28 @@ static int dev_exception_add(struct list
return 0;
 }
 
+static int dev_exception_add(struct dev_cgroup *devcgroup,
+struct dev_exception_item *ex)
+{
+   int rc;
+
+   lockdep_assert_held(devcgroup_mutex);
+
+   /*
+* we add to the local list so we can preserve local preferences if
+* the parent propagates down new rules
+*/
+   rc = __dev_exception_add(devcgroup-local.exceptions, ex);
+   if (rc)
+   return rc;
+
+   rc = __dev_exception_add(devcgroup-exceptions, ex);
+   if (rc)
+   __dev_exception_rm(devcgroup-local.exceptions, ex);
+
+   return rc;
+}
+
 static void __dev_exception_clean(struct dev_cgroup *dev_cgroup)
 {
struct dev_exception_item *ex, *tmp;
@@ -167,6 +210,11 @@ static void __dev_exception_clean(struct
list_del_rcu(ex-list);
kfree_rcu(ex, rcu);
}
+   list_for_each_entry_safe(ex, tmp, dev_cgroup-local.exceptions,
+list) {
+   list_del_rcu(ex-list);
+   kfree_rcu(ex, rcu);
+   }
 }
 
 /**
@@ -195,6 +243,8 @@ static struct cgroup_subsys_state *devcg
if (!dev_cgroup)
return ERR_PTR(-ENOMEM);
INIT_LIST_HEAD(dev_cgroup-exceptions);
+   INIT_LIST_HEAD(dev_cgroup-local.exceptions);
+   dev_cgroup-local.behavior = DEVCG_DEFAULT_NONE;
parent_cgroup = cgroup-parent;
 
if (parent_cgroup == NULL)
@@ -413,18 +463,19 @@   memset(ex, 0, sizeof(ex));
if (!may_allow_all(parent))
return -EPERM;
dev_exception_clean(devcgroup);
+   if (parent)
+   rc = dev_exceptions_copy(devcgroup-exceptions,
+parent-exceptions);
devcgroup-behavior = DEVCG_DEFAULT_ALLOW;
-   if (!parent)
-   break;
+   devcgroup-local.behavior = DEVCG_DEFAULT_ALLOW;
 
-   rc = dev_exceptions_copy(devcgroup-exceptions,
-parent-exceptions);
if (rc)

[PATCH v3 1/9] device_cgroup: prepare exception list handling functions for two lists

2013-01-29 Thread aris

In the following patches, device_cgroup structure will have two sets of
behavior and exceptions list (actual one, another with the local settings)
so rework the functions to use exception list, not a device_cgroup.

Cc: Tejun Heo t...@kernel.org
Cc: Serge Hallyn serge.hal...@canonical.com
Signed-off-by: Aristeu Rozanski a...@redhat.com

---
 security/device_cgroup.c |   18 +-
 1 file changed, 9 insertions(+), 9 deletions(-)

--- github.orig/security/device_cgroup.c2013-01-29 11:48:50.603298122 
-0500
+++ github/security/device_cgroup.c 2013-01-29 11:49:14.739657516 -0500
@@ -104,7 +104,7 @@ free_and_exit:
 /*
  * called under devcgroup_mutex
  */
-static int dev_exception_add(struct dev_cgroup *dev_cgroup,
+static int dev_exception_add(struct list_head *exceptions,
 struct dev_exception_item *ex)
 {
struct dev_exception_item *excopy, *walk;
@@ -115,7 +115,7 @@ static int dev_exception_add(struct dev_
if (!excopy)
return -ENOMEM;
 
-   list_for_each_entry(walk, dev_cgroup-exceptions, list) {
+   list_for_each_entry(walk, exceptions, list) {
if (walk-type != ex-type)
continue;
if (walk-major != ex-major)
@@ -129,21 +129,21 @@ static int dev_exception_add(struct dev_
}
 
if (excopy != NULL)
-   list_add_tail_rcu(excopy-list, dev_cgroup-exceptions);
+   list_add_tail_rcu(excopy-list, exceptions);
return 0;
 }
 
 /*
  * called under devcgroup_mutex
  */
-static void dev_exception_rm(struct dev_cgroup *dev_cgroup,
+static void dev_exception_rm(struct list_head *exceptions,
 struct dev_exception_item *ex)
 {
struct dev_exception_item *walk, *tmp;
 
lockdep_assert_held(devcgroup_mutex);
 
-   list_for_each_entry_safe(walk, tmp, dev_cgroup-exceptions, list) {
+   list_for_each_entry_safe(walk, tmp, exceptions, list) {
if (walk-type != ex-type)
continue;
if (walk-major != ex-major)
@@ -514,10 +514,10 @@   case '\0':
 * don't want to break compatibility
 */
if (devcgroup-behavior == DEVCG_DEFAULT_ALLOW) {
-   dev_exception_rm(devcgroup, ex);
+   dev_exception_rm(devcgroup-exceptions, ex);
return 0;
}
-   return dev_exception_add(devcgroup, ex);
+   return dev_exception_add(devcgroup-exceptions, ex);
case DEVCG_DENY:
/*
 * If the default policy is to deny by default, try to remove
@@ -525,10 +525,10 @@   return 0;
 * don't want to break compatibility
 */
if (devcgroup-behavior == DEVCG_DEFAULT_DENY) {
-   dev_exception_rm(devcgroup, ex);
+   dev_exception_rm(devcgroup-exceptions, ex);
return 0;
}
-   return dev_exception_add(devcgroup, ex);
+   return dev_exception_add(devcgroup-exceptions, ex);
default:
return -EINVAL;
}

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v3 8/9] devcg: refactor dev_exception_clean()

2013-01-29 Thread aris

This patch is in preparation for hierarchy support.

Cc: Tejun Heo t...@kernel.org
Cc: Serge Hallyn serge.hal...@canonical.com
Signed-off-by: Aristeu Rozanski a...@redhat.com

---
 security/device_cgroup.c |   34 --
 1 file changed, 16 insertions(+), 18 deletions(-)

--- github.orig/security/device_cgroup.c2013-01-29 11:49:16.374681863 
-0500
+++ github/security/device_cgroup.c 2013-01-29 11:49:16.653686016 -0500
@@ -212,32 +212,33 @@ static int dev_exception_add(struct dev_
return rc;
 }
 
-static void __dev_exception_clean(struct dev_cgroup *dev_cgroup)
+static void dev_exception_clean(struct list_head *exceptions)
 {
struct dev_exception_item *ex, *tmp;
 
-   list_for_each_entry_safe(ex, tmp, dev_cgroup-exceptions, list) {
-   list_del_rcu(ex-list);
-   kfree_rcu(ex, rcu);
-   }
-   list_for_each_entry_safe(ex, tmp, dev_cgroup-local.exceptions,
-list) {
+   list_for_each_entry_safe(ex, tmp, exceptions, list) {
list_del_rcu(ex-list);
kfree_rcu(ex, rcu);
}
 }
 
+static void __dev_exception_clean_all(struct dev_cgroup *dev_cgroup)
+{
+   dev_exception_clean(dev_cgroup-exceptions);
+   dev_exception_clean(dev_cgroup-local.exceptions);
+}
+
 /**
- * dev_exception_clean - frees all entries of the exception list
+ * dev_exception_clean_all - frees all entries of the exception list
  * @dev_cgroup: dev_cgroup with the exception list to be cleaned
  *
  * called under devcgroup_mutex
  */
-static void dev_exception_clean(struct dev_cgroup *dev_cgroup)
+static void dev_exception_clean_all(struct dev_cgroup *dev_cgroup)
 {
lockdep_assert_held(devcgroup_mutex);
 
-   __dev_exception_clean(dev_cgroup);
+   __dev_exception_clean_all(dev_cgroup);
 }
 
 /**
@@ -303,7 +304,7 @@ static void devcgroup_css_free(struct cg
struct dev_cgroup *dev_cgroup;
 
dev_cgroup = cgroup_to_devcgroup(cgroup);
-   __dev_exception_clean(dev_cgroup);
+   __dev_exception_clean_all(dev_cgroup);
kfree(dev_cgroup);
 }
 
@@ -508,25 +509,22 @@   memset(ex, 0, sizeof(ex));
case DEVCG_ALLOW:
if (!may_allow_all(parent))
return -EPERM;
-   dev_exception_clean(devcgroup);
+   dev_exception_clean_all(devcgroup);
if (parent)
rc = dev_exceptions_copy(devcgroup-exceptions,
 parent-exceptions);
devcgroup-behavior = DEVCG_DEFAULT_ALLOW;
devcgroup-local.behavior = DEVCG_DEFAULT_ALLOW;
-
-   if (rc)
-   return rc;
break;
case DEVCG_DENY:
-   dev_exception_clean(devcgroup);
+   dev_exception_clean_all(devcgroup);
devcgroup-behavior = DEVCG_DEFAULT_DENY;
devcgroup-local.behavior = DEVCG_DEFAULT_DENY;
break;
default:
-   return -EINVAL;
+   rc = -EINVAL;
}
-   return 0;
+   return rc;
case 'b':
ex.type = DEV_BLOCK;
break;

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v3 2/9] devcg: reorder device exception functions

2013-01-29 Thread aris

In preparation for the next patch, reorder dev_exception_add() and
dev_exception_rm().

Cc: Tejun Heo t...@kernel.org
Cc: Serge Hallyn serge.hal...@canonical.com
Signed-off-by: Aristeu Rozanski a...@redhat.com

---
 security/device_cgroup.c |   44 ++--
 1 file changed, 22 insertions(+), 22 deletions(-)

--- github.orig/security/device_cgroup.c2013-01-29 11:49:14.739657516 
-0500
+++ github/security/device_cgroup.c 2013-01-29 11:49:14.987661210 -0500
@@ -104,18 +104,14 @@ free_and_exit:
 /*
  * called under devcgroup_mutex
  */
-static int dev_exception_add(struct list_head *exceptions,
+static void dev_exception_rm(struct list_head *exceptions,
 struct dev_exception_item *ex)
 {
-   struct dev_exception_item *excopy, *walk;
+   struct dev_exception_item *walk, *tmp;
 
lockdep_assert_held(devcgroup_mutex);
 
-   excopy = kmemdup(ex, sizeof(*ex), GFP_KERNEL);
-   if (!excopy)
-   return -ENOMEM;
-
-   list_for_each_entry(walk, exceptions, list) {
+   list_for_each_entry_safe(walk, tmp, exceptions, list) {
if (walk-type != ex-type)
continue;
if (walk-major != ex-major)
@@ -123,27 +119,29 @@ static int dev_exception_add(struct list
if (walk-minor != ex-minor)
continue;
 
-   walk-access |= ex-access;
-   kfree(excopy);
-   excopy = NULL;
+   walk-access = ~ex-access;
+   if (!walk-access) {
+   list_del_rcu(walk-list);
+   kfree_rcu(walk, rcu);
+   }
}
-
-   if (excopy != NULL)
-   list_add_tail_rcu(excopy-list, exceptions);
-   return 0;
 }
 
 /*
  * called under devcgroup_mutex
  */
-static void dev_exception_rm(struct list_head *exceptions,
+static int dev_exception_add(struct list_head *exceptions,
 struct dev_exception_item *ex)
 {
-   struct dev_exception_item *walk, *tmp;
+   struct dev_exception_item *excopy, *walk;
 
lockdep_assert_held(devcgroup_mutex);
 
-   list_for_each_entry_safe(walk, tmp, exceptions, list) {
+   excopy = kmemdup(ex, sizeof(*ex), GFP_KERNEL);
+   if (!excopy)
+   return -ENOMEM;
+
+   list_for_each_entry(walk, exceptions, list) {
if (walk-type != ex-type)
continue;
if (walk-major != ex-major)
@@ -151,12 +149,14 @@ static void dev_exception_rm(struct list
if (walk-minor != ex-minor)
continue;
 
-   walk-access = ~ex-access;
-   if (!walk-access) {
-   list_del_rcu(walk-list);
-   kfree_rcu(walk, rcu);
-   }
+   walk-access |= ex-access;
+   kfree(excopy);
+   excopy = NULL;
}
+
+   if (excopy != NULL)
+   list_add_tail_rcu(excopy-list, exceptions);
+   return 0;
 }
 
 static void __dev_exception_clean(struct dev_cgroup *dev_cgroup)

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v3 9/9] devcg: propagate local changes down the hierarchy

2013-01-29 Thread aris

This patch makes all changes propagate down the hierarchy, respecting local
configurations when possible.

Behavior changes will clean up exceptions in all the children except when the
parent changes the behavior from allow to deny and the child's behavior was
already deny, in which case the local exceptions will be reused. The inverse
is not possible: you can't have a parent with behavior deny and a child with
behavior allow.

New exceptions allowing additional access to devices won't be propagated, but
it'll be possible to add an exception to access all or part of the newly
allowed device(s).

New exceptions disallowing access to devices will be propagated down and the
local group's exceptions will be revalidated for the new situation.
Example:
  A
 / \
B

group   behavior   exceptions
A       allow      "b 8:* rwm", "c 116:1 rw"
B       deny       "c 1:3 rwm", "c 116:2 rwm", "b 3:* rwm"

If a new exception is added to group A:
# echo "c 116:* r" > A/devices.deny
it'll propagate down and after revalidating B's local exceptions, the exception
"c 116:2 rwm" will be removed.

In case parent behavior or exceptions change and local settings are not
allowed anymore, they'll be deleted.

v3:
- update documentation
- move css_online/css_offline changes to a new patch
- use cgroup_for_each_descendant_pre() instead of own descendant walk
- move exception_copy rework to a separate patch
- move exception_clean rework to a separate patch

v2:
- instead of keeping the local settings that won't apply anymore, remove them


Cc: Tejun Heo t...@kernel.org
Cc: Serge Hallyn serge.hal...@canonical.com
Signed-off-by: Aristeu Rozanski a...@redhat.com

---
 Documentation/cgroups/devices.txt |   66 ++
 security/device_cgroup.c  |  170 --
 2 files changed, 230 insertions(+), 6 deletions(-)

--- github.orig/security/device_cgroup.c2013-01-29 11:49:16.653686016 
-0500
+++ github/security/device_cgroup.c 2013-01-29 13:58:07.537099962 -0500
@@ -60,6 +60,9 @@ struct dev_cgroup {
struct list_head exceptions;
enum devcg_behavior behavior;
} local;
+
+   /* temporary list for pending propagation operations */
+   struct list_head propagate_pending;
 };
 
 static inline struct dev_cgroup *css_to_devcgroup(struct cgroup_subsys_state 
*s)
@@ -241,6 +244,11 @@ static void dev_exception_clean_all(stru
__dev_exception_clean_all(dev_cgroup);
 }
 
+static inline bool is_devcg_online(const struct dev_cgroup *devcg)
+{
+   return (devcg-behavior != DEVCG_DEFAULT_NONE);
+}
+
 /**
  * devcgroup_online - initializes devcgroup's behavior and exceptions based on
  *   parent's
@@ -292,6 +300,7 @@ static struct cgroup_subsys_state *devcg
return ERR_PTR(-ENOMEM);
INIT_LIST_HEAD(dev_cgroup-exceptions);
INIT_LIST_HEAD(dev_cgroup-local.exceptions);
+   INIT_LIST_HEAD(dev_cgroup-propagate_pending);
dev_cgroup-local.behavior = DEVCG_DEFAULT_NONE;
dev_cgroup-behavior = DEVCG_DEFAULT_NONE;
parent_cgroup = cgroup-parent;
@@ -471,6 +480,139 @@ static inline int may_allow_all(struct d
return parent-behavior == DEVCG_DEFAULT_ALLOW;
 }
 
+/**
+ * revalidate_exceptions - walks through the exception list and revalidates
+ *  the exceptions based on parents' behavior and
+ *  exceptions. Called with devcgroup_mutex held.
+ * @devcg: cgroup which exceptions will be checked
+ *
+ * returns: 0 in success, -ENOMEM in case of out of memory
+ *
+ * This is one of the two key functions for hierarchy implementation.
+ * This function is responsible for re-evaluating all the cgroup's locally
+ * set exceptions due to a parent's behavior or exception change.
+ * Refer to Documentation/cgroups/devices.txt for more details.
+ */
+static int revalidate_exceptions(struct dev_cgroup *devcg)
+{
+   struct dev_exception_item *ex;
+   struct list_head *this, *tmp;
+
+   list_for_each_safe(this, tmp, devcg-local.exceptions) {
+   ex = container_of(this, struct dev_exception_item, list);
+   if (parent_has_perm(devcg, ex)) {
+   if (dev_exception_copy(devcg-exceptions, ex))
+   goto error;
+   } else
+   __dev_exception_rm(devcg-local.exceptions, ex);
+   }
+   return 0;
+
+error:
+   dev_exception_clean(devcg-exceptions);
+   return -ENOMEM;
+}
+
+/**
+ * propagate_behavior - propagates a change in the behavior down in hierarchy
+ * @devcg_root: device cgroup that changed behavior
+ *
+ * returns: 0 in case of success, != 0 in case of error
+ *
+ * This is one of the two key functions for hierarchy implementation.
+ * All cgroup's children recursively will have the behavior changed and
+ * exceptions copied from the parent then its local

[PATCH v2 3/4] device_cgroup: make may_access() stronger

2013-01-24 Thread aris

In order to revalidate local exceptions for the hierarchy change propagation,
make may_access() stronger.

Cc: Tejun Heo 
Cc: Serge Hallyn 
Signed-off-by: Aristeu Rozanski 

---
 security/device_cgroup.c |   48 +--
 1 file changed, 30 insertions(+), 18 deletions(-)

--- github.orig/security/device_cgroup.c2013-01-24 10:40:46.384253615 
-0500
+++ github/security/device_cgroup.c 2013-01-24 10:41:07.513567697 -0500
@@ -353,13 +353,15 @@   return 0;
  * won't have more privileges than its parent or to
  * verify if a certain access is allowed.
  * @dev_cgroup: dev cgroup to be tested against
+ * @behavior: behavior of the exception
  * @refex: new exception
  */
-static int may_access(struct dev_cgroup *dev_cgroup,
- struct dev_exception_item *refex)
+static bool may_access(struct dev_cgroup *dev_cgroup,
+  struct dev_exception_item *refex,
+  enum devcg_behavior behavior)
 {
struct dev_exception_item *ex;
-   bool match = false;
+   int match = false;
 
rcu_lockdep_assert(rcu_read_lock_held() ||
   lockdep_is_held(_mutex),
@@ -380,18 +382,28 @@   if (ex->minor != ~0 && ex->minor != re
break;
}
 
-   /*
-* In two cases we'll consider this new exception valid:
-* - the dev cgroup has its default policy to allow + exception list:
-*   the new exception should *not* match any of the exceptions
-*   (behavior == DEVCG_DEFAULT_ALLOW, !match)
-* - the dev cgroup has its default policy to deny + exception list:
-*   the new exception *should* match the exceptions
-*   (behavior == DEVCG_DEFAULT_DENY, match)
-*/
-   if ((dev_cgroup->behavior == DEVCG_DEFAULT_DENY) == match)
-   return 1;
-   return 0;
+   if (dev_cgroup->behavior == DEVCG_DEFAULT_ALLOW) {
+   if (behavior == DEVCG_DEFAULT_ALLOW) {
+   /* the exception will deny access to certain devices */
+   return true;
+   } else {
+   /* the exception will allow access to certain devices */
+   if (match)
+   /*
+* a new exception allowing access shouldn't
+* match an parent's exception
+*/
+   return false;
+   return true;
+   }
+   } else {
+   /* only behavior == DEVCG_DEFAULT_DENY allowed here */
+   if (match)
+   /* parent has an exception that matches the proposed */
+   return true;
+   else
+   return false;
+   }
 }
 
 /*
@@ -408,7 +420,7 @@ static int parent_has_perm(struct dev_cg
if (!pcg)
return 1;
parent = cgroup_to_devcgroup(pcg);
-   return may_access(parent, ex);
+   return may_access(parent, ex, childcg->behavior);
 }
 
 /**
@@ -442,7 +454,7 @@ static int devcgroup_update_access(struc
 {
const char *b;
char temp[12];  /* 11 + 1 characters needed for a u32 */
-   int count, rc;
+   int count, rc = 0;
struct dev_exception_item ex;
struct cgroup *p = devcgroup->css.cgroup;
struct dev_cgroup *parent = NULL;
@@ -660,7 +672,7 @@ memset(, 0, sizeof(ex));
 
rcu_read_lock();
dev_cgroup = task_devcgroup(current);
-   rc = may_access(dev_cgroup, );
+   rc = may_access(dev_cgroup, , dev_cgroup->behavior);
rcu_read_unlock();
 
if (!rc)

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v2 4/4] device_cgroup: propagate local changes down the hierarchy

2013-01-24 Thread aris

This patch makes all changes propagate down the hierarchy, respecting local
configurations when possible.

Behavior changes will clean up exceptions in all the children except when the
parent changes the behavior from allow to deny and the child's behavior was
already deny, in which case the local exceptions will be reused. The inverse
is not possible: you can't have a parent with behavior deny and a child with
behavior allow.

New exceptions allowing additional access to devices won't be propagated, but
it'll be possible to add an exception to access all or part of the newly
allowed device(s).

New exceptions disallowing access to devices will be propagated down and the
local group's exceptions will be revalidated for the new situation.
Example:
  A
 / \
B

group   behavior   exceptions
A       allow      "b 8:* rwm", "c 116:1 rw"
B       deny       "c 1:3 rwm", "c 116:2 rwm", "b 3:* rwm"

If a new exception is added to group A:
# echo "c 116:* r" > A/devices.deny
it'll propagate down and after revalidating B's local exceptions, the exception
"c 116:2 rwm" will be removed.

In case parent behavior or exceptions change and local settings are not
allowed anymore, they'll be deleted.

v2: instead of keeping the local settings that won't apply anymore, remove them

Cc: Tejun Heo 
Cc: Serge Hallyn 
Signed-off-by: Aristeu Rozanski 

---
 security/device_cgroup.c |  296 ---
 1 file changed, 252 insertions(+), 44 deletions(-)

--- github.orig/security/device_cgroup.c2013-01-24 10:41:07.513567697 
-0500
+++ github/security/device_cgroup.c 2013-01-24 10:41:15.545687094 -0500
@@ -89,28 +89,38 @@ static int devcgroup_can_attach(struct c
return 0;
 }
 
+static int dev_exception_copy(struct list_head *dest,
+ struct dev_exception_item *ex)
+{
+   struct dev_exception_item *new;
+
+   new = kmemdup(ex, sizeof(*ex), GFP_KERNEL);
+   if (!new)
+   return -ENOMEM;
+   list_add_tail_rcu(>list, dest);
+   return 0;
+}
+
 /*
  * called under devcgroup_mutex
  */
 static int dev_exceptions_copy(struct list_head *dest, struct list_head *orig)
 {
-   struct dev_exception_item *ex, *tmp, *new;
+   struct dev_exception_item *ex, *tmp;
 
lockdep_assert_held(_mutex);
 
list_for_each_entry(ex, orig, list) {
-   new = kmemdup(ex, sizeof(*ex), GFP_KERNEL);
-   if (!new)
+   if (dev_exception_copy(dest, ex))
goto free_and_exit;
-   list_add_tail(>list, dest);
}
 
return 0;
 
 free_and_exit:
list_for_each_entry_safe(ex, tmp, dest, list) {
-   list_del(>list);
-   kfree(ex);
+   list_del_rcu(>list);
+   kfree_rcu(ex, rcu);
}
return -ENOMEM;
 }
@@ -202,32 +212,80 @@ static int dev_exception_add(struct dev_
return rc;
 }
 
-static void __dev_exception_clean(struct dev_cgroup *dev_cgroup)
+static void dev_exception_clean(struct list_head *exceptions)
 {
struct dev_exception_item *ex, *tmp;
 
-   list_for_each_entry_safe(ex, tmp, _cgroup->exceptions, list) {
-   list_del_rcu(>list);
-   kfree_rcu(ex, rcu);
-   }
-   list_for_each_entry_safe(ex, tmp, _cgroup->local.exceptions,
-list) {
+   list_for_each_entry_safe(ex, tmp, exceptions, list) {
list_del_rcu(>list);
kfree_rcu(ex, rcu);
}
 }
 
+static void __dev_exception_clean_all(struct dev_cgroup *dev_cgroup)
+{
+   dev_exception_clean(_cgroup->exceptions);
+   dev_exception_clean(_cgroup->local.exceptions);
+}
+
 /**
  * dev_exception_clean - frees all entries of the exception list
  * @dev_cgroup: dev_cgroup with the exception list to be cleaned
  *
  * called under devcgroup_mutex
  */
-static void dev_exception_clean(struct dev_cgroup *dev_cgroup)
+static void dev_exception_clean_all(struct dev_cgroup *dev_cgroup)
 {
lockdep_assert_held(_mutex);
 
-   __dev_exception_clean(dev_cgroup);
+   __dev_exception_clean_all(dev_cgroup);
+}
+
+static inline bool is_devcg_online(const struct dev_cgroup *devcg)
+{
+   return (devcg->behavior != DEVCG_DEFAULT_NONE);
+}
+
+/**
+ * devcg_for_each_child - traverse online children of a device cgroup
+ * @child_cs: loop cursor pointing to the current child
+ * @pos_cgrp: used for iteration
+ * @parent_cs: target device cgroup to walk children of
+ *
+ * Walk @child_cs through the online children of @parent_cs.  Must be used
+ * with RCU read locked.
+ */
+#define devcg_for_each_child(pos_cgrp, root)   \
+   cgroup_for_each_child((pos_cgrp), (root))   \
+   if (is_devcg_online(cgroup_to_devcgroup((pos_cgrp
+
+static int devcgroup_online(struct cgroup *cgroup)
+{
+

[PATCH v2 2/4] device_cgroup: keep track of local group settings

2013-01-24 Thread aris

In preparation for better hierarchy support, it's needed to retain the local
settings in order to try to reapply them after a propagated change if they're
still valid.

Cc: Tejun Heo 
Cc: Serge Hallyn 
Signed-off-by: Aristeu Rozanski 
 
---
 security/device_cgroup.c |  115 +--
 1 file changed, 83 insertions(+), 32 deletions(-)

--- github.orig/security/device_cgroup.c2013-01-24 10:40:33.913068234 
-0500
+++ github/security/device_cgroup.c 2013-01-24 10:40:46.384253615 -0500
@@ -39,13 +39,27 @@ struct dev_exception_item {
struct rcu_head rcu;
 };
 
+enum devcg_behavior {
+   DEVCG_DEFAULT_NONE,
+   DEVCG_DEFAULT_ALLOW,
+   DEVCG_DEFAULT_DENY,
+};
+
 struct dev_cgroup {
struct cgroup_subsys_state css;
+
+   /* result of merging the parent's rules with local ones */
struct list_head exceptions;
-   enum {
-   DEVCG_DEFAULT_ALLOW,
-   DEVCG_DEFAULT_DENY,
-   } behavior;
+   enum devcg_behavior behavior;
+
+   /*
+* local set rules, saved so when a parent propagates new rules, the
+* local preferences can be preserved
+*/
+   struct {
+   struct list_head exceptions;
+   enum devcg_behavior behavior;
+   } local;
 };
 
 static inline struct dev_cgroup *css_to_devcgroup(struct cgroup_subsys_state 
*s)
@@ -104,8 +118,41 @@ free_and_exit:
 /*
  * called under devcgroup_mutex
  */
-static int dev_exception_add(struct list_head *exceptions,
-struct dev_exception_item *ex)
+static void __dev_exception_rm(struct list_head *exceptions,
+  struct dev_exception_item *ex)
+{
+   struct dev_exception_item *walk, *tmp;
+
+   lockdep_assert_held(_mutex);
+
+   list_for_each_entry_safe(walk, tmp, exceptions, list) {
+   if (walk->type != ex->type)
+   continue;
+   if (walk->major != ex->major)
+   continue;
+   if (walk->minor != ex->minor)
+   continue;
+
+   walk->access &= ~ex->access;
+   if (!walk->access) {
+   list_del_rcu(>list);
+   kfree_rcu(walk, rcu);
+   }
+   }
+}
+
+static void dev_exception_rm(struct dev_cgroup *devcgroup,
+   struct dev_exception_item *ex)
+{
+   __dev_exception_rm(>local.exceptions, ex);
+   __dev_exception_rm(>exceptions, ex);
+}
+
+/*
+ * called under devcgroup_mutex
+ */
+static int __dev_exception_add(struct list_head *exceptions,
+  struct dev_exception_item *ex)
 {
struct dev_exception_item *excopy, *walk;
 
@@ -133,30 +180,26 @@ static int dev_exception_add(struct list
return 0;
 }
 
-/*
- * called under devcgroup_mutex
- */
-static void dev_exception_rm(struct list_head *exceptions,
+static int dev_exception_add(struct dev_cgroup *devcgroup,
 struct dev_exception_item *ex)
 {
-   struct dev_exception_item *walk, *tmp;
+   int rc;
 
lockdep_assert_held(_mutex);
 
-   list_for_each_entry_safe(walk, tmp, exceptions, list) {
-   if (walk->type != ex->type)
-   continue;
-   if (walk->major != ex->major)
-   continue;
-   if (walk->minor != ex->minor)
-   continue;
+   /*
+* we add to the local list so we can preserve local preferences if
+* the parent propagates down new rules
+*/
+   rc = __dev_exception_add(>local.exceptions, ex);
+   if (rc)
+   return rc;
+
+   rc = __dev_exception_add(>exceptions, ex);
+   if (rc)
+   __dev_exception_rm(>local.exceptions, ex);
 
-   walk->access &= ~ex->access;
-   if (!walk->access) {
-   list_del_rcu(>list);
-   kfree_rcu(walk, rcu);
-   }
-   }
+   return rc;
 }
 
 static void __dev_exception_clean(struct dev_cgroup *dev_cgroup)
@@ -167,6 +210,11 @@ static void __dev_exception_clean(struct
list_del_rcu(>list);
kfree_rcu(ex, rcu);
}
+   list_for_each_entry_safe(ex, tmp, _cgroup->local.exceptions,
+list) {
+   list_del_rcu(>list);
+   kfree_rcu(ex, rcu);
+   }
 }
 
 /**
@@ -195,6 +243,8 @@ static struct cgroup_subsys_state *devcg
if (!dev_cgroup)
return ERR_PTR(-ENOMEM);
INIT_LIST_HEAD(_cgroup->exceptions);
+   INIT_LIST_HEAD(_cgroup->local.exceptions);
+   dev_cgroup->local.behavior = DEVCG_DEFAULT_NONE;
parent_cgroup = cgroup->parent;
 
if (parent_cgroup == NULL)
@@ -413,18 +463,19 @@   memset(, 0, sizeof(ex));
if (!may_allow_all(parent))

[PATCH v2 1/4] device_cgroup: prepare exception list handling functions for two lists

2013-01-24 Thread aris

In the following patches, device_cgroup structure will have two sets of
behavior and exceptions list (actual one, another with the local settings)
so rework the functions to use exception list, not a device_cgroup.

Cc: Tejun Heo 
Cc: Serge Hallyn 
Signed-off-by: Aristeu Rozanski 

---
 security/device_cgroup.c |   18 +-
 1 file changed, 9 insertions(+), 9 deletions(-)

--- github.orig/security/device_cgroup.c2013-01-24 10:40:32.814051897 
-0500
+++ github/security/device_cgroup.c 2013-01-24 10:40:33.913068234 -0500
@@ -104,7 +104,7 @@ free_and_exit:
 /*
  * called under devcgroup_mutex
  */
-static int dev_exception_add(struct dev_cgroup *dev_cgroup,
+static int dev_exception_add(struct list_head *exceptions,
 struct dev_exception_item *ex)
 {
struct dev_exception_item *excopy, *walk;
@@ -115,7 +115,7 @@ static int dev_exception_add(struct dev_
if (!excopy)
return -ENOMEM;
 
-   list_for_each_entry(walk, _cgroup->exceptions, list) {
+   list_for_each_entry(walk, exceptions, list) {
if (walk->type != ex->type)
continue;
if (walk->major != ex->major)
@@ -129,21 +129,21 @@ static int dev_exception_add(struct dev_
}
 
if (excopy != NULL)
-   list_add_tail_rcu(>list, _cgroup->exceptions);
+   list_add_tail_rcu(>list, exceptions);
return 0;
 }
 
 /*
  * called under devcgroup_mutex
  */
-static void dev_exception_rm(struct dev_cgroup *dev_cgroup,
+static void dev_exception_rm(struct list_head *exceptions,
 struct dev_exception_item *ex)
 {
struct dev_exception_item *walk, *tmp;
 
lockdep_assert_held(_mutex);
 
-   list_for_each_entry_safe(walk, tmp, _cgroup->exceptions, list) {
+   list_for_each_entry_safe(walk, tmp, exceptions, list) {
if (walk->type != ex->type)
continue;
if (walk->major != ex->major)
@@ -514,10 +514,10 @@   case '\0':
 * don't want to break compatibility
 */
if (devcgroup->behavior == DEVCG_DEFAULT_ALLOW) {
-   dev_exception_rm(devcgroup, );
+   dev_exception_rm(>exceptions, );
return 0;
}
-   return dev_exception_add(devcgroup, );
+   return dev_exception_add(>exceptions, );
case DEVCG_DENY:
/*
 * If the default policy is to deny by default, try to remove
@@ -525,10 +525,10 @@   return 0;
 * don't want to break compatibility
 */
if (devcgroup->behavior == DEVCG_DEFAULT_DENY) {
-   dev_exception_rm(devcgroup, );
+   dev_exception_rm(>exceptions, );
return 0;
}
-   return dev_exception_add(devcgroup, );
+   return dev_exception_add(>exceptions, );
default:
return -EINVAL;
}

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v2 0/4] devcg: introduce proper hierarchy support

2013-01-24 Thread aris

This patchset implements device cgroup hierarchy. Behaviors and exceptions
will be propagated down the tree and local preferences will be re-evaluated
every time a change in the parent occurs, reapplying them if that is still
possible.

v2:
- rebase on top of "device_cgroup: don't grab mutex in rcu callback"
- in case parent changes behavior or exceptions and the local exceptions won't
  apply anymore, remove them instead of keeping them around.

Cc: Tejun Heo  
  
Cc: Serge Hallyn

Signed-off-by: Aristeu Rozanski


-- 
Aristeu
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v2 0/4] devcg: introduce proper hierarchy support

2013-01-24 Thread aris

This patchset implements device cgroup hierarchy. Behaviors and exceptions
will be propagated down the tree and local preferences will be re-evaluated
every time a change in the parent occurs, reapplying them if that is still
possible.

v2:
- rebase on top of "device_cgroup: don't grab mutex in rcu callback"
- in case parent changes behavior or exceptions and the local exceptions won't
  apply anymore, remove them instead of keeping them around.

Cc: Tejun Heo t...@kernel.org 
  
Cc: Serge Hallyn serge.hal...@canonical.com   

Signed-off-by: Aristeu Rozanski a...@redhat.com   


-- 
Aristeu
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v2 1/4] device_cgroup: prepare exception list handling functions for two lists

2013-01-24 Thread aris

In the following patches, the device_cgroup structure will have two sets of
behavior and exception lists (the actual one and another with the local
settings), so rework the functions to take an exception list instead of a
device_cgroup.

Cc: Tejun Heo t...@kernel.org
Cc: Serge Hallyn serge.hal...@canonical.com
Signed-off-by: Aristeu Rozanski a...@redhat.com

---
 security/device_cgroup.c |   18 +-
 1 file changed, 9 insertions(+), 9 deletions(-)

--- github.orig/security/device_cgroup.c2013-01-24 10:40:32.814051897 
-0500
+++ github/security/device_cgroup.c 2013-01-24 10:40:33.913068234 -0500
@@ -104,7 +104,7 @@ free_and_exit:
 /*
  * called under devcgroup_mutex
  */
-static int dev_exception_add(struct dev_cgroup *dev_cgroup,
+static int dev_exception_add(struct list_head *exceptions,
 struct dev_exception_item *ex)
 {
struct dev_exception_item *excopy, *walk;
@@ -115,7 +115,7 @@ static int dev_exception_add(struct dev_
if (!excopy)
return -ENOMEM;
 
-   list_for_each_entry(walk, dev_cgroup-exceptions, list) {
+   list_for_each_entry(walk, exceptions, list) {
if (walk-type != ex-type)
continue;
if (walk-major != ex-major)
@@ -129,21 +129,21 @@ static int dev_exception_add(struct dev_
}
 
if (excopy != NULL)
-   list_add_tail_rcu(excopy-list, dev_cgroup-exceptions);
+   list_add_tail_rcu(excopy-list, exceptions);
return 0;
 }
 
 /*
  * called under devcgroup_mutex
  */
-static void dev_exception_rm(struct dev_cgroup *dev_cgroup,
+static void dev_exception_rm(struct list_head *exceptions,
 struct dev_exception_item *ex)
 {
struct dev_exception_item *walk, *tmp;
 
lockdep_assert_held(devcgroup_mutex);
 
-   list_for_each_entry_safe(walk, tmp, dev_cgroup-exceptions, list) {
+   list_for_each_entry_safe(walk, tmp, exceptions, list) {
if (walk-type != ex-type)
continue;
if (walk-major != ex-major)
@@ -514,10 +514,10 @@   case '\0':
 * don't want to break compatibility
 */
if (devcgroup-behavior == DEVCG_DEFAULT_ALLOW) {
-   dev_exception_rm(devcgroup, ex);
+   dev_exception_rm(devcgroup-exceptions, ex);
return 0;
}
-   return dev_exception_add(devcgroup, ex);
+   return dev_exception_add(devcgroup-exceptions, ex);
case DEVCG_DENY:
/*
 * If the default policy is to deny by default, try to remove
@@ -525,10 +525,10 @@   return 0;
 * don't want to break compatibility
 */
if (devcgroup-behavior == DEVCG_DEFAULT_DENY) {
-   dev_exception_rm(devcgroup, ex);
+   dev_exception_rm(devcgroup-exceptions, ex);
return 0;
}
-   return dev_exception_add(devcgroup, ex);
+   return dev_exception_add(devcgroup-exceptions, ex);
default:
return -EINVAL;
}

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v2 2/4] device_cgroup: keep track of local group settings

2013-01-24 Thread aris

In preparation for better hierarchy support, the local settings need to be
retained so that they can be reapplied after a propagated change, if they're
still valid.

Cc: Tejun Heo t...@kernel.org
Cc: Serge Hallyn serge.hal...@canonical.com
Signed-off-by: Aristeu Rozanski a...@redhat.com
 
---
 security/device_cgroup.c |  115 +--
 1 file changed, 83 insertions(+), 32 deletions(-)

--- github.orig/security/device_cgroup.c2013-01-24 10:40:33.913068234 
-0500
+++ github/security/device_cgroup.c 2013-01-24 10:40:46.384253615 -0500
@@ -39,13 +39,27 @@ struct dev_exception_item {
struct rcu_head rcu;
 };
 
+enum devcg_behavior {
+   DEVCG_DEFAULT_NONE,
+   DEVCG_DEFAULT_ALLOW,
+   DEVCG_DEFAULT_DENY,
+};
+
 struct dev_cgroup {
struct cgroup_subsys_state css;
+
+   /* result of merging the parent's rules with local ones */
struct list_head exceptions;
-   enum {
-   DEVCG_DEFAULT_ALLOW,
-   DEVCG_DEFAULT_DENY,
-   } behavior;
+   enum devcg_behavior behavior;
+
+   /*
+* local set rules, saved so when a parent propagates new rules, the
+* local preferences can be preserved
+*/
+   struct {
+   struct list_head exceptions;
+   enum devcg_behavior behavior;
+   } local;
 };
 
 static inline struct dev_cgroup *css_to_devcgroup(struct cgroup_subsys_state 
*s)
@@ -104,8 +118,41 @@ free_and_exit:
 /*
  * called under devcgroup_mutex
  */
-static int dev_exception_add(struct list_head *exceptions,
-struct dev_exception_item *ex)
+static void __dev_exception_rm(struct list_head *exceptions,
+  struct dev_exception_item *ex)
+{
+   struct dev_exception_item *walk, *tmp;
+
+   lockdep_assert_held(devcgroup_mutex);
+
+   list_for_each_entry_safe(walk, tmp, exceptions, list) {
+   if (walk-type != ex-type)
+   continue;
+   if (walk-major != ex-major)
+   continue;
+   if (walk-minor != ex-minor)
+   continue;
+
+   walk-access = ~ex-access;
+   if (!walk-access) {
+   list_del_rcu(walk-list);
+   kfree_rcu(walk, rcu);
+   }
+   }
+}
+
+static void dev_exception_rm(struct dev_cgroup *devcgroup,
+   struct dev_exception_item *ex)
+{
+   __dev_exception_rm(devcgroup-local.exceptions, ex);
+   __dev_exception_rm(devcgroup-exceptions, ex);
+}
+
+/*
+ * called under devcgroup_mutex
+ */
+static int __dev_exception_add(struct list_head *exceptions,
+  struct dev_exception_item *ex)
 {
struct dev_exception_item *excopy, *walk;
 
@@ -133,30 +180,26 @@ static int dev_exception_add(struct list
return 0;
 }
 
-/*
- * called under devcgroup_mutex
- */
-static void dev_exception_rm(struct list_head *exceptions,
+static int dev_exception_add(struct dev_cgroup *devcgroup,
 struct dev_exception_item *ex)
 {
-   struct dev_exception_item *walk, *tmp;
+   int rc;
 
lockdep_assert_held(devcgroup_mutex);
 
-   list_for_each_entry_safe(walk, tmp, exceptions, list) {
-   if (walk-type != ex-type)
-   continue;
-   if (walk-major != ex-major)
-   continue;
-   if (walk-minor != ex-minor)
-   continue;
+   /*
+* we add to the local list so we can preserve local preferences if
+* the parent propagates down new rules
+*/
+   rc = __dev_exception_add(devcgroup-local.exceptions, ex);
+   if (rc)
+   return rc;
+
+   rc = __dev_exception_add(devcgroup-exceptions, ex);
+   if (rc)
+   __dev_exception_rm(devcgroup-local.exceptions, ex);
 
-   walk-access = ~ex-access;
-   if (!walk-access) {
-   list_del_rcu(walk-list);
-   kfree_rcu(walk, rcu);
-   }
-   }
+   return rc;
 }
 
 static void __dev_exception_clean(struct dev_cgroup *dev_cgroup)
@@ -167,6 +210,11 @@ static void __dev_exception_clean(struct
list_del_rcu(ex-list);
kfree_rcu(ex, rcu);
}
+   list_for_each_entry_safe(ex, tmp, dev_cgroup-local.exceptions,
+list) {
+   list_del_rcu(ex-list);
+   kfree_rcu(ex, rcu);
+   }
 }
 
 /**
@@ -195,6 +243,8 @@ static struct cgroup_subsys_state *devcg
if (!dev_cgroup)
return ERR_PTR(-ENOMEM);
INIT_LIST_HEAD(dev_cgroup-exceptions);
+   INIT_LIST_HEAD(dev_cgroup-local.exceptions);
+   dev_cgroup-local.behavior = DEVCG_DEFAULT_NONE;
parent_cgroup = cgroup-parent;
 
if (parent_cgroup == NULL)
@@ -413,18 +463,19

[PATCH v2 4/4] device_cgroup: propagate local changes down the hierarchy

2013-01-24 Thread aris

This patch makes all changes propagate down the hierarchy, respecting local
configurations when possible.

Behavior changes will clean up exceptions in all the children except when the
parent changes the behavior from allow to deny and the child's behavior was
already deny, in which case the local exceptions will be reused. The inverse
is not possible: you can't have a parent with behavior deny and a child with
behavior accept.

New exceptions allowing additional access to devices won't be propagated, but
it'll be possible to add an exception to access all or part of the newly
allowed device(s).

New exceptions disallowing access to devices will be propagated down and the
local group's exceptions will be revalidated for the new situation.
Example:
  A
 / \
B

group   behavior   exceptions
A       allow      b 8:* rwm, c 116:1 rw
B       deny       c 1:3 rwm, c 116:2 rwm, b 3:* rwm

If a new exception is added to group A:
# echo "c 116:* r" > A/devices.deny
it'll propagate down and after revalidating B's local exceptions, the exception
c 116:2 rwm will be removed.

In case parent behavior or exceptions change and local settings are not
allowed anymore, they'll be deleted.

v2: instead of keeping the local settings that won't apply anymore, remove them

Cc: Tejun Heo t...@kernel.org
Cc: Serge Hallyn serge.hal...@canonical.com
Signed-off-by: Aristeu Rozanski a...@redhat.com

---
 security/device_cgroup.c |  296 ---
 1 file changed, 252 insertions(+), 44 deletions(-)

--- github.orig/security/device_cgroup.c2013-01-24 10:41:07.513567697 
-0500
+++ github/security/device_cgroup.c 2013-01-24 10:41:15.545687094 -0500
@@ -89,28 +89,38 @@ static int devcgroup_can_attach(struct c
return 0;
 }
 
+static int dev_exception_copy(struct list_head *dest,
+ struct dev_exception_item *ex)
+{
+   struct dev_exception_item *new;
+
+   new = kmemdup(ex, sizeof(*ex), GFP_KERNEL);
+   if (!new)
+   return -ENOMEM;
+   list_add_tail_rcu(new-list, dest);
+   return 0;
+}
+
 /*
  * called under devcgroup_mutex
  */
 static int dev_exceptions_copy(struct list_head *dest, struct list_head *orig)
 {
-   struct dev_exception_item *ex, *tmp, *new;
+   struct dev_exception_item *ex, *tmp;
 
lockdep_assert_held(devcgroup_mutex);
 
list_for_each_entry(ex, orig, list) {
-   new = kmemdup(ex, sizeof(*ex), GFP_KERNEL);
-   if (!new)
+   if (dev_exception_copy(dest, ex))
goto free_and_exit;
-   list_add_tail(new-list, dest);
}
 
return 0;
 
 free_and_exit:
list_for_each_entry_safe(ex, tmp, dest, list) {
-   list_del(ex-list);
-   kfree(ex);
+   list_del_rcu(ex-list);
+   kfree_rcu(ex, rcu);
}
return -ENOMEM;
 }
@@ -202,32 +212,80 @@ static int dev_exception_add(struct dev_
return rc;
 }
 
-static void __dev_exception_clean(struct dev_cgroup *dev_cgroup)
+static void dev_exception_clean(struct list_head *exceptions)
 {
struct dev_exception_item *ex, *tmp;
 
-   list_for_each_entry_safe(ex, tmp, dev_cgroup-exceptions, list) {
-   list_del_rcu(ex-list);
-   kfree_rcu(ex, rcu);
-   }
-   list_for_each_entry_safe(ex, tmp, dev_cgroup-local.exceptions,
-list) {
+   list_for_each_entry_safe(ex, tmp, exceptions, list) {
list_del_rcu(ex-list);
kfree_rcu(ex, rcu);
}
 }
 
+static void __dev_exception_clean_all(struct dev_cgroup *dev_cgroup)
+{
+   dev_exception_clean(dev_cgroup-exceptions);
+   dev_exception_clean(dev_cgroup-local.exceptions);
+}
+
 /**
  * dev_exception_clean - frees all entries of the exception list
  * @dev_cgroup: dev_cgroup with the exception list to be cleaned
  *
  * called under devcgroup_mutex
  */
-static void dev_exception_clean(struct dev_cgroup *dev_cgroup)
+static void dev_exception_clean_all(struct dev_cgroup *dev_cgroup)
 {
lockdep_assert_held(devcgroup_mutex);
 
-   __dev_exception_clean(dev_cgroup);
+   __dev_exception_clean_all(dev_cgroup);
+}
+
+static inline bool is_devcg_online(const struct dev_cgroup *devcg)
+{
+   return (devcg-behavior != DEVCG_DEFAULT_NONE);
+}
+
+/**
+ * devcg_for_each_child - traverse online children of a device cgroup
+ * @child_cs: loop cursor pointing to the current child
+ * @pos_cgrp: used for iteration
+ * @parent_cs: target device cgroup to walk children of
+ *
+ * Walk @child_cs through the online children of @parent_cs.  Must be used
+ * with RCU read locked.
+ */
+#define devcg_for_each_child(pos_cgrp, root)   \
+   cgroup_for_each_child((pos_cgrp), (root))   \
+   if

[PATCH v2 3/4] device_cgroup: make may_access() stronger

2013-01-24 Thread aris

In order to revalidate local exceptions for the hierarchy change propagation,
make may_access() stronger.

Cc: Tejun Heo t...@kernel.org
Cc: Serge Hallyn serge.hal...@canonical.com
Signed-off-by: Aristeu Rozanski a...@redhat.com

---
 security/device_cgroup.c |   48 +--
 1 file changed, 30 insertions(+), 18 deletions(-)

--- github.orig/security/device_cgroup.c2013-01-24 10:40:46.384253615 
-0500
+++ github/security/device_cgroup.c 2013-01-24 10:41:07.513567697 -0500
@@ -353,13 +353,15 @@   return 0;
  * won't have more privileges than its parent or to
  * verify if a certain access is allowed.
  * @dev_cgroup: dev cgroup to be tested against
+ * @behavior: behavior of the exception
  * @refex: new exception
  */
-static int may_access(struct dev_cgroup *dev_cgroup,
- struct dev_exception_item *refex)
+static bool may_access(struct dev_cgroup *dev_cgroup,
+  struct dev_exception_item *refex,
+  enum devcg_behavior behavior)
 {
struct dev_exception_item *ex;
-   bool match = false;
+   int match = false;
 
rcu_lockdep_assert(rcu_read_lock_held() ||
   lockdep_is_held(devcgroup_mutex),
@@ -380,18 +382,28 @@   if (ex-minor != ~0  ex-minor != re
break;
}
 
-   /*
-* In two cases we'll consider this new exception valid:
-* - the dev cgroup has its default policy to allow + exception list:
-*   the new exception should *not* match any of the exceptions
-*   (behavior == DEVCG_DEFAULT_ALLOW, !match)
-* - the dev cgroup has its default policy to deny + exception list:
-*   the new exception *should* match the exceptions
-*   (behavior == DEVCG_DEFAULT_DENY, match)
-*/
-   if ((dev_cgroup-behavior == DEVCG_DEFAULT_DENY) == match)
-   return 1;
-   return 0;
+   if (dev_cgroup-behavior == DEVCG_DEFAULT_ALLOW) {
+   if (behavior == DEVCG_DEFAULT_ALLOW) {
+   /* the exception will deny access to certain devices */
+   return true;
+   } else {
+   /* the exception will allow access to certain devices */
+   if (match)
+   /*
+* a new exception allowing access shouldn't
+* match an parent's exception
+*/
+   return false;
+   return true;
+   }
+   } else {
+   /* only behavior == DEVCG_DEFAULT_DENY allowed here */
+   if (match)
+   /* parent has an exception that matches the proposed */
+   return true;
+   else
+   return false;
+   }
 }
 
 /*
@@ -408,7 +420,7 @@ static int parent_has_perm(struct dev_cg
if (!pcg)
return 1;
parent = cgroup_to_devcgroup(pcg);
-   return may_access(parent, ex);
+   return may_access(parent, ex, childcg-behavior);
 }
 
 /**
@@ -442,7 +454,7 @@ static int devcgroup_update_access(struc
 {
const char *b;
char temp[12];  /* 11 + 1 characters needed for a u32 */
-   int count, rc;
+   int count, rc = 0;
struct dev_exception_item ex;
struct cgroup *p = devcgroup-css.cgroup;
struct dev_cgroup *parent = NULL;
@@ -660,7 +672,7 @@ memset(ex, 0, sizeof(ex));
 
rcu_read_lock();
dev_cgroup = task_devcgroup(current);
-   rc = may_access(dev_cgroup, ex);
+   rc = may_access(dev_cgroup, ex, dev_cgroup-behavior);
rcu_read_unlock();
 
if (!rc)

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v7 3/4] cgroup: add xattr support

2012-08-23 Thread aris

From: Li Zefan 

This is one of the items in the plumber's wish list.

For use cases:

>> What would the use case be for this?
>
> Attaching meta information to services, in an easily discoverable
> way. For example, in systemd we create one cgroup for each service, and
> could then store data like the main pid of the specific service as an
> xattr on the cgroup itself. That way we'd have almost all service state
> in the cgroupfs, which would make it possible to terminate systemd and
> later restart it without losing any state information. But there's more:
> for example, some very peculiar services cannot be terminated on
> shutdown (i.e. fakeraid DM stuff) and it would be really nice if the
> services in question could just mark that on their cgroup, by setting an
> xattr. On the more desktopy side of things there are other
> possibilities: for example there are plans defining what an application
> is along the lines of a cgroup (i.e. an app being a collection of
> processes). With xattrs one could then attach an icon or human readable
> program name on the cgroup.
>
> The key idea is that this would allow attaching runtime meta information
> to cgroups and everything they model (services, apps, vms), that doesn't
> need any complex userspace infrastructure, has good access control
> (i.e. because the file system enforces that anyway, and there's the
> "trusted." xattr namespace), notifications (inotify), and can easily be
> shared among applications.
>
> Lennart

v7:
- no changes
v6:
- remove user xattr namespace, only allow trusted and security
v5:
- check for capabilities before setting/removing xattrs
v4:
- no changes
v3:
- instead of config option, use mount option to enable xattr support

Cc: Li Zefan 
Cc: Tejun Heo 
Cc: Hugh Dickins 
Cc: Hillf Danton 
Cc: Lennart Poettering 
Signed-off-by: Li Zefan 
Signed-off-by: Aristeu Rozanski 

---
 include/linux/cgroup.h |   13 --
 kernel/cgroup.c|  100 +
 2 files changed, 103 insertions(+), 10 deletions(-)

Index: github/include/linux/cgroup.h
===
--- github.orig/include/linux/cgroup.h  2012-08-23 15:46:12.465169082 -0400
+++ github/include/linux/cgroup.h   2012-08-23 15:48:42.485258910 -0400
@@ -17,6 +17,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #ifdef CONFIG_CGROUPS
 
@@ -216,6 +217,9 @@
/* List of events which userspace want to receive */
struct list_head event_list;
spinlock_t event_list_lock;
+
+   /* directory xattrs */
+   struct simple_xattrs xattrs;
 };
 
 /*
@@ -309,6 +313,9 @@
/* CFTYPE_* flags */
unsigned int flags;
 
+   /* file xattrs */
+   struct simple_xattrs xattrs;
+
int (*open)(struct inode *inode, struct file *file);
ssize_t (*read)(struct cgroup *cgrp, struct cftype *cft,
struct file *file,
@@ -394,7 +401,7 @@
  */
 struct cftype_set {
struct list_headnode;   /* chained at subsys->cftsets */
-   const struct cftype *cfts;
+   struct cftype   *cfts;
 };
 
 struct cgroup_scanner {
@@ -406,8 +413,8 @@
void *data;
 };
 
-int cgroup_add_cftypes(struct cgroup_subsys *ss, const struct cftype *cfts);
-int cgroup_rm_cftypes(struct cgroup_subsys *ss, const struct cftype *cfts);
+int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts);
+int cgroup_rm_cftypes(struct cgroup_subsys *ss, struct cftype *cfts);
 
 int cgroup_is_removed(const struct cgroup *cgrp);
 
Index: github/kernel/cgroup.c
===
--- github.orig/kernel/cgroup.c 2012-08-23 15:48:33.577016061 -0400
+++ github/kernel/cgroup.c  2012-08-23 15:48:42.485258910 -0400
@@ -276,7 +276,8 @@
 
 /* bits in struct cgroupfs_root flags field */
 enum {
-   ROOT_NOPREFIX, /* mounted subsystems have no named prefix */
+   ROOT_NOPREFIX,  /* mounted subsystems have no named prefix */
+   ROOT_XATTR, /* supports extended attributes */
 };
 
 static int cgroup_is_releasable(const struct cgroup *cgrp)
@@ -913,15 +914,19 @@
 */
BUG_ON(!list_empty(>pidlists));
 
+   simple_xattrs_free(>xattrs);
+
kfree_rcu(cgrp, rcu_head);
} else {
struct cfent *cfe = __d_cfe(dentry);
struct cgroup *cgrp = dentry->d_parent->d_fsdata;
+   struct cftype *cft = cfe->type;
 
WARN_ONCE(!list_empty(>node) &&
  cgrp != >root->top_cgroup,
  "cfe still linked for %s\n", cfe->type->name);
kfree(cfe);
+   simple_xattrs_free(>xattrs);
}
iput(inode);
 }
@@ -1140,6 +1145,8 @@
seq_printf(seq, ",%s", ss->name);
if (test_bit(ROOT_NOPREFIX, >flags))
seq_puts(seq, ",noprefix");
+   if

[PATCH v7 2/4] cgroup: revise how we re-populate root directory

2012-08-23 Thread aris

From: Li Zefan 

When remounting cgroupfs with some subsystems added to it and some
removed, cgroup will remove all the files in the root directory and then
re-populate it.

What this patch does is remove only the files that belong to subsystems that
are about to be unbound, and create files only for newly-added subsystems.
The purpose is to leave all other files untouched.

This is a preparation for cgroup xattr support.

v7:
- checkpatch warnings fixed
v6:
- no changes
v5:
- no changes
v4:
- refactored cgroup_clear_directory() to not use cgroup_rm_file()
- instead of going thru the list of files, get the file list using the
  subsystems
- use 'subsys_mask' instead of {added,removed}_bits and made
  cgroup_populate_dir() to match the parameters with cgroup_clear_directory()
v3:
- refresh patches after recent refactoring

Cc: Li Zefan 
Cc: Tejun Heo 
Cc: Hugh Dickins 
Cc: Hillf Danton 
Cc: Lennart Poettering 
Signed-off-by: Li Zefan 
Signed-off-by: Aristeu Rozanski 

---
 kernel/cgroup.c |   61 
 1 file changed, 48 insertions(+), 13 deletions(-)

Index: github/kernel/cgroup.c
===
--- github.orig/kernel/cgroup.c 2012-08-23 15:46:12.557171590 -0400
+++ github/kernel/cgroup.c  2012-08-23 15:48:33.577016061 -0400
@@ -824,7 +824,8 @@
 static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, umode_t 
mode);
 static struct dentry *cgroup_lookup(struct inode *, struct dentry *, unsigned 
int);
 static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry);
-static int cgroup_populate_dir(struct cgroup *cgrp);
+static int cgroup_populate_dir(struct cgroup *cgrp, bool base_files,
+  unsigned long subsys_mask);
 static const struct inode_operations cgroup_dir_inode_operations;
 static const struct file_operations proc_cgroupstats_operations;
 
@@ -963,12 +964,29 @@
return -ENOENT;
 }
 
-static void cgroup_clear_directory(struct dentry *dir)
+/**
+ * cgroup_clear_directory - selective removal of base and subsystem files
+ * @dir: directory containing the files
+ * @base_files: true if the base files should be removed
+ * @subsys_mask: mask of the subsystem ids whose files should be removed
+ */
+static void cgroup_clear_directory(struct dentry *dir, bool base_files,
+  unsigned long subsys_mask)
 {
struct cgroup *cgrp = __d_cgrp(dir);
+   struct cgroup_subsys *ss;
 
-   while (!list_empty(>files))
-   cgroup_rm_file(cgrp, NULL);
+   for_each_subsys(cgrp->root, ss) {
+   struct cftype_set *set;
+   if (!test_bit(ss->subsys_id, _mask))
+   continue;
+   list_for_each_entry(set, >cftsets, node)
+   cgroup_rm_file(cgrp, set->cfts);
+   }
+   if (base_files) {
+   while (!list_empty(>files))
+   cgroup_rm_file(cgrp, NULL);
+   }
 }
 
 /*
@@ -977,8 +995,9 @@
 static void cgroup_d_remove_dir(struct dentry *dentry)
 {
struct dentry *parent;
+   struct cgroupfs_root *root = dentry->d_sb->s_fs_info;
 
-   cgroup_clear_directory(dentry);
+   cgroup_clear_directory(dentry, true, root->subsys_bits);
 
parent = dentry->d_parent;
spin_lock(>d_lock);
@@ -1339,6 +1358,7 @@
struct cgroupfs_root *root = sb->s_fs_info;
struct cgroup *cgrp = >top_cgroup;
struct cgroup_sb_opts opts;
+   unsigned long added_bits, removed_bits;
 
mutex_lock(>dentry->d_inode->i_mutex);
mutex_lock(_mutex);
@@ -1354,6 +1374,9 @@
pr_warning("cgroup: option changes via remount are deprecated 
(pid=%d comm=%s)\n",
   task_tgid_nr(current), current->comm);
 
+   added_bits = opts.subsys_bits & ~root->subsys_bits;
+   removed_bits = root->subsys_bits & ~opts.subsys_bits;
+
/* Don't allow flags or name to change at remount */
if (opts.flags != root->flags ||
(opts.name && strcmp(opts.name, root->name))) {
@@ -1369,8 +1392,9 @@
}
 
/* clear out any existing files and repopulate subsystem files */
-   cgroup_clear_directory(cgrp->dentry);
-   cgroup_populate_dir(cgrp);
+   cgroup_clear_directory(cgrp->dentry, false, removed_bits);
+   /* re-populate subsystem files */
+   cgroup_populate_dir(cgrp, false, added_bits);
 
if (opts.release_agent)
strcpy(root->release_agent_path, opts.release_agent);
@@ -1669,7 +1693,7 @@
BUG_ON(root->number_of_cgroups != 1);
 
cred = override_creds(_cred);
-   cgroup_populate_dir(root_cgrp);
+   cgroup_populate_dir(root_cgrp, true, root->subsys_bits);
revert_creds(cred);
mutex_unlock(_root_mutex);
mutex_unlock(_mutex);
@@ -3843,18 +3867,29 @@
{ } /* terminate */
 };

[PATCH v7 1/4] xattr: extract simple_xattr code from tmpfs

2012-08-23 Thread aris

From: Li Zefan 

Extract in-memory xattr APIs from tmpfs. Will be used by cgroup.

$ size vmlinux.o
   text    data     bss      dec    hex filename
4658782  880729 5195032 10734543 a3cbcf vmlinux.o
$ size vmlinux.o
   text    data     bss      dec    hex filename
4658957  880729 5195032 10734718 a3cc7e vmlinux.o

v7:
- checkpatch warnings fixed
- Implement the changes requested by Hugh Dickins:
- make simple_xattrs_init and simple_xattrs_free inline
- get rid of locking and list reinitialization in simple_xattrs_free,
  they're not needed
v6:
- no changes
v5:
- no changes
v4:
- move simple_xattrs_free() to fs/xattr.c
v3:
- in kmem_xattrs_free(), reinitialize the list
- use simple_xattr_* prefix
- introduce simple_xattr_add() to prevent direct list usage

Cc: Li Zefan 
Cc: Tejun Heo 
Cc: Hugh Dickins 
Cc: Hillf Danton 
Cc: Lennart Poettering 
Signed-off-by: Li Zefan 
Signed-off-by: Aristeu Rozanski 

---
 fs/xattr.c   |  167 +
 include/linux/shmem_fs.h |3 
 include/linux/xattr.h|   48 +
 mm/shmem.c   |  171 +++
 4 files changed, 230 insertions(+), 159 deletions(-)

Index: github/fs/xattr.c
===
--- github.orig/fs/xattr.c  2012-08-23 15:43:44.305129707 -0400
+++ github/fs/xattr.c   2012-08-23 15:43:45.477161661 -0400
@@ -791,3 +791,170 @@
 EXPORT_SYMBOL(generic_listxattr);
 EXPORT_SYMBOL(generic_setxattr);
 EXPORT_SYMBOL(generic_removexattr);
+
+/*
+ * Allocate new xattr and copy in the value; but leave the name to callers.
+ */
+struct simple_xattr *simple_xattr_alloc(const void *value, size_t size)
+{
+   struct simple_xattr *new_xattr;
+   size_t len;
+
+   /* wrap around? */
+   len = sizeof(*new_xattr) + size;
+   if (len <= sizeof(*new_xattr))
+   return NULL;
+
+   new_xattr = kmalloc(len, GFP_KERNEL);
+   if (!new_xattr)
+   return NULL;
+
+   new_xattr->size = size;
+   memcpy(new_xattr->value, value, size);
+   return new_xattr;
+}
+
+/*
+ * xattr GET operation for in-memory/pseudo filesystems
+ */
+int simple_xattr_get(struct simple_xattrs *xattrs, const char *name,
+void *buffer, size_t size)
+{
+   struct simple_xattr *xattr;
+   int ret = -ENODATA;
+
+   spin_lock(>lock);
+   list_for_each_entry(xattr, >head, list) {
+   if (strcmp(name, xattr->name))
+   continue;
+
+   ret = xattr->size;
+   if (buffer) {
+   if (size < xattr->size)
+   ret = -ERANGE;
+   else
+   memcpy(buffer, xattr->value, xattr->size);
+   }
+   break;
+   }
+   spin_unlock(>lock);
+   return ret;
+}
+
+static int __simple_xattr_set(struct simple_xattrs *xattrs, const char *name,
+ const void *value, size_t size, int flags)
+{
+   struct simple_xattr *xattr;
+   struct simple_xattr *new_xattr = NULL;
+   int err = 0;
+
+   /* value == NULL means remove */
+   if (value) {
+   new_xattr = simple_xattr_alloc(value, size);
+   if (!new_xattr)
+   return -ENOMEM;
+
+   new_xattr->name = kstrdup(name, GFP_KERNEL);
+   if (!new_xattr->name) {
+   kfree(new_xattr);
+   return -ENOMEM;
+   }
+   }
+
+   spin_lock(>lock);
+   list_for_each_entry(xattr, >head, list) {
+   if (!strcmp(name, xattr->name)) {
+   if (flags & XATTR_CREATE) {
+   xattr = new_xattr;
+   err = -EEXIST;
+   } else if (new_xattr) {
+   list_replace(>list, _xattr->list);
+   } else {
+   list_del(>list);
+   }
+   goto out;
+   }
+   }
+   if (flags & XATTR_REPLACE) {
+   xattr = new_xattr;
+   err = -ENODATA;
+   } else {
+   list_add(_xattr->list, >head);
+   xattr = NULL;
+   }
+out:
+   spin_unlock(>lock);
+   if (xattr) {
+   kfree(xattr->name);
+   kfree(xattr);
+   }
+   return err;
+
+}
+
+/*
+ * xattr SET operation for in-memory/pseudo filesystems
+ */
+int simple_xattr_set(struct simple_xattrs *xattrs, const char *name,
+const void *value, size_t size, int flags)
+{
+   if (size == 0)
+   value = ""; /* empty EA, do not remove */
+   return __simple_xattr_set(xattrs, name, value, size, flags);
+}
+
+/*
+ * xattr REMOVE operation for in-memory/pseudo filesystems
+ */
+int

[PATCH v7 4/4] cgroup: rename subsys_bits to subsys_mask

2012-08-23 Thread aris

In a previous discussion, Tejun Heo suggested renaming references to
subsys_bits (added_bits, removed_bits, etc) to something more meaningful.

Cc: Li Zefan 
Cc: Tejun Heo 
Cc: Hugh Dickins 
Cc: Hillf Danton 
Cc: Lennart Poettering 
Signed-off-by: Aristeu Rozanski 

---
 kernel/cgroup.c |   84 
 1 file changed, 42 insertions(+), 42 deletions(-)

Index: github/kernel/cgroup.c
===
--- github.orig/kernel/cgroup.c 2012-08-16 11:33:43.276337899 -0400
+++ github/kernel/cgroup.c  2012-08-16 11:33:43.292338339 -0400
@@ -111,13 +111,13 @@
 * The bitmask of subsystems intended to be attached to this
 * hierarchy
 */
-   unsigned long subsys_bits;
+   unsigned long subsys_mask;
 
/* Unique id for this hierarchy. */
int hierarchy_id;
 
/* The bitmask of subsystems currently attached to this hierarchy */
-   unsigned long actual_subsys_bits;
+   unsigned long actual_subsys_mask;
 
/* A list running through the attached subsystems */
struct list_head subsys_list;
@@ -557,7 +557,7 @@
 * won't change, so no need for locking.
 */
for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
-   if (root->subsys_bits & (1UL << i)) {
+   if (root->subsys_mask & (1UL << i)) {
/* Subsystem is in this hierarchy. So we want
 * the subsystem state from the new
 * cgroup */
@@ -1002,7 +1002,7 @@
struct dentry *parent;
struct cgroupfs_root *root = dentry->d_sb->s_fs_info;
 
-   cgroup_clear_directory(dentry, true, root->subsys_bits);
+   cgroup_clear_directory(dentry, true, root->subsys_mask);
 
parent = dentry->d_parent;
spin_lock(&parent->d_lock);
@@ -1046,22 +1046,22 @@
  * returns an error, no reference counts are touched.
  */
 static int rebind_subsystems(struct cgroupfs_root *root,
- unsigned long final_bits)
+ unsigned long final_subsys_mask)
 {
-   unsigned long added_bits, removed_bits;
+   unsigned long added_mask, removed_mask;
struct cgroup *cgrp = &root->top_cgroup;
int i;
 
BUG_ON(!mutex_is_locked(&cgroup_mutex));
BUG_ON(!mutex_is_locked(&cgroup_root_mutex));
 
-   removed_bits = root->actual_subsys_bits & ~final_bits;
-   added_bits = final_bits & ~root->actual_subsys_bits;
+   removed_mask = root->actual_subsys_mask & ~final_subsys_mask;
+   added_mask = final_subsys_mask & ~root->actual_subsys_mask;
/* Check that any added subsystems are currently free */
for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
unsigned long bit = 1UL << i;
struct cgroup_subsys *ss = subsys[i];
-   if (!(bit & added_bits))
+   if (!(bit & added_mask))
continue;
/*
 * Nobody should tell us to do a subsys that doesn't exist:
@@ -1086,7 +1086,7 @@
for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
struct cgroup_subsys *ss = subsys[i];
unsigned long bit = 1UL << i;
-   if (bit & added_bits) {
+   if (bit & added_mask) {
/* We're binding this subsystem to this hierarchy */
BUG_ON(ss == NULL);
BUG_ON(cgrp->subsys[i]);
@@ -1099,7 +1099,7 @@
if (ss->bind)
ss->bind(cgrp);
/* refcount was already taken, and we're keeping it */
-   } else if (bit & removed_bits) {
+   } else if (bit & removed_mask) {
/* We're removing this subsystem */
BUG_ON(ss == NULL);
BUG_ON(cgrp->subsys[i] != dummytop->subsys[i]);
@@ -1112,7 +1112,7 @@
list_move(&ss->sibling, &rootnode.subsys_list);
/* subsystem is now free - drop reference on module */
module_put(ss->module);
-   } else if (bit & final_bits) {
+   } else if (bit & final_subsys_mask) {
/* Subsystem state should already exist */
BUG_ON(ss == NULL);
BUG_ON(!cgrp->subsys[i]);
@@ -1129,7 +1129,7 @@
BUG_ON(cgrp->subsys[i]);
}
}
-   root->subsys_bits = root->actual_subsys_bits = final_bits;
+   root->subsys_mask = root->actual_subsys_mask = final_subsys_mask;
synchronize_rcu();
 
return 0;
@@ -1158,7 +1158,7 @@
 }
 
 struct cgroup_sb_opts {
-   unsigned long subsys_bits;
+   unsigned long subsys_mask;
unsigned long flags;
char *release_agent;
bool clone_children;
@@ -1267,7 +1267,7 @@
/* Mutually

[PATCH v7 0/4] cgroup: add xattr support

2012-08-23 Thread aris

This series is a refreshed version of a patchset submitted by Li Zefan back
in March:
https://lkml.org/lkml/2012/3/1/13

With Li's permission, I refreshed the patches to apply over the latest upstream
and added the modifications suggested by others in the thread:
- using a mount option instead of config option to enable the xattr support
- reinitialize the list in kmem_xattrs_free()
- renamed functions to simple_xattr_*()

There're two users for this patchset:
- SELinux: to be able to control access to cgroupfs inside containers
- systemd: to store meta information such as main PID in a service cgroup,
  set specific services special options in the cgroup.

While the xattrs will use kernel memory like tmpfs, they're restricted to
'security' (which controls the format of the value) and 'trusted' (which
requires CAP_SYS_ADMIN). If kernel memory usage is still a concern, we're
not far from having memcg account for kernel memory.

v7:
- fix checkpatch.pl warnings
- Implement the changes requested by Hugh Dickins:
- make simple_xattrs_init and simple_xattrs_free inline
- get rid of locking and list reinitialization in simple_xattrs_free,
  they're not needed
v6:
- only allow trusted and security
- replace subsys_bits by something more meaningful
v5:
- check for permissions for user xattr namespace
v4:
- implemented requested changes by Tejun Heo in patch #2

Cc: Li Zefan 
Cc: Tejun Heo 
Cc: Hugh Dickins 
Cc: Hillf Danton 
Cc: Lennart Poettering 
Signed-off-by: Li Zefan 
Signed-off-by: Aristeu Rozanski 

-- 
Aristeu
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v7 0/4] cgroup: add xattr support

2012-08-23 Thread aris

This series is a refreshed version of a patchset submitted by Li Zefan back
in March:
https://lkml.org/lkml/2012/3/1/13

With Li's permission, I refreshed the patches to apply over the latest upstream
and added the modifications suggested by others in the thread:
- using a mount option instead of config option to enable the xattr support
- reinitialize the list in kmem_xattrs_free()
- renamed functions to simple_xattr_*()

There're two users for this patchset:
- SELinux: to be able to control access to cgroupfs inside containers
- systemd: to store meta information such as main PID in a service cgroup,
  set specific services special options in the cgroup.

While the xattrs will use kernel memory like tmpfs, they're restricted to
'security' (which controls the format of the value) and 'trusted' (which
requires CAP_SYS_ADMIN). If kernel memory usage is still a concern, we're
not far from having memcg account for kernel memory.

v7:
- fix checkpatch.pl warnings
- Implement the changes requested by Hugh Dickins:
- make simple_xattrs_init and simple_xattrs_free inline
- get rid of locking and list reinitialization in simple_xattrs_free,
  they're not needed
v6:
- only allow trusted and security
- replace subsys_bits by something more meaningful
v5:
- check for permissions for user xattr namespace
v4:
- implemented requested changes by Tejun Heo in patch #2

Cc: Li Zefan lize...@huawei.com
Cc: Tejun Heo t...@kernel.org
Cc: Hugh Dickins hu...@google.com
Cc: Hillf Danton dhi...@gmail.com
Cc: Lennart Poettering lpoet...@redhat.com
Signed-off-by: Li Zefan lize...@huawei.com
Signed-off-by: Aristeu Rozanski a...@redhat.com

-- 
Aristeu
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v7 4/4] cgroup: rename subsys_bits to subsys_mask

2012-08-23 Thread aris

In a previous discussion, Tejun Heo suggested to rename references to
subsys_bits (added_bits, removed_bits, etc) by something more meaningful.

Cc: Li Zefan lize...@huawei.com
Cc: Tejun Heo t...@kernel.org
Cc: Hugh Dickins hu...@google.com
Cc: Hillf Danton dhi...@gmail.com
Cc: Lennart Poettering lpoet...@redhat.com
Signed-off-by: Aristeu Rozanski a...@redhat.com

---
 kernel/cgroup.c |   84 
 1 file changed, 42 insertions(+), 42 deletions(-)

Index: github/kernel/cgroup.c
===
--- github.orig/kernel/cgroup.c 2012-08-16 11:33:43.276337899 -0400
+++ github/kernel/cgroup.c  2012-08-16 11:33:43.292338339 -0400
@@ -111,13 +111,13 @@
 * The bitmask of subsystems intended to be attached to this
 * hierarchy
 */
-   unsigned long subsys_bits;
+   unsigned long subsys_mask;
 
/* Unique id for this hierarchy. */
int hierarchy_id;
 
/* The bitmask of subsystems currently attached to this hierarchy */
-   unsigned long actual_subsys_bits;
+   unsigned long actual_subsys_mask;
 
/* A list running through the attached subsystems */
struct list_head subsys_list;
@@ -557,7 +557,7 @@
 * won't change, so no need for locking.
 */
for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
-   if (root->subsys_bits & (1UL << i)) {
+   if (root->subsys_mask & (1UL << i)) {
/* Subsystem is in this hierarchy. So we want
 * the subsystem state from the new
 * cgroup */
@@ -1002,7 +1002,7 @@
struct dentry *parent;
struct cgroupfs_root *root = dentry->d_sb->s_fs_info;
 
-   cgroup_clear_directory(dentry, true, root->subsys_bits);
+   cgroup_clear_directory(dentry, true, root->subsys_mask);
 
parent = dentry->d_parent;
spin_lock(&parent->d_lock);
@@ -1046,22 +1046,22 @@
  * returns an error, no reference counts are touched.
  */
 static int rebind_subsystems(struct cgroupfs_root *root,
- unsigned long final_bits)
+ unsigned long final_subsys_mask)
 {
-   unsigned long added_bits, removed_bits;
+   unsigned long added_mask, removed_mask;
struct cgroup *cgrp = &root->top_cgroup;
int i;
 
BUG_ON(!mutex_is_locked(&cgroup_mutex));
BUG_ON(!mutex_is_locked(&cgroup_root_mutex));
 
-   removed_bits = root->actual_subsys_bits & ~final_bits;
-   added_bits = final_bits & ~root->actual_subsys_bits;
+   removed_mask = root->actual_subsys_mask & ~final_subsys_mask;
+   added_mask = final_subsys_mask & ~root->actual_subsys_mask;
/* Check that any added subsystems are currently free */
for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
unsigned long bit = 1UL << i;
struct cgroup_subsys *ss = subsys[i];
-   if (!(bit & added_bits))
+   if (!(bit & added_mask))
continue;
/*
 * Nobody should tell us to do a subsys that doesn't exist:
@@ -1086,7 +1086,7 @@
for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
struct cgroup_subsys *ss = subsys[i];
unsigned long bit = 1UL << i;
-   if (bit & added_bits) {
+   if (bit & added_mask) {
/* We're binding this subsystem to this hierarchy */
BUG_ON(ss == NULL);
BUG_ON(cgrp->subsys[i]);
@@ -1099,7 +1099,7 @@
if (ss->bind)
ss->bind(cgrp);
/* refcount was already taken, and we're keeping it */
-   } else if (bit & removed_bits) {
+   } else if (bit & removed_mask) {
/* We're removing this subsystem */
BUG_ON(ss == NULL);
BUG_ON(cgrp->subsys[i] != dummytop->subsys[i]);
@@ -1112,7 +1112,7 @@
list_move(&ss->sibling, &rootnode.subsys_list);
/* subsystem is now free - drop reference on module */
module_put(ss->module);
-   } else if (bit & final_bits) {
+   } else if (bit & final_subsys_mask) {
/* Subsystem state should already exist */
BUG_ON(ss == NULL);
BUG_ON(!cgrp->subsys[i]);
@@ -1129,7 +1129,7 @@
BUG_ON(cgrp->subsys[i]);
}
}
-   root->subsys_bits = root->actual_subsys_bits = final_bits;
+   root->subsys_mask = root->actual_subsys_mask = final_subsys_mask;
synchronize_rcu();
 
return 0;
@@ -1158,7 +1158,7 @@
 }
 
 struct cgroup_sb_opts {
-   unsigned long subsys_bits;
+   unsigned long subsys_mask;
unsigned long flags;
char *release_agent;

[PATCH v7 1/4] xattr: extract simple_xattr code from tmpfs

2012-08-23 Thread aris

From: Li Zefan lize...@huawei.com

Extract in-memory xattr APIs from tmpfs. Will be used by cgroup.

$ size vmlinux.o
   textdata bss dec hex filename
4658782  880729 5195032 10734543 a3cbcf vmlinux.o
$ size vmlinux.o
   textdata bss dec hex filename
4658957  880729 5195032 10734718 a3cc7e vmlinux.o

v7:
- checkpatch warnings fixed
- Implement the changes requested by Hugh Dickins:
- make simple_xattrs_init and simple_xattrs_free inline
- get rid of locking and list reinitialization in simple_xattrs_free,
  they're not needed
v6:
- no changes
v5:
- no changes
v4:
- move simple_xattrs_free() to fs/xattr.c
v3:
- in kmem_xattrs_free(), reinitialize the list
- use simple_xattr_* prefix
- introduce simple_xattr_add() to prevent direct list usage

Cc: Li Zefan lize...@huawei.com
Cc: Tejun Heo t...@kernel.org
Cc: Hugh Dickins hu...@google.com
Cc: Hillf Danton dhi...@gmail.com
Cc: Lennart Poettering lpoet...@redhat.com
Signed-off-by: Li Zefan lize...@huawei.com
Signed-off-by: Aristeu Rozanski a...@redhat.com

---
 fs/xattr.c   |  167 +
 include/linux/shmem_fs.h |3 
 include/linux/xattr.h|   48 +
 mm/shmem.c   |  171 +++
 4 files changed, 230 insertions(+), 159 deletions(-)

Index: github/fs/xattr.c
===
--- github.orig/fs/xattr.c  2012-08-23 15:43:44.305129707 -0400
+++ github/fs/xattr.c   2012-08-23 15:43:45.477161661 -0400
@@ -791,3 +791,170 @@
 EXPORT_SYMBOL(generic_listxattr);
 EXPORT_SYMBOL(generic_setxattr);
 EXPORT_SYMBOL(generic_removexattr);
+
+/*
+ * Allocate new xattr and copy in the value; but leave the name to callers.
+ */
+struct simple_xattr *simple_xattr_alloc(const void *value, size_t size)
+{
+   struct simple_xattr *new_xattr;
+   size_t len;
+
+   /* wrap around? */
+   len = sizeof(*new_xattr) + size;
+   if (len <= sizeof(*new_xattr))
+   return NULL;
+
+   new_xattr = kmalloc(len, GFP_KERNEL);
+   if (!new_xattr)
+   return NULL;
+
+   new_xattr->size = size;
+   memcpy(new_xattr->value, value, size);
+   return new_xattr;
+}
+
+/*
+ * xattr GET operation for in-memory/pseudo filesystems
+ */
+int simple_xattr_get(struct simple_xattrs *xattrs, const char *name,
+void *buffer, size_t size)
+{
+   struct simple_xattr *xattr;
+   int ret = -ENODATA;
+
+   spin_lock(&xattrs->lock);
+   list_for_each_entry(xattr, &xattrs->head, list) {
+   if (strcmp(name, xattr->name))
+   continue;
+
+   ret = xattr->size;
+   if (buffer) {
+   if (size < xattr->size)
+   ret = -ERANGE;
+   else
+   memcpy(buffer, xattr->value, xattr->size);
+   }
+   break;
+   }
+   spin_unlock(&xattrs->lock);
+   return ret;
+}
+
+static int __simple_xattr_set(struct simple_xattrs *xattrs, const char *name,
+ const void *value, size_t size, int flags)
+{
+   struct simple_xattr *xattr;
+   struct simple_xattr *new_xattr = NULL;
+   int err = 0;
+
+   /* value == NULL means remove */
+   if (value) {
+   new_xattr = simple_xattr_alloc(value, size);
+   if (!new_xattr)
+   return -ENOMEM;
+
+   new_xattr->name = kstrdup(name, GFP_KERNEL);
+   if (!new_xattr->name) {
+   kfree(new_xattr);
+   return -ENOMEM;
+   }
+   }
+
+   spin_lock(&xattrs->lock);
+   list_for_each_entry(xattr, &xattrs->head, list) {
+   if (!strcmp(name, xattr->name)) {
+   if (flags & XATTR_CREATE) {
+   xattr = new_xattr;
+   err = -EEXIST;
+   } else if (new_xattr) {
+   list_replace(&xattr->list, &new_xattr->list);
+   } else {
+   list_del(&xattr->list);
+   }
+   goto out;
+   }
+   }
+   if (flags & XATTR_REPLACE) {
+   xattr = new_xattr;
+   err = -ENODATA;
+   } else {
+   list_add(&new_xattr->list, &xattrs->head);
+   xattr = NULL;
+   }
+out:
+   spin_unlock(&xattrs->lock);
+   if (xattr) {
+   kfree(xattr->name);
+   kfree(xattr);
+   }
+   return err;
+
+}
+
+/*
+ * xattr SET operation for in-memory/pseudo filesystems
+ */
+int simple_xattr_set(struct simple_xattrs *xattrs, const char *name,
+const void *value, size_t size, int flags)
+{
+   if (size == 0)
+   value = ""; /* empty EA,

[PATCH v7 2/4] cgroup: revise how we re-populate root directory

2012-08-23 Thread aris

From: Li Zefan lize...@huawei.com

When remounting cgroupfs with some subsystems added to it and some
removed, cgroup will remove all the files in root directory and then
re-populate it.

What I'm doing here is, only remove files which belong to subsystems that
are to be unbinded, and only create files for newly-added subsystems.
The purpose is to have all other files untouched.

This is a preparation for cgroup xattr support.

v7:
- checkpatch warnings fixed
v6:
- no changes
v5:
- no changes
v4:
- refactored cgroup_clear_directory() to not use cgroup_rm_file()
- instead of going thru the list of files, get the file list using the
  subsystems
- use 'subsys_mask' instead of {added,removed}_bits and made
  cgroup_populate_dir() to match the parameters with cgroup_clear_directory()
v3:
- refresh patches after recent refactoring

Cc: Li Zefan lize...@huawei.com
Cc: Tejun Heo t...@kernel.org
Cc: Hugh Dickins hu...@google.com
Cc: Hillf Danton dhi...@gmail.com
Cc: Lennart Poettering lpoet...@redhat.com
Signed-off-by: Li Zefan lize...@huawei.com
Signed-off-by: Aristeu Rozanski a...@redhat.com

---
 kernel/cgroup.c |   61 
 1 file changed, 48 insertions(+), 13 deletions(-)

Index: github/kernel/cgroup.c
===
--- github.orig/kernel/cgroup.c 2012-08-23 15:46:12.557171590 -0400
+++ github/kernel/cgroup.c  2012-08-23 15:48:33.577016061 -0400
@@ -824,7 +824,8 @@
 static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, umode_t 
mode);
 static struct dentry *cgroup_lookup(struct inode *, struct dentry *, unsigned 
int);
 static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry);
-static int cgroup_populate_dir(struct cgroup *cgrp);
+static int cgroup_populate_dir(struct cgroup *cgrp, bool base_files,
+  unsigned long subsys_mask);
 static const struct inode_operations cgroup_dir_inode_operations;
 static const struct file_operations proc_cgroupstats_operations;
 
@@ -963,12 +964,29 @@
return -ENOENT;
 }
 
-static void cgroup_clear_directory(struct dentry *dir)
+/**
+ * cgroup_clear_directory - selective removal of base and subsystem files
+ * @dir: directory containing the files
+ * @base_files: true if the base files should be removed
+ * @subsys_mask: mask of the subsystem ids whose files should be removed
+ */
+static void cgroup_clear_directory(struct dentry *dir, bool base_files,
+  unsigned long subsys_mask)
 {
struct cgroup *cgrp = __d_cgrp(dir);
+   struct cgroup_subsys *ss;
 
-   while (!list_empty(&cgrp->files))
-   cgroup_rm_file(cgrp, NULL);
+   for_each_subsys(cgrp->root, ss) {
+   struct cftype_set *set;
+   if (!test_bit(ss->subsys_id, &subsys_mask))
+   continue;
+   list_for_each_entry(set, &ss->cftsets, node)
+   cgroup_rm_file(cgrp, set->cfts);
+   }
+   if (base_files) {
+   while (!list_empty(&cgrp->files))
+   cgroup_rm_file(cgrp, NULL);
+   }
 }
 
 /*
@@ -977,8 +995,9 @@
 static void cgroup_d_remove_dir(struct dentry *dentry)
 {
struct dentry *parent;
+   struct cgroupfs_root *root = dentry->d_sb->s_fs_info;
 
-   cgroup_clear_directory(dentry);
+   cgroup_clear_directory(dentry, true, root->subsys_bits);
 
parent = dentry->d_parent;
spin_lock(&parent->d_lock);
@@ -1339,6 +1358,7 @@
struct cgroupfs_root *root = sb->s_fs_info;
struct cgroup *cgrp = &root->top_cgroup;
struct cgroup_sb_opts opts;
+   unsigned long added_bits, removed_bits;
 
mutex_lock(&cgrp->dentry->d_inode->i_mutex);
mutex_lock(&cgroup_mutex);
@@ -1354,6 +1374,9 @@
pr_warning("cgroup: option changes via remount are deprecated 
(pid=%d comm=%s)\n",
   task_tgid_nr(current), current->comm);
 
+   added_bits = opts.subsys_bits & ~root->subsys_bits;
+   removed_bits = root->subsys_bits & ~opts.subsys_bits;
+
/* Don't allow flags or name to change at remount */
if (opts.flags != root->flags ||
(opts.name && strcmp(opts.name, root->name))) {
@@ -1369,8 +1392,9 @@
}
 
/* clear out any existing files and repopulate subsystem files */
-   cgroup_clear_directory(cgrp->dentry);
-   cgroup_populate_dir(cgrp);
+   cgroup_clear_directory(cgrp->dentry, false, removed_bits);
+   /* re-populate subsystem files */
+   cgroup_populate_dir(cgrp, false, added_bits);
 
if (opts.release_agent)
strcpy(root->release_agent_path, opts.release_agent);
@@ -1669,7 +1693,7 @@
BUG_ON(root->number_of_cgroups != 1);
 
cred = override_creds(&init_cred);
-   cgroup_populate_dir(root_cgrp);
+   cgroup_populate_dir(root_cgrp, true, root->subsys_bits);

[PATCH v7 3/4] cgroup: add xattr support

2012-08-23 Thread aris

From: Li Zefan lize...@huawei.com

This is one of the items in the plumber's wish list.

For use cases:

 What would the use case be for this?

 Attaching meta information to services, in an easily discoverable
 way. For example, in systemd we create one cgroup for each service, and
 could then store data like the main pid of the specific service as an
 xattr on the cgroup itself. That way we'd have almost all service state
 in the cgroupfs, which would make it possible to terminate systemd and
 later restart it without losing any state information. But there's more:
 for example, some very peculiar services cannot be terminated on
 shutdown (i.e. fakeraid DM stuff) and it would be really nice if the
 services in question could just mark that on their cgroup, by setting an
 xattr. On the more desktopy side of things there are other
 possibilities: for example there are plans defining what an application
 is along the lines of a cgroup (i.e. an app being a collection of
 processes). With xattrs one could then attach an icon or human readable
 program name on the cgroup.

 The key idea is that this would allow attaching runtime meta information
 to cgroups and everything they model (services, apps, vms), that doesn't
 need any complex userspace infrastructure, has good access control
 (i.e. because the file system enforces that anyway, and there's the
 trusted. xattr namespace), notifications (inotify), and can easily be
 shared among applications.

 Lennart

v7:
- no changes
v6:
- remove user xattr namespace, only allow trusted and security
v5:
- check for capabilities before setting/removing xattrs
v4:
- no changes
v3:
- instead of config option, use mount option to enable xattr support

Cc: Li Zefan lize...@huawei.com
Cc: Tejun Heo t...@kernel.org
Cc: Hugh Dickins hu...@google.com
Cc: Hillf Danton dhi...@gmail.com
Cc: Lennart Poettering lpoet...@redhat.com
Signed-off-by: Li Zefan lize...@huawei.com
Signed-off-by: Aristeu Rozanski a...@redhat.com

---
 include/linux/cgroup.h |   13 --
 kernel/cgroup.c|  100 +
 2 files changed, 103 insertions(+), 10 deletions(-)

Index: github/include/linux/cgroup.h
===
--- github.orig/include/linux/cgroup.h  2012-08-23 15:46:12.465169082 -0400
+++ github/include/linux/cgroup.h   2012-08-23 15:48:42.485258910 -0400
@@ -17,6 +17,7 @@
 #include <linux/rwsem.h>
 #include <linux/idr.h>
 #include <linux/workqueue.h>
+#include <linux/xattr.h>
 
 #ifdef CONFIG_CGROUPS
 
@@ -216,6 +217,9 @@
/* List of events which userspace want to receive */
struct list_head event_list;
spinlock_t event_list_lock;
+
+   /* directory xattrs */
+   struct simple_xattrs xattrs;
 };
 
 /*
@@ -309,6 +313,9 @@
/* CFTYPE_* flags */
unsigned int flags;
 
+   /* file xattrs */
+   struct simple_xattrs xattrs;
+
int (*open)(struct inode *inode, struct file *file);
ssize_t (*read)(struct cgroup *cgrp, struct cftype *cft,
struct file *file,
@@ -394,7 +401,7 @@
  */
 struct cftype_set {
struct list_head node;   /* chained at subsys->cftsets */
-   const struct cftype *cfts;
+   struct cftype   *cfts;
 };
 
 struct cgroup_scanner {
@@ -406,8 +413,8 @@
void *data;
 };
 
-int cgroup_add_cftypes(struct cgroup_subsys *ss, const struct cftype *cfts);
-int cgroup_rm_cftypes(struct cgroup_subsys *ss, const struct cftype *cfts);
+int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts);
+int cgroup_rm_cftypes(struct cgroup_subsys *ss, struct cftype *cfts);
 
 int cgroup_is_removed(const struct cgroup *cgrp);
 
Index: github/kernel/cgroup.c
===
--- github.orig/kernel/cgroup.c 2012-08-23 15:48:33.577016061 -0400
+++ github/kernel/cgroup.c  2012-08-23 15:48:42.485258910 -0400
@@ -276,7 +276,8 @@
 
 /* bits in struct cgroupfs_root flags field */
 enum {
-   ROOT_NOPREFIX, /* mounted subsystems have no named prefix */
+   ROOT_NOPREFIX,  /* mounted subsystems have no named prefix */
+   ROOT_XATTR, /* supports extended attributes */
 };
 
 static int cgroup_is_releasable(const struct cgroup *cgrp)
@@ -913,15 +914,19 @@
 */
BUG_ON(!list_empty(&cgrp->pidlists));
 
+   simple_xattrs_free(&cgrp->xattrs);
+
kfree_rcu(cgrp, rcu_head);
} else {
struct cfent *cfe = __d_cfe(dentry);
struct cgroup *cgrp = dentry->d_parent->d_fsdata;
+   struct cftype *cft = cfe->type;
 
WARN_ONCE(!list_empty(&cfe->node) &&
  cgrp != &cgrp->root->top_cgroup,
  "cfe still linked for %s\n", cfe->type->name);
kfree(cfe);
+   simple_xattrs_free(&cft->xattrs);
}
iput(inode);
 }
@@

[PATCH v6 1/4] xattr: extract simple_xattr code from tmpfs

2012-08-16 Thread aris

From: Li Zefan 

Extract in-memory xattr APIs from tmpfs. Will be used by cgroup.

$ size vmlinux.o
   textdata bss dec hex filename
4658782  880729 5195032 10734543 a3cbcf vmlinux.o
$ size vmlinux.o
   textdata bss dec hex filename
4658957  880729 5195032 10734718 a3cc7e vmlinux.o

v6:
- no changes
v5:
- no changes
v4:
- move simple_xattrs_free() to fs/xattr.c
v3:
- in kmem_xattrs_free(), reinitialize the list
- use simple_xattr_* prefix
- introduce simple_xattr_add() to prevent direct list usage

Cc: Li Zefan 
Cc: Tejun Heo 
Cc: Hugh Dickins 
Cc: Hillf Danton 
Cc: Lennart Poettering 
Signed-off-by: Li Zefan 
Signed-off-by: Aristeu Rozanski 

---
 fs/xattr.c   |  200 +++
 include/linux/shmem_fs.h |3 
 include/linux/xattr.h|   25 +
 mm/shmem.c   |  171 +++-
 4 files changed, 240 insertions(+), 159 deletions(-)

Index: github/fs/xattr.c
===
--- github.orig/fs/xattr.c  2012-08-16 11:28:12.719273435 -0400
+++ github/fs/xattr.c   2012-08-16 11:28:13.975307743 -0400
@@ -791,3 +791,203 @@
 EXPORT_SYMBOL(generic_listxattr);
 EXPORT_SYMBOL(generic_setxattr);
 EXPORT_SYMBOL(generic_removexattr);
+
+/*
+ * initialize the simple_xattrs structure
+ */
+void simple_xattrs_init(struct simple_xattrs *xattrs)
+{
+   INIT_LIST_HEAD(&xattrs->head);
+   spin_lock_init(&xattrs->lock);
+}
+
+/*
+ * Allocate new xattr and copy in the value; but leave the name to callers.
+ */
+struct simple_xattr *simple_xattr_alloc(const void *value, size_t size)
+{
+   struct simple_xattr *new_xattr;
+   size_t len;
+
+   /* wrap around? */
+   len = sizeof(*new_xattr) + size;
+   if (len <= sizeof(*new_xattr))
+   return NULL;
+
+   new_xattr = kmalloc(len, GFP_KERNEL);
+   if (!new_xattr)
+   return NULL;
+
+   new_xattr->size = size;
+   memcpy(new_xattr->value, value, size);
+   return new_xattr;
+}
+
+/*
+ * free all the xattrs
+ */
+void simple_xattrs_free(struct simple_xattrs *xattrs)
+{
+   struct simple_xattr *xattr, *node;
+
+   spin_lock(&xattrs->lock);
+   list_for_each_entry_safe(xattr, node, &xattrs->head, list) {
+   kfree(xattr->name);
+   kfree(xattr);
+   }
+   INIT_LIST_HEAD(&xattrs->head);
+   spin_unlock(&xattrs->lock);
+}
+
+/*
+ * xattr GET operation for in-memory/pseudo filesystems
+ */
+int simple_xattr_get(struct simple_xattrs *xattrs, const char *name,
+void *buffer, size_t size)
+{
+   struct simple_xattr *xattr;
+   int ret = -ENODATA;
+
+   spin_lock(&xattrs->lock);
+   list_for_each_entry(xattr, &xattrs->head, list) {
+   if (strcmp(name, xattr->name))
+   continue;
+
+   ret = xattr->size;
+   if (buffer) {
+   if (size < xattr->size)
+   ret = -ERANGE;
+   else
+   memcpy(buffer, xattr->value, xattr->size);
+   }
+   break;
+   }
+   spin_unlock(&xattrs->lock);
+   return ret;
+}
+
+static int __simple_xattr_set(struct simple_xattrs *xattrs, const char *name,
+ const void *value, size_t size, int flags)
+{
+   struct simple_xattr *xattr;
+   struct simple_xattr *new_xattr = NULL;
+   size_t len;
+   int err = 0;
+
+   /* value == NULL means remove */
+   if (value) {
+   /* wrap around? */
+   len = sizeof(*new_xattr) + size;
+   if (len <= sizeof(*new_xattr))
+   return -ENOMEM;
+
+   new_xattr = kmalloc(len, GFP_KERNEL);
+   if (!new_xattr)
+   return -ENOMEM;
+
+   new_xattr->name = kstrdup(name, GFP_KERNEL);
+   if (!new_xattr->name) {
+   kfree(new_xattr);
+   return -ENOMEM;
+   }
+
+   new_xattr->size = size;
+   memcpy(new_xattr->value, value, size);
+   }
+
+   spin_lock(&xattrs->lock);
+   list_for_each_entry(xattr, &xattrs->head, list) {
+   if (!strcmp(name, xattr->name)) {
+   if (flags & XATTR_CREATE) {
+   xattr = new_xattr;
+   err = -EEXIST;
+   } else if (new_xattr) {
+   list_replace(&xattr->list, &new_xattr->list);
+   } else {
+   list_del(&xattr->list);
+   }
+   goto out;
+   }
+   }
+   if (flags & XATTR_REPLACE) {
+   xattr = new_xattr;
+   err = -ENODATA;
+   } else {
+   list_add(&new_xattr->list, &xattrs->head);
+   xattr = NULL;
+   }
+out:
+   spin_unlock(&xattrs->lock);
+   if

[PATCH v6 2/4] cgroup: revise how we re-populate root directory

2012-08-16 Thread aris

From: Li Zefan 

When remounting cgroupfs with some subsystems added to it and some
removed, cgroup will remove all the files in root directory and then
re-populate it.

What I'm doing here is, only remove files which belong to subsystems that
are to be unbinded, and only create files for newly-added subsystems.
The purpose is to have all other files untouched.

This is a preparation for cgroup xattr support.

v6:
- no changes
v5:
- no changes
v4:
- refactored cgroup_clear_directory() to not use cgroup_rm_file()
- instead of going thru the list of files, get the file list using the
  subsystems
- use 'subsys_mask' instead of {added,removed}_bits and made
  cgroup_populate_dir() to match the parameters with cgroup_clear_directory()
v3:
- refresh patches after recent refactoring

Cc: Li Zefan 
Cc: Tejun Heo 
Cc: Hugh Dickins 
Cc: Hillf Danton 
Cc: Lennart Poettering 
Signed-off-by: Li Zefan 
Signed-off-by: Aristeu Rozanski 

---
 kernel/cgroup.c |   61 
 1 file changed, 48 insertions(+), 13 deletions(-)

Index: github/kernel/cgroup.c
===
--- github.orig/kernel/cgroup.c 2012-08-16 11:28:12.683272452 -0400
+++ github/kernel/cgroup.c  2012-08-16 11:28:42.592089681 -0400
@@ -824,7 +824,8 @@
 static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, umode_t 
mode);
 static struct dentry *cgroup_lookup(struct inode *, struct dentry *, unsigned 
int);
 static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry);
-static int cgroup_populate_dir(struct cgroup *cgrp);
+static int cgroup_populate_dir(struct cgroup *cgrp, bool base_files,
+  unsigned long subsys_mask);
 static const struct inode_operations cgroup_dir_inode_operations;
 static const struct file_operations proc_cgroupstats_operations;
 
@@ -963,12 +964,29 @@
return -ENOENT;
 }
 
-static void cgroup_clear_directory(struct dentry *dir)
+/**
+ * cgroup_clear_directory - selective removal of base and subsystem files
+ * @dir: directory containing the files
+ * @base_files: true if the base files should be removed
+ * @subsys_mask: mask of the subsystem ids whose files should be removed
+ */
+static void cgroup_clear_directory(struct dentry *dir, bool base_files,
+  unsigned long subsys_mask)
 {
struct cgroup *cgrp = __d_cgrp(dir);
+   struct cgroup_subsys *ss;
 
-   while (!list_empty(&cgrp->files))
-   cgroup_rm_file(cgrp, NULL);
+   for_each_subsys(cgrp->root, ss) {
+   struct cftype_set *set;
+   if (!test_bit(ss->subsys_id, &subsys_mask))
+   continue;
+   list_for_each_entry(set, &ss->cftsets, node)
+   cgroup_rm_file(cgrp, set->cfts);
+   }
+   if (base_files) {
+   while (!list_empty(&cgrp->files))
+   cgroup_rm_file(cgrp, NULL);
+   }
 }
 
 /*
@@ -977,8 +995,9 @@
 static void cgroup_d_remove_dir(struct dentry *dentry)
 {
struct dentry *parent;
+   struct cgroupfs_root *root = dentry->d_sb->s_fs_info;
 
-   cgroup_clear_directory(dentry);
+   cgroup_clear_directory(dentry, true, root->subsys_bits);
 
parent = dentry->d_parent;
spin_lock(&parent->d_lock);
@@ -1339,6 +1358,7 @@
struct cgroupfs_root *root = sb->s_fs_info;
struct cgroup *cgrp = &root->top_cgroup;
struct cgroup_sb_opts opts;
+   unsigned long added_bits, removed_bits;
 
mutex_lock(&cgrp->dentry->d_inode->i_mutex);
mutex_lock(&cgroup_mutex);
@@ -1354,6 +1374,9 @@
pr_warning("cgroup: option changes via remount are deprecated 
(pid=%d comm=%s)\n",
   task_tgid_nr(current), current->comm);
 
+   added_bits = opts.subsys_bits & ~root->subsys_bits;
+   removed_bits = root->subsys_bits & ~opts.subsys_bits;
+
/* Don't allow flags or name to change at remount */
if (opts.flags != root->flags ||
(opts.name && strcmp(opts.name, root->name))) {
@@ -1369,8 +1392,9 @@
}
 
/* clear out any existing files and repopulate subsystem files */
-   cgroup_clear_directory(cgrp->dentry);
-   cgroup_populate_dir(cgrp);
+   cgroup_clear_directory(cgrp->dentry, false, removed_bits);
+   /* re-populate subsystem files */
+   cgroup_populate_dir(cgrp, false, added_bits);
 
if (opts.release_agent)
strcpy(root->release_agent_path, opts.release_agent);
@@ -1669,7 +1693,7 @@
BUG_ON(root->number_of_cgroups != 1);
 
cred = override_creds(&init_cred);
-   cgroup_populate_dir(root_cgrp);
+   cgroup_populate_dir(root_cgrp, true, root->subsys_bits);
revert_creds(cred);
mutex_unlock(&cgroup_root_mutex);
mutex_unlock(&cgroup_mutex);
@@ -3843,18 +3867,29 @@
{ } /* terminate */
 };
 
-static int

[PATCH v6 0/4] cgroup: add xattr support

2012-08-16 Thread aris

This series is a refreshed version of a patchset submitted by Li Zefan back
in March:
https://lkml.org/lkml/2012/3/1/13

With Li's permission, I refreshed the patches to apply over the latest upstream
and added the modifications suggested by others in the thread:
- using a mount option instead of config option to enable the xattr support
- reinitialize the list in kmem_xattrs_free()
- renamed functions to simple_xattr_*()

There're two users for this patchset:
- SELinux: to be able to control access to cgroupfs inside containers
- systemd: to store meta information such as main PID in a service cgroup,
  set specific services special options in the cgroup.

While the xattrs will use kernel memory like tmpfs, they're restricted to
'security' (which controls the format of the value) and 'trusted' (which
requires CAP_SYS_ADMIN). If kernel memory usage is still a concern, we're
not far from having memcg account for kernel memory.

v6:
- only allow trusted and security
- replace subsys_bits by something more meaningful
v5:
- check for permissions for user xattr namespace
v4:
- implemented requested changes by Tejun Heo in patch #2

Cc: Li Zefan 
Cc: Tejun Heo 
Cc: Hugh Dickins 
Cc: Hillf Danton 
Cc: Lennart Poettering 
Signed-off-by: Li Zefan 
Signed-off-by: Aristeu Rozanski 

-- 
Aristeu
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v6 3/4] cgroup: add xattr support

2012-08-16 Thread aris

From: Li Zefan 

This is one of the items in the plumber's wish list.

For use cases:

>> What would the use case be for this?
>
> Attaching meta information to services, in an easily discoverable
> way. For example, in systemd we create one cgroup for each service, and
> could then store data like the main pid of the specific service as an
> xattr on the cgroup itself. That way we'd have almost all service state
> in the cgroupfs, which would make it possible to terminate systemd and
> later restart it without losing any state information. But there's more:
> for example, some very peculiar services cannot be terminated on
> shutdown (i.e. fakeraid DM stuff) and it would be really nice if the
> services in question could just mark that on their cgroup, by setting an
> xattr. On the more desktopy side of things there are other
> possibilities: for example there are plans defining what an application
> is along the lines of a cgroup (i.e. an app being a collection of
> processes). With xattrs one could then attach an icon or human readable
> program name on the cgroup.
>
> The key idea is that this would allow attaching runtime meta information
> to cgroups and everything they model (services, apps, vms), that doesn't
> need any complex userspace infrastructure, has good access control
> (i.e. because the file system enforces that anyway, and there's the
> "trusted." xattr namespace), notifications (inotify), and can easily be
> shared among applications.
>
> Lennart

v6:
- remove user xattr namespace, only allow trusted and security
v5:
- check for capabilities before setting/removing xattrs
v4:
- no changes
v3:
- instead of config option, use mount option to enable xattr support

Cc: Li Zefan 
Cc: Tejun Heo 
Cc: Hugh Dickins 
Cc: Hillf Danton 
Cc: Lennart Poettering 
Signed-off-by: Li Zefan 
Signed-off-by: Aristeu Rozanski 

---
 include/linux/cgroup.h |   13 --
 kernel/cgroup.c|  100 +
 2 files changed, 103 insertions(+), 10 deletions(-)

Index: github/include/linux/cgroup.h
===
--- github.orig/include/linux/cgroup.h  2012-08-16 10:24:50.0 -0400
+++ github/include/linux/cgroup.h   2012-08-16 10:27:53.975223786 -0400
@@ -17,6 +17,7 @@
 #include <linux/rwsem.h>
 #include <linux/idr.h>
 #include <linux/workqueue.h>
+#include <linux/xattr.h>
 
 #ifdef CONFIG_CGROUPS
 
@@ -216,6 +217,9 @@
/* List of events which userspace want to receive */
struct list_head event_list;
spinlock_t event_list_lock;
+
+   /* directory xattrs */
+   struct simple_xattrs xattrs;
 };
 
 /*
@@ -309,6 +313,9 @@
/* CFTYPE_* flags */
unsigned int flags;
 
+   /* file xattrs */
+   struct simple_xattrs xattrs;
+
int (*open)(struct inode *inode, struct file *file);
ssize_t (*read)(struct cgroup *cgrp, struct cftype *cft,
struct file *file,
@@ -394,7 +401,7 @@
  */
 struct cftype_set {
struct list_headnode;   /* chained at subsys->cftsets */
-   const struct cftype *cfts;
+   struct cftype   *cfts;
 };
 
 struct cgroup_scanner {
@@ -406,8 +413,8 @@
void *data;
 };
 
-int cgroup_add_cftypes(struct cgroup_subsys *ss, const struct cftype *cfts);
-int cgroup_rm_cftypes(struct cgroup_subsys *ss, const struct cftype *cfts);
+int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts);
+int cgroup_rm_cftypes(struct cgroup_subsys *ss, struct cftype *cfts);
 
 int cgroup_is_removed(const struct cgroup *cgrp);
 
Index: github/kernel/cgroup.c
===
--- github.orig/kernel/cgroup.c 2012-08-16 10:27:45.0 -0400
+++ github/kernel/cgroup.c  2012-08-16 11:10:37.470765933 -0400
@@ -276,7 +276,8 @@
 
 /* bits in struct cgroupfs_root flags field */
 enum {
-   ROOT_NOPREFIX, /* mounted subsystems have no named prefix */
+   ROOT_NOPREFIX,  /* mounted subsystems have no named prefix */
+   ROOT_XATTR, /* supports extended attributes */
 };
 
 static int cgroup_is_releasable(const struct cgroup *cgrp)
@@ -913,15 +914,19 @@
 */
BUG_ON(!list_empty(>pidlists));
 
+   simple_xattrs_free(>xattrs);
+
kfree_rcu(cgrp, rcu_head);
} else {
struct cfent *cfe = __d_cfe(dentry);
struct cgroup *cgrp = dentry->d_parent->d_fsdata;
+   struct cftype *cft = cfe->type;
 
WARN_ONCE(!list_empty(>node) &&
  cgrp != >root->top_cgroup,
  "cfe still linked for %s\n", cfe->type->name);
kfree(cfe);
+   simple_xattrs_free(>xattrs);
}
iput(inode);
 }
@@ -1140,6 +1145,8 @@
seq_printf(seq, ",%s", ss->name);
if (test_bit(ROOT_NOPREFIX, >flags))
seq_puts(seq, ",noprefix");
+   if (test_bit(ROOT_XATTR,

[PATCH v6 4/4] cgroup: rename subsys_bits to subsys_mask

2012-08-16 Thread aris

In a previous discussion, Tejun Heo suggested renaming references to
subsys_bits (added_bits, removed_bits, etc.) to something more meaningful.

Cc: Li Zefan 
Cc: Tejun Heo 
Cc: Hugh Dickins 
Cc: Hillf Danton 
Cc: Lennart Poettering 
Signed-off-by: Aristeu Rozanski subsys_bits & (1UL << i)) {
+   if (root->subsys_mask & (1UL << i)) {
/* Subsystem is in this hierarchy. So we want
 * the subsystem state from the new
 * cgroup */
@@ -1002,7 +1002,7 @@
struct dentry *parent;
struct cgroupfs_root *root = dentry->d_sb->s_fs_info;
 
-   cgroup_clear_directory(dentry, true, root->subsys_bits);
+   cgroup_clear_directory(dentry, true, root->subsys_mask);
 
parent = dentry->d_parent;
spin_lock(>d_lock);
@@ -1046,22 +1046,22 @@
  * returns an error, no reference counts are touched.
  */
 static int rebind_subsystems(struct cgroupfs_root *root,
- unsigned long final_bits)
+ unsigned long final_subsys_mask)
 {
-   unsigned long added_bits, removed_bits;
+   unsigned long added_mask, removed_mask;
struct cgroup *cgrp = >top_cgroup;
int i;
 
BUG_ON(!mutex_is_locked(_mutex));
BUG_ON(!mutex_is_locked(_root_mutex));
 
-   removed_bits = root->actual_subsys_bits & ~final_bits;
-   added_bits = final_bits & ~root->actual_subsys_bits;
+   removed_mask = root->actual_subsys_mask & ~final_subsys_mask;
+   added_mask = final_subsys_mask & ~root->actual_subsys_mask;
/* Check that any added subsystems are currently free */
for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
unsigned long bit = 1UL << i;
struct cgroup_subsys *ss = subsys[i];
-   if (!(bit & added_bits))
+   if (!(bit & added_mask))
continue;
/*
 * Nobody should tell us to do a subsys that doesn't exist:
@@ -1086,7 +1086,7 @@
for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
struct cgroup_subsys *ss = subsys[i];
unsigned long bit = 1UL << i;
-   if (bit & added_bits) {
+   if (bit & added_mask) {
/* We're binding this subsystem to this hierarchy */
BUG_ON(ss == NULL);
BUG_ON(cgrp->subsys[i]);
@@ -1099,7 +1099,7 @@
if (ss->bind)
ss->bind(cgrp);
/* refcount was already taken, and we're keeping it */
-   } else if (bit & removed_bits) {
+   } else if (bit & removed_mask) {
/* We're removing this subsystem */
BUG_ON(ss == NULL);
BUG_ON(cgrp->subsys[i] != dummytop->subsys[i]);
@@ -1112,7 +1112,7 @@
list_move(>sibling, _list);
/* subsystem is now free - drop reference on module */
module_put(ss->module);
-   } else if (bit & final_bits) {
+   } else if (bit & final_subsys_mask) {
/* Subsystem state should already exist */
BUG_ON(ss == NULL);
BUG_ON(!cgrp->subsys[i]);
@@ -1129,7 +1129,7 @@
BUG_ON(cgrp->subsys[i]);
}
}
-   root->subsys_bits = root->actual_subsys_bits = final_bits;
+   root->subsys_mask = root->actual_subsys_mask = final_subsys_mask;
synchronize_rcu();
 
return 0;
@@ -1158,7 +1158,7 @@
 }
 
 struct cgroup_sb_opts {
-   unsigned long subsys_bits;
+   unsigned long subsys_mask;
unsigned long flags;
char *release_agent;
bool clone_children;
@@ -1267,7 +1267,7 @@
/* Mutually exclusive option 'all' + subsystem name */
if (all_ss)
return -EINVAL;
-   set_bit(i, >subsys_bits);
+   set_bit(i, >subsys_mask);
one_ss = true;
 
break;
@@ -1288,7 +1288,7 @@
continue;
if (ss->disabled)
continue;
-   set_bit(i, >subsys_bits);
+   set_bit(i, >subsys_mask);
}
}
 
@@ -1300,19 +1300,19 @@
 * the cpuset subsystem.
 */
if (test_bit(ROOT_NOPREFIX, >flags) &&
-   (opts->subsys_bits & mask))
+   (opts->subsys_mask & mask))
return -EINVAL;
 
 
/* Can't specify "none" and some subsystems */
-   if (opts->subsys_bits && opts->none)
+   if (opts->subsys_mask && opts->none)
return -EINVAL;
 
/*
 * We either have to specify by name or by subsystems. (So

[PATCH v6 4/4] cgroup: rename subsys_bits to subsys_mask

2012-08-16 Thread aris

In a previous discussion, Tejun Heo suggested renaming references to
subsys_bits (added_bits, removed_bits, etc.) to something more meaningful.

Cc: Li Zefan lize...@huawei.com
Cc: Tejun Heo t...@kernel.org
Cc: Hugh Dickins hu...@google.com
Cc: Hillf Danton dhi...@gmail.com
Cc: Lennart Poettering lpoet...@redhat.com
Signed-off-by: Aristeu Rozanski a...@redhat.com

---
 kernel/cgroup.c |   84 
 1 file changed, 42 insertions(+), 42 deletions(-)

Index: github/kernel/cgroup.c
===
--- github.orig/kernel/cgroup.c 2012-08-16 11:33:43.276337899 -0400
+++ github/kernel/cgroup.c  2012-08-16 11:33:43.292338339 -0400
@@ -111,13 +111,13 @@
 * The bitmask of subsystems intended to be attached to this
 * hierarchy
 */
-   unsigned long subsys_bits;
+   unsigned long subsys_mask;
 
/* Unique id for this hierarchy. */
int hierarchy_id;
 
/* The bitmask of subsystems currently attached to this hierarchy */
-   unsigned long actual_subsys_bits;
+   unsigned long actual_subsys_mask;
 
/* A list running through the attached subsystems */
struct list_head subsys_list;
@@ -557,7 +557,7 @@
 * won't change, so no need for locking.
 */
for (i = 0; i  CGROUP_SUBSYS_COUNT; i++) {
-   if (root-subsys_bits  (1UL  i)) {
+   if (root-subsys_mask  (1UL  i)) {
/* Subsystem is in this hierarchy. So we want
 * the subsystem state from the new
 * cgroup */
@@ -1002,7 +1002,7 @@
struct dentry *parent;
struct cgroupfs_root *root = dentry-d_sb-s_fs_info;
 
-   cgroup_clear_directory(dentry, true, root-subsys_bits);
+   cgroup_clear_directory(dentry, true, root-subsys_mask);
 
parent = dentry-d_parent;
spin_lock(parent-d_lock);
@@ -1046,22 +1046,22 @@
  * returns an error, no reference counts are touched.
  */
 static int rebind_subsystems(struct cgroupfs_root *root,
- unsigned long final_bits)
+ unsigned long final_subsys_mask)
 {
-   unsigned long added_bits, removed_bits;
+   unsigned long added_mask, removed_mask;
struct cgroup *cgrp = root-top_cgroup;
int i;
 
BUG_ON(!mutex_is_locked(cgroup_mutex));
BUG_ON(!mutex_is_locked(cgroup_root_mutex));
 
-   removed_bits = root-actual_subsys_bits  ~final_bits;
-   added_bits = final_bits  ~root-actual_subsys_bits;
+   removed_mask = root-actual_subsys_mask  ~final_subsys_mask;
+   added_mask = final_subsys_mask  ~root-actual_subsys_mask;
/* Check that any added subsystems are currently free */
for (i = 0; i  CGROUP_SUBSYS_COUNT; i++) {
unsigned long bit = 1UL  i;
struct cgroup_subsys *ss = subsys[i];
-   if (!(bit  added_bits))
+   if (!(bit  added_mask))
continue;
/*
 * Nobody should tell us to do a subsys that doesn't exist:
@@ -1086,7 +1086,7 @@
for (i = 0; i  CGROUP_SUBSYS_COUNT; i++) {
struct cgroup_subsys *ss = subsys[i];
unsigned long bit = 1UL  i;
-   if (bit  added_bits) {
+   if (bit  added_mask) {
/* We're binding this subsystem to this hierarchy */
BUG_ON(ss == NULL);
BUG_ON(cgrp-subsys[i]);
@@ -1099,7 +1099,7 @@
if (ss-bind)
ss-bind(cgrp);
/* refcount was already taken, and we're keeping it */
-   } else if (bit  removed_bits) {
+   } else if (bit  removed_mask) {
/* We're removing this subsystem */
BUG_ON(ss == NULL);
BUG_ON(cgrp-subsys[i] != dummytop-subsys[i]);
@@ -1112,7 +1112,7 @@
list_move(ss-sibling, rootnode.subsys_list);
/* subsystem is now free - drop reference on module */
module_put(ss-module);
-   } else if (bit  final_bits) {
+   } else if (bit  final_subsys_mask) {
/* Subsystem state should already exist */
BUG_ON(ss == NULL);
BUG_ON(!cgrp-subsys[i]);
@@ -1129,7 +1129,7 @@
BUG_ON(cgrp-subsys[i]);
}
}
-   root-subsys_bits = root-actual_subsys_bits = final_bits;
+   root-subsys_mask = root-actual_subsys_mask = final_subsys_mask;
synchronize_rcu();
 
return 0;
@@ -1158,7 +1158,7 @@
 }
 
 struct cgroup_sb_opts {
-   unsigned long subsys_bits;
+   unsigned long subsys_mask;
unsigned long flags;
char *release_agent;

[PATCH v6 3/4] cgroup: add xattr support

2012-08-16 Thread aris

From: Li Zefan lize...@huawei.com

This is one of the items in the plumber's wish list.

For use cases:

>> What would the use case be for this?
>
> Attaching meta information to services, in an easily discoverable
> way. For example, in systemd we create one cgroup for each service, and
> could then store data like the main pid of the specific service as an
> xattr on the cgroup itself. That way we'd have almost all service state
> in the cgroupfs, which would make it possible to terminate systemd and
> later restart it without losing any state information. But there's more:
> for example, some very peculiar services cannot be terminated on
> shutdown (i.e. fakeraid DM stuff) and it would be really nice if the
> services in question could just mark that on their cgroup, by setting an
> xattr. On the more desktopy side of things there are other
> possibilities: for example there are plans defining what an application
> is along the lines of a cgroup (i.e. an app being a collection of
> processes). With xattrs one could then attach an icon or human readable
> program name on the cgroup.
>
> The key idea is that this would allow attaching runtime meta information
> to cgroups and everything they model (services, apps, vms), that doesn't
> need any complex userspace infrastructure, has good access control
> (i.e. because the file system enforces that anyway, and there's the
> "trusted." xattr namespace), notifications (inotify), and can easily be
> shared among applications.
>
> Lennart

v6:
- remove user xattr namespace, only allow trusted and security
v5:
- check for capabilities before setting/removing xattrs
v4:
- no changes
v3:
- instead of config option, use mount option to enable xattr support

Cc: Li Zefan lize...@huawei.com
Cc: Tejun Heo t...@kernel.org
Cc: Hugh Dickins hu...@google.com
Cc: Hillf Danton dhi...@gmail.com
Cc: Lennart Poettering lpoet...@redhat.com
Signed-off-by: Li Zefan lize...@huawei.com
Signed-off-by: Aristeu Rozanski a...@redhat.com

---
 include/linux/cgroup.h |   13 --
 kernel/cgroup.c|  100 +
 2 files changed, 103 insertions(+), 10 deletions(-)

Index: github/include/linux/cgroup.h
===
--- github.orig/include/linux/cgroup.h  2012-08-16 10:24:50.0 -0400
+++ github/include/linux/cgroup.h   2012-08-16 10:27:53.975223786 -0400
@@ -17,6 +17,7 @@
 #include <linux/rwsem.h>
 #include <linux/idr.h>
 #include <linux/workqueue.h>
+#include <linux/xattr.h>
 
 #ifdef CONFIG_CGROUPS
 
@@ -216,6 +217,9 @@
/* List of events which userspace want to receive */
struct list_head event_list;
spinlock_t event_list_lock;
+
+   /* directory xattrs */
+   struct simple_xattrs xattrs;
 };
 
 /*
@@ -309,6 +313,9 @@
/* CFTYPE_* flags */
unsigned int flags;
 
+   /* file xattrs */
+   struct simple_xattrs xattrs;
+
int (*open)(struct inode *inode, struct file *file);
ssize_t (*read)(struct cgroup *cgrp, struct cftype *cft,
struct file *file,
@@ -394,7 +401,7 @@
  */
 struct cftype_set {
struct list_headnode;   /* chained at subsys-cftsets */
-   const struct cftype *cfts;
+   struct cftype   *cfts;
 };
 
 struct cgroup_scanner {
@@ -406,8 +413,8 @@
void *data;
 };
 
-int cgroup_add_cftypes(struct cgroup_subsys *ss, const struct cftype *cfts);
-int cgroup_rm_cftypes(struct cgroup_subsys *ss, const struct cftype *cfts);
+int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts);
+int cgroup_rm_cftypes(struct cgroup_subsys *ss, struct cftype *cfts);
 
 int cgroup_is_removed(const struct cgroup *cgrp);
 
Index: github/kernel/cgroup.c
===
--- github.orig/kernel/cgroup.c 2012-08-16 10:27:45.0 -0400
+++ github/kernel/cgroup.c  2012-08-16 11:10:37.470765933 -0400
@@ -276,7 +276,8 @@
 
 /* bits in struct cgroupfs_root flags field */
 enum {
-   ROOT_NOPREFIX, /* mounted subsystems have no named prefix */
+   ROOT_NOPREFIX,  /* mounted subsystems have no named prefix */
+   ROOT_XATTR, /* supports extended attributes */
 };
 
 static int cgroup_is_releasable(const struct cgroup *cgrp)
@@ -913,15 +914,19 @@
 */
BUG_ON(!list_empty(cgrp-pidlists));
 
+   simple_xattrs_free(cgrp-xattrs);
+
kfree_rcu(cgrp, rcu_head);
} else {
struct cfent *cfe = __d_cfe(dentry);
struct cgroup *cgrp = dentry-d_parent-d_fsdata;
+   struct cftype *cft = cfe-type;
 
WARN_ONCE(!list_empty(cfe-node) 
  cgrp != cgrp-root-top_cgroup,
  cfe still linked for %s\n, cfe-type-name);
kfree(cfe);
+   simple_xattrs_free(cft-xattrs);
}
iput(inode);
 }
@@ -1140,6 +1145,8 @@

[PATCH v6 0/4] cgroup: add xattr support

2012-08-16 Thread aris

This series is a refreshed version of a patchset submitted by Li Zefan back
in March:
https://lkml.org/lkml/2012/3/1/13

With Li's permission, I refreshed the patches to apply over the latest upstream
and added the modifications suggested by others in the thread:
- using a mount option instead of config option to enable the xattr support
- reinitialize the list in kmem_xattrs_free()
- renamed functions to simple_xattr_*()

There're two users for this patchset:
- SELinux: to be able to control access to cgroupfs inside containers
- systemd: to store meta information such as main PID in a service cgroup,
  set specific services special options in the cgroup.

While the xattrs will use kernel memory like tmpfs, they're restricted to
'security' (which controls the format of the value) and 'trusted' (which
requires CAP_SYS_ADMIN). If kernel memory usage is still a concern, we're
not far from having memcg account for kernel memory.

v6:
- only allow trusted and security
- replace subsys_bits by something more meaningful
v5:
- check for permissions for user xattr namespace
v4:
- implemented requested changes by Tejun Heo in patch #2

Cc: Li Zefan lize...@huawei.com
Cc: Tejun Heo t...@kernel.org
Cc: Hugh Dickins hu...@google.com
Cc: Hillf Danton dhi...@gmail.com
Cc: Lennart Poettering lpoet...@redhat.com
Signed-off-by: Li Zefan lize...@huawei.com
Signed-off-by: Aristeu Rozanski a...@redhat.com

-- 
Aristeu
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v6 2/4] cgroup: revise how we re-populate root directory

2012-08-16 Thread aris

From: Li Zefan lize...@huawei.com

When remounting cgroupfs with some subsystems added to it and some
removed, cgroup will remove all the files in the root directory and then
re-populate it.

What I'm doing here is to remove only the files which belong to subsystems
that are to be unbound, and to create files only for newly-added subsystems.
The purpose is to leave all other files untouched.

This is a preparation for cgroup xattr support.

v6:
- no changes
v5:
- no changes
v4:
- refactored cgroup_clear_directory() to not use cgroup_rm_file()
- instead of going thru the list of files, get the file list using the
  subsystems
- use 'subsys_mask' instead of {added,removed}_bits and made
  cgroup_populate_dir() to match the parameters with cgroup_clear_directory()
v3:
- refresh patches after recent refactoring

Cc: Li Zefan lize...@huawei.com
Cc: Tejun Heo t...@kernel.org
Cc: Hugh Dickins hu...@google.com
Cc: Hillf Danton dhi...@gmail.com
Cc: Lennart Poettering lpoet...@redhat.com
Signed-off-by: Li Zefan lize...@huawei.com
Signed-off-by: Aristeu Rozanski a...@redhat.com

---
 kernel/cgroup.c |   61 
 1 file changed, 48 insertions(+), 13 deletions(-)

Index: github/kernel/cgroup.c
===
--- github.orig/kernel/cgroup.c 2012-08-16 11:28:12.683272452 -0400
+++ github/kernel/cgroup.c  2012-08-16 11:28:42.592089681 -0400
@@ -824,7 +824,8 @@
 static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, umode_t 
mode);
 static struct dentry *cgroup_lookup(struct inode *, struct dentry *, unsigned 
int);
 static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry);
-static int cgroup_populate_dir(struct cgroup *cgrp);
+static int cgroup_populate_dir(struct cgroup *cgrp, bool base_files,
+  unsigned long subsys_mask);
 static const struct inode_operations cgroup_dir_inode_operations;
 static const struct file_operations proc_cgroupstats_operations;
 
@@ -963,12 +964,29 @@
return -ENOENT;
 }
 
-static void cgroup_clear_directory(struct dentry *dir)
+/**
+ * cgroup_clear_directory - selective removal of base and subsystem files
+ * @dir: directory containing the files
+ * @base_files: true if the base files should be removed
+ * @subsys_mask: mask of the subsystem ids whose files should be removed
+ */
+static void cgroup_clear_directory(struct dentry *dir, bool base_files,
+  unsigned long subsys_mask)
 {
struct cgroup *cgrp = __d_cgrp(dir);
+   struct cgroup_subsys *ss;
 
-   while (!list_empty(cgrp-files))
-   cgroup_rm_file(cgrp, NULL);
+   for_each_subsys(cgrp-root, ss) {
+   struct cftype_set *set;
+   if (!test_bit(ss-subsys_id, subsys_mask))
+   continue;
+   list_for_each_entry(set, ss-cftsets, node)
+   cgroup_rm_file(cgrp, set-cfts);
+   }
+   if (base_files) {
+   while (!list_empty(cgrp-files))
+   cgroup_rm_file(cgrp, NULL);
+   }
 }
 
 /*
@@ -977,8 +995,9 @@
 static void cgroup_d_remove_dir(struct dentry *dentry)
 {
struct dentry *parent;
+   struct cgroupfs_root *root = dentry-d_sb-s_fs_info;
 
-   cgroup_clear_directory(dentry);
+   cgroup_clear_directory(dentry, true, root-subsys_bits);
 
parent = dentry-d_parent;
spin_lock(parent-d_lock);
@@ -1339,6 +1358,7 @@
struct cgroupfs_root *root = sb-s_fs_info;
struct cgroup *cgrp = root-top_cgroup;
struct cgroup_sb_opts opts;
+   unsigned long added_bits, removed_bits;
 
mutex_lock(cgrp-dentry-d_inode-i_mutex);
mutex_lock(cgroup_mutex);
@@ -1354,6 +1374,9 @@
pr_warning(cgroup: option changes via remount are deprecated 
(pid=%d comm=%s)\n,
   task_tgid_nr(current), current-comm);
 
+   added_bits = opts.subsys_bits  ~root-subsys_bits;
+   removed_bits = root-subsys_bits  ~opts.subsys_bits;
+
/* Don't allow flags or name to change at remount */
if (opts.flags != root-flags ||
(opts.name  strcmp(opts.name, root-name))) {
@@ -1369,8 +1392,9 @@
}
 
/* clear out any existing files and repopulate subsystem files */
-   cgroup_clear_directory(cgrp-dentry);
-   cgroup_populate_dir(cgrp);
+   cgroup_clear_directory(cgrp-dentry, false, removed_bits);
+   /* re-populate subsystem files */
+   cgroup_populate_dir(cgrp, false, added_bits);
 
if (opts.release_agent)
strcpy(root-release_agent_path, opts.release_agent);
@@ -1669,7 +1693,7 @@
BUG_ON(root-number_of_cgroups != 1);
 
cred = override_creds(init_cred);
-   cgroup_populate_dir(root_cgrp);
+   cgroup_populate_dir(root_cgrp, true, root-subsys_bits);
revert_creds(cred);

[PATCH v6 1/4] xattr: extract simple_xattr code from tmpfs

2012-08-16 Thread aris

From: Li Zefan lize...@huawei.com

Extract in-memory xattr APIs from tmpfs. Will be used by cgroup.

$ size vmlinux.o
   textdata bss dec hex filename
4658782  880729 5195032 10734543 a3cbcf vmlinux.o
$ size vmlinux.o
   textdata bss dec hex filename
4658957  880729 5195032 10734718 a3cc7e vmlinux.o

v6:
- no changes
v5:
- no changes
v4:
- move simple_xattrs_free() to fs/xattr.c
v3:
- in kmem_xattrs_free(), reinitialize the list
- use simple_xattr_* prefix
- introduce simple_xattr_add() to prevent direct list usage

Cc: Li Zefan lize...@huawei.com
Cc: Tejun Heo t...@kernel.org
Cc: Hugh Dickins hu...@google.com
Cc: Hillf Danton dhi...@gmail.com
Cc: Lennart Poettering lpoet...@redhat.com
Signed-off-by: Li Zefan lize...@huawei.com
Signed-off-by: Aristeu Rozanski a...@redhat.com

---
 fs/xattr.c   |  200 +++
 include/linux/shmem_fs.h |3 
 include/linux/xattr.h|   25 +
 mm/shmem.c   |  171 +++-
 4 files changed, 240 insertions(+), 159 deletions(-)

Index: github/fs/xattr.c
===
--- github.orig/fs/xattr.c  2012-08-16 11:28:12.719273435 -0400
+++ github/fs/xattr.c   2012-08-16 11:28:13.975307743 -0400
@@ -791,3 +791,203 @@
 EXPORT_SYMBOL(generic_listxattr);
 EXPORT_SYMBOL(generic_setxattr);
 EXPORT_SYMBOL(generic_removexattr);
+
+/*
+ * initialize the simple_xattrs structure
+ */
+void simple_xattrs_init(struct simple_xattrs *xattrs)
+{
+   INIT_LIST_HEAD(xattrs-head);
+   spin_lock_init(xattrs-lock);
+}
+
+/*
+ * Allocate new xattr and copy in the value; but leave the name to callers.
+ */
+struct simple_xattr *simple_xattr_alloc(const void *value, size_t size)
+{
+   struct simple_xattr *new_xattr;
+   size_t len;
+
+   /* wrap around? */
+   len = sizeof(*new_xattr) + size;
+   if (len = sizeof(*new_xattr))
+   return NULL;
+
+   new_xattr = kmalloc(len, GFP_KERNEL);
+   if (!new_xattr)
+   return NULL;
+
+   new_xattr-size = size;
+   memcpy(new_xattr-value, value, size);
+   return new_xattr;
+}
+
+/*
+ * free all the xattrs
+ */
+void simple_xattrs_free(struct simple_xattrs *xattrs)
+{
+   struct simple_xattr *xattr, *node;
+
+   spin_lock(xattrs-lock);
+   list_for_each_entry_safe(xattr, node, xattrs-head, list) {
+   kfree(xattr-name);
+   kfree(xattr);
+   }
+   INIT_LIST_HEAD(xattrs-head);
+   spin_unlock(xattrs-lock);
+}
+
+/*
+ * xattr GET operation for in-memory/pseudo filesystems
+ */
+int simple_xattr_get(struct simple_xattrs *xattrs, const char *name,
+void *buffer, size_t size)
+{
+   struct simple_xattr *xattr;
+   int ret = -ENODATA;
+
+   spin_lock(xattrs-lock);
+   list_for_each_entry(xattr, xattrs-head, list) {
+   if (strcmp(name, xattr-name))
+   continue;
+
+   ret = xattr-size;
+   if (buffer) {
+   if (size  xattr-size)
+   ret = -ERANGE;
+   else
+   memcpy(buffer, xattr-value, xattr-size);
+   }
+   break;
+   }
+   spin_unlock(xattrs-lock);
+   return ret;
+}
+
+static int __simple_xattr_set(struct simple_xattrs *xattrs, const char *name,
+ const void *value, size_t size, int flags)
+{
+   struct simple_xattr *xattr;
+   struct simple_xattr *new_xattr = NULL;
+   size_t len;
+   int err = 0;
+
+   /* value == NULL means remove */
+   if (value) {
+   /* wrap around? */
+   len = sizeof(*new_xattr) + size;
+   if (len = sizeof(*new_xattr))
+   return -ENOMEM;
+
+   new_xattr = kmalloc(len, GFP_KERNEL);
+   if (!new_xattr)
+   return -ENOMEM;
+
+   new_xattr-name = kstrdup(name, GFP_KERNEL);
+   if (!new_xattr-name) {
+   kfree(new_xattr);
+   return -ENOMEM;
+   }
+
+   new_xattr-size = size;
+   memcpy(new_xattr-value, value, size);
+   }
+
+   spin_lock(xattrs-lock);
+   list_for_each_entry(xattr, xattrs-head, list) {
+   if (!strcmp(name, xattr-name)) {
+   if (flags  XATTR_CREATE) {
+   xattr = new_xattr;
+   err = -EEXIST;
+   } else if (new_xattr) {
+   list_replace(xattr-list, new_xattr-list);
+   } else {
+   list_del(xattr-list);
+   }
+   goto out;
+   }
+   }
+   if (flags  XATTR_REPLACE) {
+

[PATCH RESEND 0/4] device_cgroup: replace internally whitelist with exception list

2012-08-09 Thread aris

The original model of device_cgroup is a whitelist in which all the
allowed devices are listed. The problem with this approach is that it is
impossible to express the case of allowing everything but a few devices.

The reason for that lies in the way the whitelist is handled internally:
since there's only a whitelist, the "all devices" entry would have to be
removed and replaced by the entire list of possible devices but the ones
that are being denied.  Since dev_t is 32 bits long, representing the allowed
devices as a bitfield is not memory efficient.

This patch replaces the "whitelist" with an "exceptions" list, and the default
policy is kept in the "deny_all" variable in the dev_cgroup structure.

The current interface determines that whenever "a" is written to devices.allow
or devices.deny, the entry masking all devices will be added or removed,
respectively. This behavior is kept and it's what will determine the default
policy:

# cat devices.list 
a *:* rwm
# echo a >devices.deny
# cat devices.list 
# echo a >devices.allow
# cat devices.list 
a *:* rwm

The interface is also preserved. For example, if one wants to block only access
to /dev/null:
# ls -l /dev/null
crw-rw-rw- 1 root root 1, 3 Jul 24 16:17 /dev/null
# echo a >devices.allow
# echo "c 1:3 rwm" >devices.deny
# cat /dev/null
cat: /dev/null: Operation not permitted
# echo >/dev/null
bash: /dev/null: Operation not permitted
mknod /tmp/null c 1 3
mknod: ‘/tmp/null’: Operation not permitted
# echo "c 1:3 r" >devices.allow
# cat /dev/null
# echo >/dev/null
bash: /dev/null: Operation not permitted
mknod /tmp/null c 1 3
mknod: ‘/tmp/null’: Operation not permitted
# echo "c 1:3 rw" >devices.allow
# echo >/dev/null
# cat /dev/null
# mknod /tmp/null c 1 3
mknod: ‘/tmp/null’: Operation not permitted
# echo "c 1:3 rwm" >devices.allow
# echo >/dev/null
# cat /dev/null
# mknod /tmp/null c 1 3
#

 device_cgroup.c |  371 
 1 file changed, 212 insertions(+), 159 deletions(-)

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH RESEND 3/4] device_cgroup: convert device_cgroup internally to policy + exceptions

2012-08-09 Thread aris

The original model of device_cgroup is a whitelist in which all the
allowed devices are listed. The problem with this approach is that it is
impossible to express the case of allowing everything but a few devices.

The reason for that lies in the way the whitelist is handled internally:
since there's only a whitelist, the "all devices" entry would have to be
removed and replaced by the entire list of possible devices but the ones
that are being denied.  Since dev_t is 32 bits long, representing the allowed
devices as a bitfield is not memory efficient.

This patch replaces the "whitelist" with an "exceptions" list, and the default
policy is kept in the "deny_all" variable in the dev_cgroup structure.

The current interface determines that whenever "a" is written to devices.allow
or devices.deny, the entry masking all devices will be added or removed,
respectively. This behavior is kept and it's what will determine the default
policy:

# cat devices.list 
a *:* rwm
# echo a >devices.deny
# cat devices.list 
# echo a >devices.allow
# cat devices.list 
a *:* rwm

The interface is also preserved. For example, if one wants to block only access
to /dev/null:
# ls -l /dev/null
crw-rw-rw- 1 root root 1, 3 Jul 24 16:17 /dev/null
# echo a >devices.allow
# echo "c 1:3 rwm" >devices.deny
# cat /dev/null
cat: /dev/null: Operation not permitted
# echo >/dev/null
bash: /dev/null: Operation not permitted
mknod /tmp/null c 1 3
mknod: ‘/tmp/null’: Operation not permitted
# echo "c 1:3 r" >devices.allow
# cat /dev/null
# echo >/dev/null
bash: /dev/null: Operation not permitted
mknod /tmp/null c 1 3
mknod: ‘/tmp/null’: Operation not permitted
# echo "c 1:3 rw" >devices.allow
# echo >/dev/null
# cat /dev/null
# mknod /tmp/null c 1 3
mknod: ‘/tmp/null’: Operation not permitted
# echo "c 1:3 rwm" >devices.allow
# echo >/dev/null
# cat /dev/null
# mknod /tmp/null c 1 3
#

Note that I didn't rename the functions/variables in this patch, but in the
next one to make reviewing easier.

Signed-off-by: Aristeu Rozanski 

---
 security/device_cgroup.c |  228 +++
 1 file changed, 132 insertions(+), 96 deletions(-)

Index: github/security/device_cgroup.c
===
--- github.orig/security/device_cgroup.c2012-07-24 17:58:42.984081909 
-0400
+++ github/security/device_cgroup.c 2012-07-24 17:59:11.324823460 -0400
@@ -96,7 +96,6 @@
return -ENOMEM;
 }
 
-/* Stupid prototype - don't bother combining existing entries */
 /*
  * called under devcgroup_mutex
  */
@@ -136,16 +135,13 @@
struct dev_whitelist_item *walk, *tmp;
 
list_for_each_entry_safe(walk, tmp, _cgroup->whitelist, list) {
-   if (walk->type == DEV_ALL)
-   goto remove;
if (walk->type != wh->type)
continue;
-   if (walk->major != ~0 && walk->major != wh->major)
+   if (walk->major != wh->major)
continue;
-   if (walk->minor != ~0 && walk->minor != wh->minor)
+   if (walk->minor != wh->minor)
continue;
 
-remove:
walk->access &= ~wh->access;
if (!walk->access) {
list_del_rcu(>list);
@@ -185,19 +181,9 @@
INIT_LIST_HEAD(_cgroup->whitelist);
parent_cgroup = cgroup->parent;
 
-   if (parent_cgroup == NULL) {
-   struct dev_whitelist_item *wh;
-   wh = kmalloc(sizeof(*wh), GFP_KERNEL);
-   if (!wh) {
-   kfree(dev_cgroup);
-   return ERR_PTR(-ENOMEM);
-   }
-   wh->minor = wh->major = ~0;
-   wh->type = DEV_ALL;
-   wh->access = ACC_MASK;
+   if (parent_cgroup == NULL)
dev_cgroup->deny_all = false;
-   list_add(>list, _cgroup->whitelist);
-   } else {
+   else {
parent_dev_cgroup = cgroup_to_devcgroup(parent_cgroup);
mutex_lock(_mutex);
ret = dev_whitelist_copy(_cgroup->whitelist,
@@ -268,33 +254,48 @@
char maj[MAJMINLEN], min[MAJMINLEN], acc[ACCLEN];
 
rcu_read_lock();
-   list_for_each_entry_rcu(wh, >whitelist, list) {
-   set_access(acc, wh->access);
-   set_majmin(maj, wh->major);
-   set_majmin(min, wh->minor);
-   seq_printf(m, "%c %s:%s %s\n", type_to_char(wh->type),
+   /*
+* To preserve the compatibility:
+* - Only show the "all devices" when the default policy is to allow
+* - List the exceptions in case the default policy is to deny
+*

[PATCH RESEND 4/4] device_cgroup: rename whitelist to exception list

2012-08-09 Thread aris

This patch replaces the "whitelist" usage in the code and comments with
exception list related information.

Signed-off-by: Aristeu Rozanski 

---
 security/device_cgroup.c |  198 +++
 1 file changed, 99 insertions(+), 99 deletions(-)

Index: github/security/device_cgroup.c
===
--- github.orig/security/device_cgroup.c2012-07-24 17:50:14.0 
-0400
+++ github/security/device_cgroup.c 2012-07-24 17:50:48.343663036 -0400
@@ -26,12 +26,12 @@
 static DEFINE_MUTEX(devcgroup_mutex);
 
 /*
- * whitelist locking rules:
+ * exception list locking rules:
  * hold devcgroup_mutex for update/read.
  * hold rcu_read_lock() for read.
  */
 
-struct dev_whitelist_item {
+struct dev_exception_item {
u32 major, minor;
short type;
short access;
@@ -41,7 +41,7 @@
 
 struct dev_cgroup {
struct cgroup_subsys_state css;
-   struct list_head whitelist;
+   struct list_head exceptions;
bool deny_all;
 };
 
@@ -75,12 +75,12 @@
 /*
  * called under devcgroup_mutex
  */
-static int dev_whitelist_copy(struct list_head *dest, struct list_head *orig)
+static int dev_exceptions_copy(struct list_head *dest, struct list_head *orig)
 {
-   struct dev_whitelist_item *wh, *tmp, *new;
+   struct dev_exception_item *ex, *tmp, *new;
 
-   list_for_each_entry(wh, orig, list) {
-   new = kmemdup(wh, sizeof(*wh), GFP_KERNEL);
+   list_for_each_entry(ex, orig, list) {
+   new = kmemdup(ex, sizeof(*ex), GFP_KERNEL);
if (!new)
goto free_and_exit;
list_add_tail(>list, dest);
@@ -89,9 +89,9 @@
return 0;
 
 free_and_exit:
-   list_for_each_entry_safe(wh, tmp, dest, list) {
-   list_del(>list);
-   kfree(wh);
+   list_for_each_entry_safe(ex, tmp, dest, list) {
+   list_del(>list);
+   kfree(ex);
}
return -ENOMEM;
 }
@@ -99,50 +99,50 @@
 /*
  * called under devcgroup_mutex
  */
-static int dev_whitelist_add(struct dev_cgroup *dev_cgroup,
-   struct dev_whitelist_item *wh)
+static int dev_exception_add(struct dev_cgroup *dev_cgroup,
+struct dev_exception_item *ex)
 {
-   struct dev_whitelist_item *whcopy, *walk;
+   struct dev_exception_item *excopy, *walk;
 
-   whcopy = kmemdup(wh, sizeof(*wh), GFP_KERNEL);
-   if (!whcopy)
+   excopy = kmemdup(ex, sizeof(*ex), GFP_KERNEL);
+   if (!excopy)
return -ENOMEM;
 
-   list_for_each_entry(walk, _cgroup->whitelist, list) {
-   if (walk->type != wh->type)
+   list_for_each_entry(walk, _cgroup->exceptions, list) {
+   if (walk->type != ex->type)
continue;
-   if (walk->major != wh->major)
+   if (walk->major != ex->major)
continue;
-   if (walk->minor != wh->minor)
+   if (walk->minor != ex->minor)
continue;
 
-   walk->access |= wh->access;
-   kfree(whcopy);
-   whcopy = NULL;
+   walk->access |= ex->access;
+   kfree(excopy);
+   excopy = NULL;
}
 
-   if (whcopy != NULL)
-   list_add_tail_rcu(>list, _cgroup->whitelist);
+   if (excopy != NULL)
+   list_add_tail_rcu(>list, _cgroup->exceptions);
return 0;
 }
 
 /*
  * called under devcgroup_mutex
  */
-static void dev_whitelist_rm(struct dev_cgroup *dev_cgroup,
-   struct dev_whitelist_item *wh)
+static void dev_exception_rm(struct dev_cgroup *dev_cgroup,
+struct dev_exception_item *ex)
 {
-   struct dev_whitelist_item *walk, *tmp;
+   struct dev_exception_item *walk, *tmp;
 
-   list_for_each_entry_safe(walk, tmp, _cgroup->whitelist, list) {
-   if (walk->type != wh->type)
+   list_for_each_entry_safe(walk, tmp, _cgroup->exceptions, list) {
+   if (walk->type != ex->type)
continue;
-   if (walk->major != wh->major)
+   if (walk->major != ex->major)
continue;
-   if (walk->minor != wh->minor)
+   if (walk->minor != ex->minor)
continue;
 
-   walk->access &= ~wh->access;
+   walk->access &= ~ex->access;
if (!walk->access) {
list_del_rcu(>list);
kfree_rcu(walk, rcu);
@@ -151,18 +151,18 @@
 }
 
 /**
- * dev_whitelist_clean - frees all entries of the whitelist
- * @dev_cgroup: dev_cgroup with the whitelist to be cleaned
+ * dev_exception_clean - frees all entries of the exception list
+ * @dev_cgroup: dev_cgroup with the exception list to be cleaned
  *
  * called

[PATCH RESEND 2/4] device_cgroup: introduce dev_whitelist_clean()

2012-08-09 Thread aris

This function cleans all the items in a whitelist and will be used by the next
patches.

Signed-off-by: Aristeu Rozanski 

---
 security/device_cgroup.c |   22 +-
 1 file changed, 17 insertions(+), 5 deletions(-)

Index: github/security/device_cgroup.c
===
--- github.orig/security/device_cgroup.c2012-07-24 17:16:00.085156162 
-0400
+++ github/security/device_cgroup.c 2012-07-24 17:58:42.984081909 -0400
@@ -154,6 +154,22 @@
}
 }
 
+/**
+ * dev_whitelist_clean - frees all entries of the whitelist
+ * @dev_cgroup: dev_cgroup with the whitelist to be cleaned
+ *
+ * called under devcgroup_mutex
+ */
+static void dev_whitelist_clean(struct dev_cgroup *dev_cgroup)
+{
+   struct dev_whitelist_item *wh, *tmp;
+
+   list_for_each_entry_safe(wh, tmp, _cgroup->whitelist, list) {
+   list_del(>list);
+   kfree(wh);
+   }
+}
+
 /*
  * called from kernel/cgroup.c with cgroup_lock() held.
  */
@@ -200,13 +216,9 @@
 static void devcgroup_destroy(struct cgroup *cgroup)
 {
struct dev_cgroup *dev_cgroup;
-   struct dev_whitelist_item *wh, *tmp;
 
dev_cgroup = cgroup_to_devcgroup(cgroup);
-   list_for_each_entry_safe(wh, tmp, _cgroup->whitelist, list) {
-   list_del(>list);
-   kfree(wh);
-   }
+   dev_whitelist_clean(dev_cgroup);
kfree(dev_cgroup);
 }
 

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH RESEND 1/4] device_cgroup: add "deny_all" in dev_cgroup structure

2012-08-09 Thread aris

deny_all will determine if the default policy is to deny all device access
except for the ones in the exception list.

This variable will be used in the next patches to convert device_cgroup
internally into a default policy + rules.

Signed-off-by: Aristeu Rozanski 

---
 security/device_cgroup.c |5 +
 1 file changed, 5 insertions(+)

Index: github/security/device_cgroup.c
===
--- github.orig/security/device_cgroup.c2012-07-24 17:15:58.277108951 
-0400
+++ github/security/device_cgroup.c 2012-07-24 17:16:00.085156162 -0400
@@ -42,6 +42,7 @@
 struct dev_cgroup {
struct cgroup_subsys_state css;
struct list_head whitelist;
+   bool deny_all;
 };
 
 static inline struct dev_cgroup *css_to_devcgroup(struct cgroup_subsys_state 
*s)
@@ -178,12 +179,14 @@
wh->minor = wh->major = ~0;
wh->type = DEV_ALL;
wh->access = ACC_MASK;
+   dev_cgroup->deny_all = false;
list_add(>list, _cgroup->whitelist);
} else {
parent_dev_cgroup = cgroup_to_devcgroup(parent_cgroup);
mutex_lock(_mutex);
ret = dev_whitelist_copy(_cgroup->whitelist,
_dev_cgroup->whitelist);
+   dev_cgroup->deny_all = parent_dev_cgroup->deny_all;
mutex_unlock(_mutex);
if (ret) {
kfree(dev_cgroup);
@@ -409,9 +412,11 @@
case DEVCG_ALLOW:
if (!parent_has_perm(devcgroup, ))
return -EPERM;
+   devcgroup->deny_all = false;
return dev_whitelist_add(devcgroup, );
case DEVCG_DENY:
dev_whitelist_rm(devcgroup, );
+   devcgroup->deny_all = true;
break;
default:
return -EINVAL;

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH RESEND 1/4] device_cgroup: add deny_all in dev_cgroup structure

2012-08-09 Thread aris

deny_all will determine if the default policy is to deny all device access
except for the ones in the exception list.

This variable will be used in the next patches to convert device_cgroup
internally into a default policy + rules.

Signed-off-by: Aristeu Rozanski a...@redhat.com

---
 security/device_cgroup.c |5 +
 1 file changed, 5 insertions(+)

Index: github/security/device_cgroup.c
===
--- github.orig/security/device_cgroup.c2012-07-24 17:15:58.277108951 
-0400
+++ github/security/device_cgroup.c 2012-07-24 17:16:00.085156162 -0400
@@ -42,6 +42,7 @@
 struct dev_cgroup {
struct cgroup_subsys_state css;
struct list_head whitelist;
+   bool deny_all;
 };
 
 static inline struct dev_cgroup *css_to_devcgroup(struct cgroup_subsys_state 
*s)
@@ -178,12 +179,14 @@
wh-minor = wh-major = ~0;
wh-type = DEV_ALL;
wh-access = ACC_MASK;
+   dev_cgroup-deny_all = false;
list_add(wh-list, dev_cgroup-whitelist);
} else {
parent_dev_cgroup = cgroup_to_devcgroup(parent_cgroup);
mutex_lock(devcgroup_mutex);
ret = dev_whitelist_copy(dev_cgroup-whitelist,
parent_dev_cgroup-whitelist);
+   dev_cgroup-deny_all = parent_dev_cgroup-deny_all;
mutex_unlock(devcgroup_mutex);
if (ret) {
kfree(dev_cgroup);
@@ -409,9 +412,11 @@
case DEVCG_ALLOW:
if (!parent_has_perm(devcgroup, wh))
return -EPERM;
+   devcgroup-deny_all = false;
return dev_whitelist_add(devcgroup, wh);
case DEVCG_DENY:
dev_whitelist_rm(devcgroup, wh);
+   devcgroup-deny_all = true;
break;
default:
return -EINVAL;

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH RESEND 2/4] device_cgroup: introduce dev_whitelist_clean()

2012-08-09 Thread aris

This function cleans all the items in a whitelist and will be used by the next
patches.

Signed-off-by: Aristeu Rozanski a...@redhat.com

---
 security/device_cgroup.c |   22 +-
 1 file changed, 17 insertions(+), 5 deletions(-)

Index: github/security/device_cgroup.c
===
--- github.orig/security/device_cgroup.c2012-07-24 17:16:00.085156162 
-0400
+++ github/security/device_cgroup.c 2012-07-24 17:58:42.984081909 -0400
@@ -154,6 +154,22 @@
}
 }
 
+/**
+ * dev_whitelist_clean - frees all entries of the whitelist
+ * @dev_cgroup: dev_cgroup with the whitelist to be cleaned
+ *
+ * called under devcgroup_mutex
+ */
+static void dev_whitelist_clean(struct dev_cgroup *dev_cgroup)
+{
+   struct dev_whitelist_item *wh, *tmp;
+
+   list_for_each_entry_safe(wh, tmp, dev_cgroup-whitelist, list) {
+   list_del(wh-list);
+   kfree(wh);
+   }
+}
+
 /*
  * called from kernel/cgroup.c with cgroup_lock() held.
  */
@@ -200,13 +216,9 @@
 static void devcgroup_destroy(struct cgroup *cgroup)
 {
struct dev_cgroup *dev_cgroup;
-   struct dev_whitelist_item *wh, *tmp;
 
dev_cgroup = cgroup_to_devcgroup(cgroup);
-   list_for_each_entry_safe(wh, tmp, dev_cgroup-whitelist, list) {
-   list_del(wh-list);
-   kfree(wh);
-   }
+   dev_whitelist_clean(dev_cgroup);
kfree(dev_cgroup);
 }
 

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH RESEND 4/4] device_cgroup: rename whitelist to exception list

2012-08-09 Thread aris

This patch replaces the "whitelist" usage in the code and comments with
exception list related information.

Signed-off-by: Aristeu Rozanski a...@redhat.com

---
 security/device_cgroup.c |  198 +++
 1 file changed, 99 insertions(+), 99 deletions(-)

Index: github/security/device_cgroup.c
===
--- github.orig/security/device_cgroup.c2012-07-24 17:50:14.0 
-0400
+++ github/security/device_cgroup.c 2012-07-24 17:50:48.343663036 -0400
@@ -26,12 +26,12 @@
 static DEFINE_MUTEX(devcgroup_mutex);
 
 /*
- * whitelist locking rules:
+ * exception list locking rules:
  * hold devcgroup_mutex for update/read.
  * hold rcu_read_lock() for read.
  */
 
-struct dev_whitelist_item {
+struct dev_exception_item {
u32 major, minor;
short type;
short access;
@@ -41,7 +41,7 @@
 
 struct dev_cgroup {
struct cgroup_subsys_state css;
-   struct list_head whitelist;
+   struct list_head exceptions;
bool deny_all;
 };
 
@@ -75,12 +75,12 @@
 /*
  * called under devcgroup_mutex
  */
-static int dev_whitelist_copy(struct list_head *dest, struct list_head *orig)
+static int dev_exceptions_copy(struct list_head *dest, struct list_head *orig)
 {
-   struct dev_whitelist_item *wh, *tmp, *new;
+   struct dev_exception_item *ex, *tmp, *new;
 
-   list_for_each_entry(wh, orig, list) {
-   new = kmemdup(wh, sizeof(*wh), GFP_KERNEL);
+   list_for_each_entry(ex, orig, list) {
+   new = kmemdup(ex, sizeof(*ex), GFP_KERNEL);
if (!new)
goto free_and_exit;
list_add_tail(new-list, dest);
@@ -89,9 +89,9 @@
return 0;
 
 free_and_exit:
-   list_for_each_entry_safe(wh, tmp, dest, list) {
-   list_del(wh-list);
-   kfree(wh);
+   list_for_each_entry_safe(ex, tmp, dest, list) {
+   list_del(ex-list);
+   kfree(ex);
}
return -ENOMEM;
 }
@@ -99,50 +99,50 @@
 /*
  * called under devcgroup_mutex
  */
-static int dev_whitelist_add(struct dev_cgroup *dev_cgroup,
-   struct dev_whitelist_item *wh)
+static int dev_exception_add(struct dev_cgroup *dev_cgroup,
+struct dev_exception_item *ex)
 {
-   struct dev_whitelist_item *whcopy, *walk;
+   struct dev_exception_item *excopy, *walk;
 
-   whcopy = kmemdup(wh, sizeof(*wh), GFP_KERNEL);
-   if (!whcopy)
+   excopy = kmemdup(ex, sizeof(*ex), GFP_KERNEL);
+   if (!excopy)
return -ENOMEM;
 
-   list_for_each_entry(walk, dev_cgroup-whitelist, list) {
-   if (walk-type != wh-type)
+   list_for_each_entry(walk, dev_cgroup-exceptions, list) {
+   if (walk-type != ex-type)
continue;
-   if (walk-major != wh-major)
+   if (walk-major != ex-major)
continue;
-   if (walk-minor != wh-minor)
+   if (walk-minor != ex-minor)
continue;
 
-   walk-access |= wh-access;
-   kfree(whcopy);
-   whcopy = NULL;
+   walk-access |= ex-access;
+   kfree(excopy);
+   excopy = NULL;
}
 
-   if (whcopy != NULL)
-   list_add_tail_rcu(whcopy-list, dev_cgroup-whitelist);
+   if (excopy != NULL)
+   list_add_tail_rcu(excopy-list, dev_cgroup-exceptions);
return 0;
 }
 
 /*
  * called under devcgroup_mutex
  */
-static void dev_whitelist_rm(struct dev_cgroup *dev_cgroup,
-   struct dev_whitelist_item *wh)
+static void dev_exception_rm(struct dev_cgroup *dev_cgroup,
+struct dev_exception_item *ex)
 {
-   struct dev_whitelist_item *walk, *tmp;
+   struct dev_exception_item *walk, *tmp;
 
-   list_for_each_entry_safe(walk, tmp, dev_cgroup-whitelist, list) {
-   if (walk-type != wh-type)
+   list_for_each_entry_safe(walk, tmp, dev_cgroup-exceptions, list) {
+   if (walk-type != ex-type)
continue;
-   if (walk-major != wh-major)
+   if (walk-major != ex-major)
continue;
-   if (walk-minor != wh-minor)
+   if (walk-minor != ex-minor)
continue;
 
-   walk-access = ~wh-access;
+   walk-access = ~ex-access;
if (!walk-access) {
list_del_rcu(walk-list);
kfree_rcu(walk, rcu);
@@ -151,18 +151,18 @@
 }
 
 /**
- * dev_whitelist_clean - frees all entries of the whitelist
- * @dev_cgroup: dev_cgroup with the whitelist to be cleaned
+ * dev_exception_clean - frees all entries of the exception list
+ * @dev_cgroup: dev_cgroup with the exception list to be cleaned

[PATCH RESEND 3/4] device_cgroup: convert device_cgroup internally to policy + exceptions

2012-08-09 Thread aris

The original model of device_cgroup is having a whitelist where all the
allowed devices are listed. The problem with this approach is that it is
impossible to have the case of allowing everything but a few devices.

The reason for that lies in the way the whitelist is handled internally:
since there's only a whitelist, the "all devices" entry would have to be
removed and replaced by the entire list of possible devices but the ones
that are being denied.  Since dev_t is 32 bits long, representing the allowed
devices as a bitfield is not memory efficient.

This patch replaces the "whitelist" with an "exceptions" list, and the default
policy is kept in the "deny_all" variable in the dev_cgroup structure.

The current interface determines that whenever "a" is written to devices.allow
or devices.deny, the entry masking all devices will be added or removed,
respectively. This behavior is kept and it's what will determine the default
policy:

# cat devices.list 
a *:* rwm
# echo a >devices.deny
# cat devices.list 
# echo a >devices.allow
# cat devices.list 
a *:* rwm

The interface is also preserved. For example, if one wants to block only access
to /dev/null:
# ls -l /dev/null
crw-rw-rw- 1 root root 1, 3 Jul 24 16:17 /dev/null
# echo a >devices.allow
# echo "c 1:3 rwm" >devices.deny
# cat /dev/null
cat: /dev/null: Operation not permitted
# echo >/dev/null
bash: /dev/null: Operation not permitted
mknod /tmp/null c 1 3
mknod: ‘/tmp/null’: Operation not permitted
# echo "c 1:3 r" >devices.allow
# cat /dev/null
# echo >/dev/null
bash: /dev/null: Operation not permitted
mknod /tmp/null c 1 3
mknod: ‘/tmp/null’: Operation not permitted
# echo "c 1:3 rw" >devices.allow
# echo >/dev/null
# cat /dev/null
# mknod /tmp/null c 1 3
mknod: ‘/tmp/null’: Operation not permitted
# echo "c 1:3 rwm" >devices.allow
# echo >/dev/null
# cat /dev/null
# mknod /tmp/null c 1 3
#

Note that I didn't rename the functions/variables in this patch, but in the
next one to make reviewing easier.

Signed-off-by: Aristeu Rozanski a...@redhat.com

---
 security/device_cgroup.c |  228 +++
 1 file changed, 132 insertions(+), 96 deletions(-)

Index: github/security/device_cgroup.c
===
--- github.orig/security/device_cgroup.c2012-07-24 17:58:42.984081909 
-0400
+++ github/security/device_cgroup.c 2012-07-24 17:59:11.324823460 -0400
@@ -96,7 +96,6 @@
return -ENOMEM;
 }
 
-/* Stupid prototype - don't bother combining existing entries */
 /*
  * called under devcgroup_mutex
  */
@@ -136,16 +135,13 @@
struct dev_whitelist_item *walk, *tmp;
 
list_for_each_entry_safe(walk, tmp, dev_cgroup-whitelist, list) {
-   if (walk-type == DEV_ALL)
-   goto remove;
if (walk-type != wh-type)
continue;
-   if (walk-major != ~0  walk-major != wh-major)
+   if (walk-major != wh-major)
continue;
-   if (walk-minor != ~0  walk-minor != wh-minor)
+   if (walk-minor != wh-minor)
continue;
 
-remove:
walk-access = ~wh-access;
if (!walk-access) {
list_del_rcu(walk-list);
@@ -185,19 +181,9 @@
INIT_LIST_HEAD(dev_cgroup-whitelist);
parent_cgroup = cgroup-parent;
 
-   if (parent_cgroup == NULL) {
-   struct dev_whitelist_item *wh;
-   wh = kmalloc(sizeof(*wh), GFP_KERNEL);
-   if (!wh) {
-   kfree(dev_cgroup);
-   return ERR_PTR(-ENOMEM);
-   }
-   wh-minor = wh-major = ~0;
-   wh-type = DEV_ALL;
-   wh-access = ACC_MASK;
+   if (parent_cgroup == NULL)
dev_cgroup-deny_all = false;
-   list_add(wh-list, dev_cgroup-whitelist);
-   } else {
+   else {
parent_dev_cgroup = cgroup_to_devcgroup(parent_cgroup);
mutex_lock(devcgroup_mutex);
ret = dev_whitelist_copy(dev_cgroup-whitelist,
@@ -268,33 +254,48 @@
char maj[MAJMINLEN], min[MAJMINLEN], acc[ACCLEN];
 
rcu_read_lock();
-   list_for_each_entry_rcu(wh, devcgroup-whitelist, list) {
-   set_access(acc, wh-access);
-   set_majmin(maj, wh-major);
-   set_majmin(min, wh-minor);
-   seq_printf(m, %c %s:%s %s\n, type_to_char(wh-type),
+   /*
+* To preserve the compatibility:
+* - Only show the all devices when the default policy is to allow
+* - List the exceptions in case the default policy is to deny
+* This way, the file

[PATCH RESEND 0/4] device_cgroup: replace internally whitelist with exception list

2012-08-09 Thread aris

The original model of device_cgroup is having a whitelist where all the
allowed devices are listed. The problem with this approach is that it is
impossible to have the case of allowing everything but a few devices.

The reason for that lies in the way the whitelist is handled internally:
since there's only a whitelist, the "all devices" entry would have to be
removed and replaced by the entire list of possible devices but the ones
that are being denied.  Since dev_t is 32 bits long, representing the allowed
devices as a bitfield is not memory efficient.

This patch replaces the "whitelist" with an "exceptions" list, and the default
policy is kept in the "deny_all" variable in the dev_cgroup structure.

The current interface determines that whenever "a" is written to devices.allow
or devices.deny, the entry masking all devices will be added or removed,
respectively. This behavior is kept and it's what will determine the default
policy:

# cat devices.list 
a *:* rwm
# echo a >devices.deny
# cat devices.list 
# echo a >devices.allow
# cat devices.list 
a *:* rwm

The interface is also preserved. For example, if one wants to block only access
to /dev/null:
# ls -l /dev/null
crw-rw-rw- 1 root root 1, 3 Jul 24 16:17 /dev/null
# echo a >devices.allow
# echo "c 1:3 rwm" >devices.deny
# cat /dev/null
cat: /dev/null: Operation not permitted
# echo >/dev/null
bash: /dev/null: Operation not permitted
mknod /tmp/null c 1 3
mknod: ‘/tmp/null’: Operation not permitted
# echo "c 1:3 r" >devices.allow
# cat /dev/null
# echo >/dev/null
bash: /dev/null: Operation not permitted
mknod /tmp/null c 1 3
mknod: ‘/tmp/null’: Operation not permitted
# echo "c 1:3 rw" >devices.allow
# echo >/dev/null
# cat /dev/null
# mknod /tmp/null c 1 3
mknod: ‘/tmp/null’: Operation not permitted
# echo "c 1:3 rwm" >devices.allow
# echo >/dev/null
# cat /dev/null
# mknod /tmp/null c 1 3
#

 device_cgroup.c |  371 
 1 file changed, 212 insertions(+), 159 deletions(-)

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[OOPS] 2.4.2-ac20

2001-03-18 Thread aris


hi,
i got this oops followed by a freeze

aris


Gnu C  egcs-2.91.66
Gnu make   3.79
binutils   2.9.1.0.25
util-linux 2.10l
modutils   2.4.1
e2fsprogs  1.18
pcmcia-cs  3.1.16
Linux C Library2.1.3
ldd: version 1.9.9
Procps 2.0.6
Net-tools  1.55
Kbd0.99
Sh-utils   2.0
Modules Loaded serial isa-pnp nsc-ircc irda apm vfat fat eepro100 esssolo1


ksymoops 2.3.7 on i686 2.4.2-ac20.  Options used
 -V (default)
 -k /proc/ksyms (default)
 -l /proc/modules (default)
 -o /lib/modules/2.4.2-ac20/ (default)
 -m /boot/System.map.2.4.2.ac20 (specified)

CPU:0
EIP:0010:[<c011645b>]
Using defaults from ksymoops -t elf32-i386 -a i386
EFLAGS: 00010213
eax:    ebx: c290270c   ecx: c28f2000   edx: 0015
esi: c28f2000   edi: 0246   ebp: c28f236c   esp: 0022be64
Process swapper (pid: 0, stackpage=c021b000)
Stack: c01770b4 c28f2020 c016ad00 c28f2000 c28f2768 c28f2368 c28f2568 c021bfa4
   c0181d00  c1121d00 20181834   c28f276c 0001
   0008 3286 c10ccd84 c10ccd84 c021bedc 0082 c10ccd58 c10ccd80
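Call Trace: [<c01770b4>] [<c016ad00>] [<c0181d00>] [<c016dfa8>] [<c016933b>]
[<c0119c60>] [<c011c0ee>]
   [<c0119b7f>] [<c0119ac0>] [<c01199ca>] [<c010a175>] [<c0107160>]
[<c0108e50>] [<c0107160>] [<c0107160>]
   [<c0100018>] [<c0107180>] [<c01071e2>] [<c0105000>] [<c0100192>]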
Code: 0f 0b b9 80 a5 20 c0 ff 0d 80 a5 20 c0 0f 88 6e 76 0c 00 c7

>>EIP; c011645b<=
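Trace; c01770b4 <con_flush_chars+10/24>
Trace; c0119c60 <__run_task_queue+50/60>
Trace; c011c0ee <tqueue_bh+16/1c>
Trace; c0119b7f <bh_action+1b/64>
Trace; c0119ac0 <tasklet_hi_action+38/5c>
Trace; c01199ca <do_softirq+4a/6c>
Trace; c010a175 <do_IRQ+a1/b4>
Trace; c0107160 <default_idle+0/28>
Trace; c0108e50 <ret_from_intr+0/20>
Trace; c0107160 <default_idle+0/28>
Trace; c0107160 <default_idle+0/28>
Trace; c0100018 <startup_32+18/13a>
Trace; c0107180 <default_idle+20/28>
Trace; c01071e2 <cpu_idle+3a/50>
Trace; c0105000 <empty_bad_page+0/1000>
Trace; c0100192 <L6+0/2>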
Code;  c011645b 
 <_EIP>:
Code;  c011645b<=
   0:   0f 0b ud2a  <=
Code;  c011645d 
   2:   b9 80 a5 20 c0movl   $0xc020a580,%ecx
Code;  c0116462 
   7:   ff 0d 80 a5 20 c0 decl   0xc020a580
Code;  c0116468 
   d:   0f 88 6e 76 0c 00 js c7681 <_EIP+0xc7681> c01ddadc 

Code;  c011646e 
  13:   c7 00 00 00 00 00 movl   $0x0,(%eax)

[OOPS] 2.4.2-ac20

2001-03-18 Thread aris


hi,
i got this oops followed by a freeze

aris


Gnu C  egcs-2.91.66
Gnu make   3.79
binutils   2.9.1.0.25
util-linux 2.10l
modutils   2.4.1
e2fsprogs  1.18
pcmcia-cs  3.1.16
Linux C Library2.1.3
ldd: version 1.9.9
Procps 2.0.6
Net-tools  1.55
Kbd0.99
Sh-utils   2.0
Modules Loaded serial isa-pnp nsc-ircc irda apm vfat fat eepro100 esssolo1


ksymoops 2.3.7 on i686 2.4.2-ac20.  Options used
 -V (default)
 -k /proc/ksyms (default)
 -l /proc/modules (default)
 -o /lib/modules/2.4.2-ac20/ (default)
 -m /boot/System.map.2.4.2.ac20 (specified)

CPU:0
EIP:0010:[c011645b]
Using defaults from ksymoops -t elf32-i386 -a i386
EFLAGS: 00010213
eax:    ebx: c290270c   ecx: c28f2000   edx: 0015
esi: c28f2000   edi: 0246   ebp: c28f236c   esp: 0022be64
Process swapper (pid: 0, stackpage=c021b000)
Stack: c01770b4 c28f2020 c016ad00 c28f2000 c28f2768 c28f2368 c28f2568 c021bfa4
   c0181d00  c1121d00 20181834   c28f276c 0001
   0008 3286 c10ccd84 c10ccd84 c021bedc 0082 c10ccd58 c10ccd80
Call Trace: [c01770b4] [c016ad00] [c0181d00] [c016dfa8] [c016933b]
[c0119c60] [c011c0ee]
   [c0119b7f] [c0119ac0] [c01199ca] [c010a175] [c0107160]
[c0108e50] [c0107160] [c0107160]
   [c0100018] [c0107180] [c01071e2] [c0105000] [c0100192]
Code: 0f 0b b9 80 a5 20 c0 ff 0d 80 a5 20 c0 0f 88 6e 76 0c 00 c7

EIP; c011645b acquire_console_sem+f/30   =
Trace; c01770b4 con_flush_chars+10/24
Trace; c0119c60 __run_task_queue+50/60
Trace; c011c0ee tqueue_bh+16/1c
Trace; c0119b7f bh_action+1b/64
Trace; c0119ac0 tasklet_hi_action+38/5c
Trace; c01199ca do_softirq+4a/6c
Trace; c010a175 do_IRQ+a1/b4
Trace; c0107160 default_idle+0/28
Trace; c0108e50 ret_from_intr+0/20
Trace; c0107160 default_idle+0/28
Trace; c0107160 default_idle+0/28
Trace; c0100018 startup_32+18/13a
Trace; c0107180 default_idle+20/28
Trace; c01071e2 cpu_idle+3a/50
Trace; c0105000 empty_bad_page+0/1000
Trace; c0100192 L6+0/2
Code;  c011645b acquire_console_sem+f/30
 _EIP:
Code;  c011645b acquire_console_sem+f/30   =
   0:   0f 0b ud2a  =
Code;  c011645d acquire_console_sem+11/30
   2:   b9 80 a5 20 c0movl   $0xc020a580,%ecx
Code;  c0116462 acquire_console_sem+16/30
   7:   ff 0d 80 a5 20 c0 decl   0xc020a580
Code;  c0116468 acquire_console_sem+1c/30
   d:   0f 88 6e 76 0c 00 js c7681 _EIP+0xc7681 c01ddadc 
stext_lock+18c/10e4
Code;  c011646e acquire_console_sem+22/30
  13:   c7 00 00 00 00 00 movl   $0x0,(%eax)

[PATCH][2.2] eepro 0.12d

2001-01-08 Thread aris


ok,
as i don't have documentation this is the right thing to be
done: restore the default path for old cards and keep the new one to these
blue cards. i hope this finally fixes all problems that my changes (by
guesses and lot of dosemu) introduced on a stable driver.
if it doesn't work for you please email me. i won't rest until i restore
the stability of the old boards and make the blue one supported.

P.S.: (again) if you have any documentation about these boards please send
to me. it will make my work easy and more decent than guesses by sniffing
io.
---
aristeu sergio rozanski filho | www.cathedrallabs.org/~aris
[EMAIL PROTECTED]| [EMAIL PROTECTED] 
---


--- linux/drivers/net/eepro.c.old   Wed Dec 27 21:44:29 2000
+++ linux/drivers/net/eepro.c   Wed Dec 27 22:17:54 2000
@@ -23,6 +23,8 @@
This is a compatibility hardware problem.
 
Versions:
+   0.12d   tottaly isolated old code to new code (blue cards).
+   (aris, 12/27/2000)
0.12c   fixed other multiple cards bug and other cleanups
(aris, 08/21/2000)
0.12b   added reset when the tx interrupt is called and TX isn't done
@@ -103,7 +105,7 @@
 */
 
 static const char *version =
-   "eepro.c: v0.12b 06/20/2000 [EMAIL PROTECTED]\n";
+   "eepro.c: v0.12d 12/27/2000 [EMAIL PROTECTED]\n";
 
 #include 
 
@@ -149,47 +151,28 @@
 #include 
 #include 
 
-
 #include 
 
-/* For linux 2.1.xx */
-#if defined (LINUX_VERSION_CODE) && LINUX_VERSION_CODE > 0x20155
-
 #include 
 #include 
 #include 
 
-#define compat_dev_kfree_skb( skb, mode ) dev_kfree_skb( (skb) )
 /* I had reports of looong delays with SLOW_DOWN defined as udelay(2) */
 #define SLOW_DOWN inb(0x80)
-/* udelay(2) */
-#define compat_init_func(X)  __initfunc(X)
-#define compat_init_data __initdata
-
-#else 
-/* for 2.x */
-
-#define compat_dev_kfree_skb( skb, mode ) dev_kfree_skb( (skb), (mode) )
-#define test_and_set_bit(a,b) set_bit((a),(b))
-#define SLOW_DOWN SLOW_DOWN_IO
-#define compat_init_func(X) X
-#define compat_init_data
-
-#endif
 
-
-/* First, a few definitions that the brave might change. */
-/* A zero-terminated list of I/O addresses to be probed. */
-static unsigned int eepro_portlist[] compat_init_data =
+/* First, a few definitions that the brave might change.
+ * A zero-terminated list of I/O addresses to be probed.
+ * note: 0x300 is default, the 595FX supports ALL IO Ports 
+ * from 0x000 to 0x3F0, some of which are reserved in PCs
+ */
+static unsigned eepro_portlist[] __initdata =
{ 0x300, 0x210, 0x240, 0x280, 0x2C0, 0x200, 0x320, 0x340, 0x360, 0};
-/* note: 0x300 is default, the 595FX supports ALL IO Ports 
-  from 0x000 to 0x3F0, some of which are reserved in PCs */
 
 /* use 0 for production, 1 for verification, >2 for debug */
 #ifndef NET_DEBUG
 #define NET_DEBUG 0
 #endif
-static unsigned int net_debug = NET_DEBUG;
+static unsigned net_debug = NET_DEBUG;
 
 /* The number of low I/O ports used by the ethercard. */
 #define EEPRO_IO_EXTENT16
@@ -204,16 +187,17 @@
 struct eepro_local {
struct enet_statistics stats;
unsigned rx_start;
-   unsigned tx_start; /* start of the transmit chain */
-   int tx_last;  /* pointer to last packet in the transmit chain */
-   unsigned tx_end;   /* end of the transmit chain (plus 1) */
-   int eepro;  /* 1 for the EtherExpress Pro/10,
-  2 for the EtherExpress Pro/10+,
-  0 for other 82595-based lan cards. */
-   int version;/* a flag to indicate if this is a TX or FX
-  version of the 82595 chip. */
+   unsigned tx_start;  /* start of the transmit chain */
+   int tx_last;/* pointer to last packet in the transmit chain */
+   unsigned tx_end;/* end of the transmit chain (plus 1) */
+   int eepro;  /* 1 for the EtherExpress Pro/10,
+* 2 for the EtherExpress Pro/10+,
+* 3 for the blue cards,
+* 0 for other 82595-based lan cards. */
+   int version;/* a flag to indicate if this is a TX or FX
+* version of the 82595 chip. */
int stepping;
-   spinlock_t lock; /* Serializing lock  */ 
+   spinlock_t lock;/* Serializing lock  */ 
unsigned rcv_ram;
unsigned rcv_start;
unsigned xmt_bar;
@@ -322,7 +306,7 @@
 static void set_multicast_list(struct device *dev);
 
 static int read_eeprom(int ioaddr, int location, struct device *dev);
-static void hardware_send_packet(struct device *dev, void *buf, short length);
+static i

[PATCH][2.4] eepro 0.12c

2001-01-08 Thread aris


hi linus,
driver: eepro
problem: the current state of the driver makes old supported boards stop
 functioning after some time of operation.

please consider applying this patch. the cleanup and cosmetic
changes will be in the next release of driver as you asked for.

---
aristeu sergio rozanski filho | www.cathedrallabs.org/~aris
[EMAIL PROTECTED]| [EMAIL PROTECTED] 
---



--- linux/drivers/net/eepro.c.old   Tue Dec  5 20:29:38 2000
+++ linux/drivers/net/eepro.c   Mon Jan  8 22:56:44 2001
@@ -23,6 +23,7 @@
This is a compatibility hardware problem.
 
Versions:
+   0.12c   fixing some problems with old cards (aris, 01/08/2001)
0.12b   misc fixes (aris, 06/26/2000)
0.12a   port of version 0.12a of 2.2.x kernels to 2.3.x
(aris ([EMAIL PROTECTED]), 05/19/2000)
@@ -96,7 +97,7 @@
 */
 
 static const char *version =
-   "eepro.c: v0.12b 04/26/2000 [EMAIL PROTECTED]\n";
+   "eepro.c: v0.12c 01/08/2000 [EMAIL PROTECTED]\n";
 
 #include <linux/module.h>
 
@@ -501,8 +502,10 @@
 /* set diagnose flag */
 #define eepro_diag(ioaddr) outb(DIAGNOSE_CMD, ioaddr)
 
+#ifdef ANSWER_TX_AND_RX/* experimental way of handling interrupts */
 /* ack for rx/tx int */
 #define eepro_ack_rxtx(ioaddr) outb (RX_INT | TX_INT, ioaddr + STATUS_REG)
+#endif
 
 /* ack for rx int */
 #define eepro_ack_rx(ioaddr) outb (RX_INT, ioaddr + STATUS_REG)
@@ -1067,6 +1070,8 @@
}

eepro_sel_reset(ioaddr);
+   SLOW_DOWN;
+   SLOW_DOWN;
 
lp->tx_start = lp->tx_end = XMT_LOWER_LIMIT << 8;
lp->tx_last = 0;
@@ -1162,9 +1167,11 @@
while (((status = inb(ioaddr + STATUS_REG)) & 0x06) && (boguscount--))
{
switch (status & (RX_INT | TX_INT)) {
+#ifdef ANSWER_TX_AND_RX
case (RX_INT | TX_INT):
eepro_ack_rxtx(ioaddr);
break;
+#endif
case RX_INT:
eepro_ack_rx(ioaddr);
break;
@@ -1178,6 +1185,9 @@
 
/* Get the received packets */
eepro_rx(dev);
+#ifndef ANSWER_TX_AND_RX
+   continue;
+#endif
}
if (status & TX_INT) {
if (net_debug > 4)
@@ -1367,7 +1377,11 @@
/* Re-enable RX and TX interrupts */
eepro_en_int(ioaddr);
}
-   eepro_complete_selreset(ioaddr);
+   if (lp->eepro == LAN595FX_10ISA) {
+   eepro_complete_selreset(ioaddr);
+   }
+   else
+   eepro_en_rx(ioaddr);
 }
 
 /* The horrible routine to read a word from the serial EEPROM. */
@@ -1535,7 +1549,9 @@
printk(KERN_DEBUG "%s: exiting hardware_send_packet 
routine.\n", dev->name);
return;
}
-   netif_stop_queue(dev);
+   if (lp->eepro == LAN595FX_10ISA)
+   netif_stop_queue(dev);
+
if (net_debug > 5)
printk(KERN_DEBUG "%s: exiting hardware_send_packet routine.\n", 
dev->name);
 }
@@ -1654,9 +1670,13 @@
xmt_status = inw(ioaddr+IO_PORT);

if ((xmt_status & TX_DONE_BIT) == 0) {
-   udelay(40);
-   boguscount--;
-   continue;
+   if (lp->eepro == LAN595FX_10ISA) {
+   udelay(40);
+   boguscount--;
+   continue;
+   }
+   else
+   break;
}
 
xmt_status = inw(ioaddr+IO_PORT); 
@@ -1723,7 +1743,7 @@
 * interrupt again for tx. in other words: tx timeout what will take
 * a lot of time to happen, so we'll do a complete selreset.
 */
-   if (!boguscount)
+   if (!boguscount && lp->eepro == LAN595FX_10ISA)
eepro_complete_selreset(ioaddr);
 }

[PATCH][2.4] eepro 0.12c

2001-01-08 Thread aris


hi linus,
driver: eepro
problem: the current state of the driver makes old supported boards stop
 functioning after some time of operation.

please consider applying this patch. the cleanup and cosmetic
changes will be in the next release of driver as you asked for.

---
aristeu sergio rozanski filho | www.cathedrallabs.org/~aris
[EMAIL PROTECTED]| [EMAIL PROTECTED] 
---



--- linux/drivers/net/eepro.c.old   Tue Dec  5 20:29:38 2000
+++ linux/drivers/net/eepro.c   Mon Jan  8 22:56:44 2001
@@ -23,6 +23,7 @@
This is a compatibility hardware problem.
 
Versions:
+   0.12c   fixing some problems with old cards (aris, 01/08/2001)
0.12b   misc fixes (aris, 06/26/2000)
0.12a   port of version 0.12a of 2.2.x kernels to 2.3.x
(aris ([EMAIL PROTECTED]), 05/19/2000)
@@ -96,7 +97,7 @@
 */
 
 static const char *version =
-   "eepro.c: v0.12b 04/26/2000 [EMAIL PROTECTED]\n";
+   "eepro.c: v0.12c 01/08/2000 [EMAIL PROTECTED]\n";
 
 #include <linux/module.h>
 
@@ -501,8 +502,10 @@
 /* set diagnose flag */
 #define eepro_diag(ioaddr) outb(DIAGNOSE_CMD, ioaddr)
 
+#ifdef ANSWER_TX_AND_RX/* experimental way of handling interrupts */
 /* ack for rx/tx int */
 #define eepro_ack_rxtx(ioaddr) outb (RX_INT | TX_INT, ioaddr + STATUS_REG)
+#endif
 
 /* ack for rx int */
 #define eepro_ack_rx(ioaddr) outb (RX_INT, ioaddr + STATUS_REG)
@@ -1067,6 +1070,8 @@
}

eepro_sel_reset(ioaddr);
+   SLOW_DOWN;
+   SLOW_DOWN;
 
lp->tx_start = lp->tx_end = XMT_LOWER_LIMIT << 8;
lp->tx_last = 0;
@@ -1162,9 +1167,11 @@
while (((status = inb(ioaddr + STATUS_REG)) & 0x06) && (boguscount--))
{
switch (status & (RX_INT | TX_INT)) {
+#ifdef ANSWER_TX_AND_RX
case (RX_INT | TX_INT):
eepro_ack_rxtx(ioaddr);
break;
+#endif
case RX_INT:
eepro_ack_rx(ioaddr);
break;
@@ -1178,6 +1185,9 @@
 
/* Get the received packets */
eepro_rx(dev);
+#ifndef ANSWER_TX_AND_RX
+   continue;
+#endif
}
if (status & TX_INT) {
if (net_debug > 4)
@@ -1367,7 +1377,11 @@
/* Re-enable RX and TX interrupts */
eepro_en_int(ioaddr);
}
-   eepro_complete_selreset(ioaddr);
+   if (lp->eepro == LAN595FX_10ISA) {
+   eepro_complete_selreset(ioaddr);
+   }
+   else
+   eepro_en_rx(ioaddr);
 }
 
 /* The horrible routine to read a word from the serial EEPROM. */
@@ -1535,7 +1549,9 @@
printk(KERN_DEBUG "%s: exiting hardware_send_packet 
routine.\n", dev->name);
return;
}
-   netif_stop_queue(dev);
+   if (lp->eepro == LAN595FX_10ISA)
+   netif_stop_queue(dev);
+
if (net_debug > 5)
printk(KERN_DEBUG "%s: exiting hardware_send_packet routine.\n", 
dev->name);
 }
@@ -1654,9 +1670,13 @@
xmt_status = inw(ioaddr+IO_PORT);

if ((xmt_status & TX_DONE_BIT) == 0) {
-   udelay(40);
-   boguscount--;
-   continue;
+   if (lp->eepro == LAN595FX_10ISA) {
+   udelay(40);
+   boguscount--;
+   continue;
+   }
+   else
+   break;
}
 
xmt_status = inw(ioaddr+IO_PORT); 
@@ -1723,7 +1743,7 @@
 * interrupt again for tx. in other words: tx timeout what will take
 * a lot of time to happen, so we'll do a complete selreset.
 */
-   if (!boguscount)
+   if (!boguscount && lp->eepro == LAN595FX_10ISA)
eepro_complete_selreset(ioaddr);
 }

[PATCH][2.2] eepro 0.12d

2001-01-08 Thread aris


ok,
as i don't have documentation, this is the right thing to
do: restore the default path for old cards and keep the new one for the
blue cards. i hope this finally fixes all the problems that my changes (made
by guesswork and a lot of dosemu) introduced into a stable driver.
if it doesn't work for you please email me. i won't rest until i restore
the stability of the old boards and make the blue one supported.

P.S.: (again) if you have any documentation about these boards please send
it to me. it will make my work easier and more reliable than guessing by
sniffing io.
---
aristeu sergio rozanski filho | www.cathedrallabs.org/~aris
[EMAIL PROTECTED]| [EMAIL PROTECTED] 
---


--- linux/drivers/net/eepro.c.old   Wed Dec 27 21:44:29 2000
+++ linux/drivers/net/eepro.c   Wed Dec 27 22:17:54 2000
@@ -23,6 +23,8 @@
This is a compatibility hardware problem.
 
Versions:
+   0.12d   tottaly isolated old code to new code (blue cards).
+   (aris, 12/27/2000)
0.12c   fixed other multiple cards bug and other cleanups
(aris, 08/21/2000)
0.12b   added reset when the tx interrupt is called and TX isn't done
@@ -103,7 +105,7 @@
 */
 
 static const char *version =
-   "eepro.c: v0.12b 06/20/2000 [EMAIL PROTECTED]\n";
+   "eepro.c: v0.12d 12/27/2000 [EMAIL PROTECTED]\n";
 
 #include <linux/module.h>
 
@@ -149,47 +151,28 @@
 #include <linux/etherdevice.h>
 #include <linux/skbuff.h>
 
-
 #include <linux/version.h>
 
-/* For linux 2.1.xx */
-#if defined (LINUX_VERSION_CODE)  LINUX_VERSION_CODE  0x20155
-
 #include <asm/spinlock.h>
 #include <linux/init.h>
 #include <linux/delay.h>
 
-#define compat_dev_kfree_skb( skb, mode ) dev_kfree_skb( (skb) )
 /* I had reports of looong delays with SLOW_DOWN defined as udelay(2) */
 #define SLOW_DOWN inb(0x80)
-/* udelay(2) */
-#define compat_init_func(X)  __initfunc(X)
-#define compat_init_data __initdata
-
-#else 
-/* for 2.x */
-
-#define compat_dev_kfree_skb( skb, mode ) dev_kfree_skb( (skb), (mode) )
-#define test_and_set_bit(a,b) set_bit((a),(b))
-#define SLOW_DOWN SLOW_DOWN_IO
-#define compat_init_func(X) X
-#define compat_init_data
-
-#endif
 
-
-/* First, a few definitions that the brave might change. */
-/* A zero-terminated list of I/O addresses to be probed. */
-static unsigned int eepro_portlist[] compat_init_data =
+/* First, a few definitions that the brave might change.
+ * A zero-terminated list of I/O addresses to be probed.
+ * note: 0x300 is default, the 595FX supports ALL IO Ports 
+ * from 0x000 to 0x3F0, some of which are reserved in PCs
+ */
+static unsigned eepro_portlist[] __initdata =
{ 0x300, 0x210, 0x240, 0x280, 0x2C0, 0x200, 0x320, 0x340, 0x360, 0};
-/* note: 0x300 is default, the 595FX supports ALL IO Ports 
-  from 0x000 to 0x3F0, some of which are reserved in PCs */
 
 /* use 0 for production, 1 for verification, 2 for debug */
 #ifndef NET_DEBUG
 #define NET_DEBUG 0
 #endif
-static unsigned int net_debug = NET_DEBUG;
+static unsigned net_debug = NET_DEBUG;
 
 /* The number of low I/O ports used by the ethercard. */
 #define EEPRO_IO_EXTENT16
@@ -204,16 +187,17 @@
 struct eepro_local {
struct enet_statistics stats;
unsigned rx_start;
-   unsigned tx_start; /* start of the transmit chain */
-   int tx_last;  /* pointer to last packet in the transmit chain */
-   unsigned tx_end;   /* end of the transmit chain (plus 1) */
-   int eepro;  /* 1 for the EtherExpress Pro/10,
-  2 for the EtherExpress Pro/10+,
-  0 for other 82595-based lan cards. */
-   int version;/* a flag to indicate if this is a TX or FX
-  version of the 82595 chip. */
+   unsigned tx_start;  /* start of the transmit chain */
+   int tx_last;/* pointer to last packet in the transmit chain */
+   unsigned tx_end;/* end of the transmit chain (plus 1) */
+   int eepro;  /* 1 for the EtherExpress Pro/10,
+* 2 for the EtherExpress Pro/10+,
+* 3 for the blue cards,
+* 0 for other 82595-based lan cards. */
+   int version;/* a flag to indicate if this is a TX or FX
+* version of the 82595 chip. */
int stepping;
-   spinlock_t lock; /* Serializing lock  */ 
+   spinlock_t lock;/* Serializing lock  */ 
unsigned rcv_ram;
unsigned rcv_start;
unsigned xmt_bar;
@@ -322,7 +306,7 @@
 static void set_multicast_list(struct device *dev);
 
 static int read_eeprom(int ioaddr, int location, struct device

[PATCH] remove warnings from drivers/net/eepro.c (240-test12-pre7)(fwd)

2000-12-19 Thread aris


hi,
as my patches for eepro are getting late, here is a patch from
Rasmus that i would apply with mine, please apply

-- 
Aris
---
Aristeu Sergio Rozanski Filho [EMAIL PROTECTED]
---

--

--- linux-240-t12-pre7-clean/drivers/net/eepro.cFri Dec  8 00:44:58 2000
+++ linux/drivers/net/eepro.c   Fri Dec  8 21:02:50 2000
@@ -1727,6 +1727,8 @@
eepro_complete_selreset(ioaddr);
 }
 
+#ifdef MODULE
+
 #define MAX_EEPRO 8
 static struct net_device dev_eepro[MAX_EEPRO];
 
@@ -1737,7 +1739,7 @@
 };
 static int autodetect;
 
-static int n_eepro = 0;
+static int n_eepro;
 /* For linux 2.1.xx */
 
 MODULE_AUTHOR("Pascal Dupuis <[EMAIL PROTECTED]> for the 2.1 stuff (locking,...)");
@@ -1746,8 +1748,6 @@
 MODULE_PARM(irq, "1-" __MODULE_STRING(MAX_EEPRO) "i");
 MODULE_PARM(mem, "1-" __MODULE_STRING(MAX_EEPRO) "i");
 MODULE_PARM(autodetect, "1-" __MODULE_STRING(1) "i");
-
-#ifdef MODULE
 
 int 
 init_module(void)


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
Please read the FAQ at http://www.tux.org/lkml/

[PATCH] remove warnings from drivers/net/eepro.c (240-test12-pre7)(fwd)

2000-12-19 Thread aris


hi,
as my patches for eepro are getting late, here is a patch from
Rasmus that i would apply with mine, please apply

-- 
Aris
---
Aristeu Sergio Rozanski Filho [EMAIL PROTECTED]
---

--

--- linux-240-t12-pre7-clean/drivers/net/eepro.cFri Dec  8 00:44:58 2000
+++ linux/drivers/net/eepro.c   Fri Dec  8 21:02:50 2000
@@ -1727,6 +1727,8 @@
eepro_complete_selreset(ioaddr);
 }
 
+#ifdef MODULE
+
 #define MAX_EEPRO 8
 static struct net_device dev_eepro[MAX_EEPRO];
 
@@ -1737,7 +1739,7 @@
 };
 static int autodetect;
 
-static int n_eepro = 0;
+static int n_eepro;
 /* For linux 2.1.xx */
 
 MODULE_AUTHOR("Pascal Dupuis <[EMAIL PROTECTED]> for the 2.1 stuff (locking,...)");
@@ -1746,8 +1748,6 @@
 MODULE_PARM(irq, "1-" __MODULE_STRING(MAX_EEPRO) "i");
 MODULE_PARM(mem, "1-" __MODULE_STRING(MAX_EEPRO) "i");
 MODULE_PARM(autodetect, "1-" __MODULE_STRING(1) "i");
-
-#ifdef MODULE
 
 int 
 init_module(void)


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
Please read the FAQ at http://www.tux.org/lkml/

Re: [PATCH] eepro 0.12c

2000-09-26 Thread aris


hi,
done, thanks Jes!

On 26 Sep 2000, Jes Sorensen wrote:

> >>>>> "aris" == aris  <[EMAIL PROTECTED]> writes:
> 
> aris> hi, misc fixes on eepro driver, please apply
> 
> aris> @@ -212,6 +214,12 @@
> aris>    version of the 82595 chip. */
> aris>   int stepping;
> aris>   spinlock_t lock; /* Serializing lock  */ 
> aris> +     unsigned rcv_ram;
> aris> + unsigned rcv_start;
> aris> +   unsigned xmt_bar;
> aris> +     unsigned xmt_lower_limit_reg;
> aris> + unsigned xmt_upper_limit_reg;
> aris> +   unsigned eeprom_reg;
> aris>  };
> 
> Please don't use unsigned without specifying the size, use either
> unsigned int or unsigned long.
> 
> Cheers,
> Jes
> -
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to [EMAIL PROTECTED]
> Please read the FAQ at http://www.tux.org/lkml/
> 

-- 
Aris
---
Aristeu Sergio Rozanski Filho [EMAIL PROTECTED]
---


--- linux/drivers/net/eepro.c.old   Tue Sep 12 15:41:50 2000
+++ linux/drivers/net/eepro.c   Tue Sep 26 12:46:27 2000
@@ -23,6 +23,8 @@
    This is a compatibility hardware problem.
 
Versions:
+   0.12c   fixed other multiple cards bug and other cleanups
+   (aris, 08/21/2000)
0.12b   added reset when the tx interrupt is called and TX isn't done
and other minor fixes. this may fix a problem found after
initialization that delays tx until a transmit timeout is 
@@ -201,10 +203,10 @@
 /* Information that need to be kept for each board. */
 struct eepro_local {
struct enet_statistics stats;
-   unsigned rx_start;
-   unsigned tx_start; /* start of the transmit chain */
+   unsigned int rx_start;
+   unsigned int tx_start; /* start of the transmit chain */
int tx_last;  /* pointer to last packet in the transmit chain */
-   unsigned tx_end;   /* end of the transmit chain (plus 1) */
+   unsigned int tx_end;   /* end of the transmit chain (plus 1) */
int eepro;  /* 1 for the EtherExpress Pro/10,
   2 for the EtherExpress Pro/10+,
   0 for other 82595-based lan cards. */
@@ -212,6 +214,12 @@
   version of the 82595 chip. */
int stepping;
spinlock_t lock; /* Serializing lock  */ 
+   unsigned int rcv_ram;
+   unsigned int rcv_start;
+   unsigned int xmt_bar;
+   unsigned int xmt_lower_limit_reg;
+   unsigned int xmt_upper_limit_reg;
+   unsigned int eeprom_reg;
 };
 
 /* The station (ethernet) address prefix, used for IDing the board. */
@@ -356,24 +364,20 @@
 
 #defineRCV_HEADER  8
 #define RCV_DEFAULT_RAM0x6000
-#define RCV_RAMrcv_ram
-
-static unsigned rcv_ram = RCV_DEFAULT_RAM;
+#define RCV_RAMlp->rcv_ram
 
 #define XMT_HEADER 8
 #define XMT_RAM(RAM_SIZE - RCV_RAM)
 
-#define XMT_START  ((rcv_start + RCV_RAM) % RAM_SIZE)
+#define XMT_START  ((lp->rcv_start + RCV_RAM) % RAM_SIZE)
 
-#define RCV_LOWER_LIMIT(rcv_start >> 8)
-#define RCV_UPPER_LIMIT(((rcv_start + RCV_RAM) - 2) >> 8)
+#define RCV_LOWER_LIMIT(lp->rcv_start >> 8)
+#define RCV_UPPER_LIMIT(((lp->rcv_start + RCV_RAM) - 2) >> 8)
 #define XMT_LOWER_LIMIT(XMT_START >> 8)
 #define XMT_UPPER_LIMIT(((XMT_START + XMT_RAM) - 2) >> 8)
 
 #define RCV_START_PRO  0x00
 #define RCV_START_10   XMT_RAM
-   /* by default the old driver */
-static unsigned rcv_start = RCV_START_PRO;
 
 #defineRCV_DONE0x0008
 #defineRX_OK   0x2000
@@ -422,7 +426,6 @@
 
 #defineXMT_BAR_PRO 0x0a
 #defineXMT_BAR_10  0x0b
-static unsigned xmt_bar = XMT_BAR_PRO;
 
 #defineHOST_ADDRESS_REG0x0c
 #defineIO_PORT 0x0e
@@ -440,8 +443,6 @@
 #defineXMT_UPPER_LIMIT_REG_PRO 0x0b
 #defineXMT_LOWER_LIMIT_REG_10  0x0b
 #defineXMT_UPPER_LIMIT_REG_10  0x0a
-static unsigned xmt_lower_limit_reg = XMT_LOWER_LIMIT_REG_PRO;
-static unsigned xmt_upper_limit_reg = XMT_UPPER_LIMIT_REG_PRO;
 
 /* Bank 2 registers */
 #defineXMT_Chain_Int   0x20/* Interrupt at the end of the transmit chain 
*/
@@ -466,7 +467,6 @@
 
 #define EEPROM_REG_PRO 0x0a
 #define EEPROM_REG_10  0x0b
-static unsign

[PATCH] eepro 0.12c

2000-09-26 Thread aris


hi,
misc fixes on eepro driver, please apply

-- 
Aris
---
Aristeu Sergio Rozanski Filho [EMAIL PROTECTED]
---


--- linux/drivers/net/eepro.c.old   Tue Sep 12 15:41:50 2000
+++ linux/drivers/net/eepro.c   Wed Sep 13 15:28:04 2000
@@ -23,6 +23,8 @@
This is a compatibility hardware problem.
 
Versions:
+   0.12c   fixed other multiple cards bug and other cleanups
+   (aris, 08/21/2000)
0.12b   added reset when the tx interrupt is called and TX isn't done
and other minor fixes. this may fix a problem found after
initialization that delays tx until a transmit timeout is 
@@ -212,6 +214,12 @@
   version of the 82595 chip. */
int stepping;
spinlock_t lock; /* Serializing lock  */ 
+   unsigned rcv_ram;
+   unsigned rcv_start;
+   unsigned xmt_bar;
+   unsigned xmt_lower_limit_reg;
+   unsigned xmt_upper_limit_reg;
+   unsigned eeprom_reg;
 };
 
 /* The station (ethernet) address prefix, used for IDing the board. */
@@ -356,24 +364,20 @@
 
 #defineRCV_HEADER  8
 #define RCV_DEFAULT_RAM0x6000
-#define RCV_RAMrcv_ram
-
-static unsigned rcv_ram = RCV_DEFAULT_RAM;
+#define RCV_RAMlp->rcv_ram
 
 #define XMT_HEADER 8
 #define XMT_RAM(RAM_SIZE - RCV_RAM)
 
-#define XMT_START  ((rcv_start + RCV_RAM) % RAM_SIZE)
+#define XMT_START  ((lp->rcv_start + RCV_RAM) % RAM_SIZE)
 
-#define RCV_LOWER_LIMIT(rcv_start >> 8)
-#define RCV_UPPER_LIMIT(((rcv_start + RCV_RAM) - 2) >> 8)
+#define RCV_LOWER_LIMIT(lp->rcv_start >> 8)
+#define RCV_UPPER_LIMIT(((lp->rcv_start + RCV_RAM) - 2) >> 8)
 #define XMT_LOWER_LIMIT(XMT_START >> 8)
 #define XMT_UPPER_LIMIT(((XMT_START + XMT_RAM) - 2) >> 8)
 
 #define RCV_START_PRO  0x00
 #define RCV_START_10   XMT_RAM
-   /* by default the old driver */
-static unsigned rcv_start = RCV_START_PRO;
 
 #defineRCV_DONE0x0008
 #defineRX_OK   0x2000
@@ -422,7 +426,6 @@
 
 #defineXMT_BAR_PRO 0x0a
 #defineXMT_BAR_10  0x0b
-static unsigned xmt_bar = XMT_BAR_PRO;
 
 #defineHOST_ADDRESS_REG0x0c
 #defineIO_PORT 0x0e
@@ -440,8 +443,6 @@
 #defineXMT_UPPER_LIMIT_REG_PRO 0x0b
 #defineXMT_LOWER_LIMIT_REG_10  0x0b
 #defineXMT_UPPER_LIMIT_REG_10  0x0a
-static unsigned xmt_lower_limit_reg = XMT_LOWER_LIMIT_REG_PRO;
-static unsigned xmt_upper_limit_reg = XMT_UPPER_LIMIT_REG_PRO;
 
 /* Bank 2 registers */
 #defineXMT_Chain_Int   0x20/* Interrupt at the end of the transmit chain 
*/
@@ -466,7 +467,6 @@
 
 #define EEPROM_REG_PRO 0x0a
 #define EEPROM_REG_10  0x0b
-static unsigned eeprom_reg = EEPROM_REG_PRO;
 
 #define EESK 0x01
 #define EECS 0x02
@@ -528,7 +528,8 @@
 #define eepro_ack_tx(ioaddr) outb (TX_INT, ioaddr + STATUS_REG)
 
 /* a complete sel reset */
-#define eepro_complete_selreset(ioaddr) {  eepro_dis_int(ioaddr);\
+#define eepro_complete_selreset(ioaddr) {  \
+   /* eepro_dis_int(ioaddr); */ \
lp->stats.tx_errors++;\
eepro_sel_reset(ioaddr);\
lp->tx_end = \
@@ -537,7 +538,7 @@
lp->tx_last = 0;\
dev->tbusy=0;\
dev->trans_start = jiffies;\
-   eepro_en_int(ioaddr);\
+   /*eepro_en_int(ioaddr); */ \
eepro_en_rx(ioaddr);\
}
 
@@ -670,7 +671,15 @@
 
lp = (struct eepro_local *)dev->priv;
 
-   /* Now, get the ethernet hardware address from
+   /* default values */
+   lp->rcv_start = RCV_START_PRO;
+   lp->xmt_bar = XMT_BAR_PRO;
+   lp->xmt_lower_limit_reg = XMT_LOWER_LIMIT_REG_PRO;
+   lp->xmt_upper_limit_reg = XMT_UPPER_LIMIT_REG_PRO;
+   lp->eeprom_reg = EEPROM_REG_PRO;
+   lp->rcv_ram = RCV_DEFAULT_RAM;
+
+   /* Now, get the ethernet hardware address from

Re: [PATCH] eepro 0.12c

2000-09-26 Thread aris


hi,
done, thanks Jes!

On 26 Sep 2000, Jes Sorensen wrote:

> >>>>> "aris" == aris  <[EMAIL PROTECTED]> writes:
> 
> aris> hi, misc fixes on eepro driver, please apply
> 
> aris> @@ -212,6 +214,12 @@
> aris>    version of the 82595 chip. */
> aris>   int stepping;
> aris>   spinlock_t lock; /* Serializing lock  */ 
> aris> +     unsigned rcv_ram;
> aris> + unsigned rcv_start;
> aris> +   unsigned xmt_bar;
> aris> +     unsigned xmt_lower_limit_reg;
> aris> + unsigned xmt_upper_limit_reg;
> aris> +   unsigned eeprom_reg;
> aris>  };
> 
> Please don't use unsigned without specifying the size, use either
> unsigned int or unsigned long.
> 
> Cheers,
> Jes
> -
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to [EMAIL PROTECTED]
> Please read the FAQ at http://www.tux.org/lkml/
 

-- 
Aris
---
Aristeu Sergio Rozanski Filho [EMAIL PROTECTED]
---


--- linux/drivers/net/eepro.c.old   Tue Sep 12 15:41:50 2000
+++ linux/drivers/net/eepro.c   Tue Sep 26 12:46:27 2000
@@ -23,6 +23,8 @@
This is a compatibility hardware problem.
 
Versions:
+   0.12c   fixed other multiple cards bug and other cleanups
+   (aris, 08/21/2000)
0.12b   added reset when the tx interrupt is called and TX isn't done
and other minor fixes. this may fix a problem found after
initialization that delays tx until a transmit timeout is 
@@ -201,10 +203,10 @@
 /* Information that need to be kept for each board. */
 struct eepro_local {
struct enet_statistics stats;
-   unsigned rx_start;
-   unsigned tx_start; /* start of the transmit chain */
+   unsigned int rx_start;
+   unsigned int tx_start; /* start of the transmit chain */
int tx_last;  /* pointer to last packet in the transmit chain */
-   unsigned tx_end;   /* end of the transmit chain (plus 1) */
+   unsigned int tx_end;   /* end of the transmit chain (plus 1) */
int eepro;  /* 1 for the EtherExpress Pro/10,
   2 for the EtherExpress Pro/10+,
   0 for other 82595-based lan cards. */
@@ -212,6 +214,12 @@
   version of the 82595 chip. */
int stepping;
spinlock_t lock; /* Serializing lock  */ 
+   unsigned int rcv_ram;
+   unsigned int rcv_start;
+   unsigned int xmt_bar;
+   unsigned int xmt_lower_limit_reg;
+   unsigned int xmt_upper_limit_reg;
+   unsigned int eeprom_reg;
 };
 
 /* The station (ethernet) address prefix, used for IDing the board. */
@@ -356,24 +364,20 @@
 
 #defineRCV_HEADER  8
 #define RCV_DEFAULT_RAM0x6000
-#define RCV_RAMrcv_ram
-
-static unsigned rcv_ram = RCV_DEFAULT_RAM;
+#define RCV_RAMlp->rcv_ram
 
 #define XMT_HEADER 8
 #define XMT_RAM(RAM_SIZE - RCV_RAM)
 
-#define XMT_START  ((rcv_start + RCV_RAM) % RAM_SIZE)
+#define XMT_START  ((lp->rcv_start + RCV_RAM) % RAM_SIZE)
 
-#define RCV_LOWER_LIMIT(rcv_start >> 8)
-#define RCV_UPPER_LIMIT(((rcv_start + RCV_RAM) - 2) >> 8)
+#define RCV_LOWER_LIMIT(lp->rcv_start >> 8)
+#define RCV_UPPER_LIMIT(((lp->rcv_start + RCV_RAM) - 2) >> 8)
 #define XMT_LOWER_LIMIT(XMT_START >> 8)
 #define XMT_UPPER_LIMIT(((XMT_START + XMT_RAM) - 2) >> 8)
 
 #define RCV_START_PRO  0x00
 #define RCV_START_10   XMT_RAM
-   /* by default the old driver */
-static unsigned rcv_start = RCV_START_PRO;
 
 #defineRCV_DONE0x0008
 #defineRX_OK   0x2000
@@ -422,7 +426,6 @@
 
 #defineXMT_BAR_PRO 0x0a
 #defineXMT_BAR_10  0x0b
-static unsigned xmt_bar = XMT_BAR_PRO;
 
 #defineHOST_ADDRESS_REG0x0c
 #defineIO_PORT 0x0e
@@ -440,8 +443,6 @@
 #defineXMT_UPPER_LIMIT_REG_PRO 0x0b
 #defineXMT_LOWER_LIMIT_REG_10  0x0b
 #defineXMT_UPPER_LIMIT_REG_10  0x0a
-static unsigned xmt_lower_limit_reg = XMT_LOWER_LIMIT_REG_PRO;
-static unsigned xmt_upper_limit_reg = XMT_UPPER_LIMIT_REG_PRO;
 
 /* Bank 2 registers */
 #defineXMT_Chain_Int   0x20/* Interrupt at the end of the transmit chain 
*/
@@ -466,7 +467,6 @@
 
 #define EEPROM_REG_PRO 0x0a
 #define EEPROM_REG_10  0x0b
-static unsigned eeprom_reg = EEPROM_REG_PRO;
 
 #define EESK 0x01
 #define EECS 0x02
@@ -528,7 +528,8 @@
 #define eepro_ack_tx(ioaddr) outb (TX_INT, ioaddr + STATUS_REG)
 
 /* a complete sel reset */
-#define eepro_complete_selreset(ioaddr) {  eepro_dis_int

[TEST] patch for eepro driver over 2.2.17

2000-09-13 Thread aris


hi,
some people reported problems in eepro boards with 2.2.17
driver. please apply this patch over 2.2.17 version.
warning: this is a _test_ patch! i've tested with etherexpress 10 (the
only board supported by this driver that i have here - donations are very
welcome ;)) and it works very well under heavy load.
for interested people: test it and tell me what you think.
thanks,

-- 
Aris
---
Aristeu Sergio Rozanski Filho [EMAIL PROTECTED]
---


--- linux/drivers/net/eepro.c.old   Tue Sep 12 15:41:50 2000
+++ linux/drivers/net/eepro.c   Wed Sep 13 12:00:49 2000
@@ -212,6 +214,12 @@
   version of the 82595 chip. */
int stepping;
spinlock_t lock; /* Serializing lock  */ 
+   unsigned rcv_ram;
+   unsigned rcv_start;
+   unsigned xmt_bar;
+   unsigned xmt_lower_limit_reg;
+   unsigned xmt_upper_limit_reg;
+   unsigned eeprom_reg;
 };
 
 /* The station (ethernet) address prefix, used for IDing the board. */
@@ -356,24 +364,20 @@
 
 #defineRCV_HEADER  8
 #define RCV_DEFAULT_RAM0x6000
-#define RCV_RAMrcv_ram
-
-static unsigned rcv_ram = RCV_DEFAULT_RAM;
+#define RCV_RAMlp->rcv_ram
 
 #define XMT_HEADER 8
 #define XMT_RAM(RAM_SIZE - RCV_RAM)
 
-#define XMT_START  ((rcv_start + RCV_RAM) % RAM_SIZE)
+#define XMT_START  ((lp->rcv_start + RCV_RAM) % RAM_SIZE)
 
-#define RCV_LOWER_LIMIT(rcv_start >> 8)
-#define RCV_UPPER_LIMIT(((rcv_start + RCV_RAM) - 2) >> 8)
+#define RCV_LOWER_LIMIT(lp->rcv_start >> 8)
+#define RCV_UPPER_LIMIT(((lp->rcv_start + RCV_RAM) - 2) >> 8)
 #define XMT_LOWER_LIMIT(XMT_START >> 8)
 #define XMT_UPPER_LIMIT(((XMT_START + XMT_RAM) - 2) >> 8)
 
 #define RCV_START_PRO  0x00
 #define RCV_START_10   XMT_RAM
-   /* by default the old driver */
-static unsigned rcv_start = RCV_START_PRO;
 
 #defineRCV_DONE0x0008
 #defineRX_OK   0x2000
@@ -422,7 +426,6 @@
 
 #defineXMT_BAR_PRO 0x0a
 #defineXMT_BAR_10  0x0b
-static unsigned xmt_bar = XMT_BAR_PRO;
 
 #defineHOST_ADDRESS_REG0x0c
 #defineIO_PORT 0x0e
@@ -440,8 +443,6 @@
 #defineXMT_UPPER_LIMIT_REG_PRO 0x0b
 #defineXMT_LOWER_LIMIT_REG_10  0x0b
 #defineXMT_UPPER_LIMIT_REG_10  0x0a
-static unsigned xmt_lower_limit_reg = XMT_LOWER_LIMIT_REG_PRO;
-static unsigned xmt_upper_limit_reg = XMT_UPPER_LIMIT_REG_PRO;
 
 /* Bank 2 registers */
 #defineXMT_Chain_Int   0x20/* Interrupt at the end of the transmit chain 
*/
@@ -466,7 +467,6 @@
 
 #define EEPROM_REG_PRO 0x0a
 #define EEPROM_REG_10  0x0b
-static unsigned eeprom_reg = EEPROM_REG_PRO;
 
 #define EESK 0x01
 #define EECS 0x02
@@ -528,7 +528,8 @@
 #define eepro_ack_tx(ioaddr) outb (TX_INT, ioaddr + STATUS_REG)
 
 /* a complete sel reset */
-#define eepro_complete_selreset(ioaddr) {  eepro_dis_int(ioaddr);\
+#define eepro_complete_selreset(ioaddr) {  \
+   /* eepro_dis_int(ioaddr); */ \
lp->stats.tx_errors++;\
eepro_sel_reset(ioaddr);\
lp->tx_end = \
@@ -537,7 +538,7 @@
lp->tx_last = 0;\
dev->tbusy=0;\
dev->trans_start = jiffies;\
-   eepro_en_int(ioaddr);\
+   /*eepro_en_int(ioaddr); */ \
eepro_en_rx(ioaddr);\
}
 
@@ -670,7 +671,15 @@
 
lp = (struct eepro_local *)dev->priv;
 
-   /* Now, get the ethernet hardware address from
+   /* default values */
+   lp->rcv_start = RCV_START_PRO;
+   lp->xmt_bar = XMT_BAR_PRO;
+   lp->xmt_lower_limit_reg = XMT_LOWER_LIMIT_REG_PRO;
+   lp->xmt_upper_limit_reg = XMT_UPPER_LIMIT_REG_PRO;
+   lp->eeprom_reg = EEPROM_REG_PRO;
+   lp->rcv_ram = RCV_DEFAULT_RAM;
+
+   /* Now, get the ethernet hardware address from
   the EEPROM */
 
station_addr[0] = read_eeprom(ioaddr, 2,

Re: EEPRO Problems in 2.2.17 (sorry!)

2000-09-11 Thread aris


hi,
i'm working on this

On Wed, 6 Sep 2000 [EMAIL PROTECTED] wrote:

> 
> Well,
> I thought the problems with the eepro driver from 2.2.16 were fixed in
> 2.2.17.  Apparently the problems really weren't fixed - it did seem to get
> more stable though.
> 
> I was copying some large files over a NFS mount and when it got to about 6 megs,
> the NFS mount hung with symptoms similar to the 2.2.16 problems I had.
> Which was: no errors in the messages log, a hung NFS mount and reloading
> the driver module clears the problem up.
> 
> So I started doing some tests with ping to see how long it would stay up.
> During those tests, I noticed some weird DUP packets:
> 
> 64 bytes from 1.2.3.4: icmp_seq=28 ttl=255 time=0.4 ms
> 64 bytes from 1.2.3.4: icmp_seq=18 ttl=255 time=10894.5 ms (DUP!)
> 64 bytes from 1.2.3.4: icmp_seq=29 ttl=255 time=0.4 ms
> 
> They were all between 10-20 behind the icmp_seq and had at least a 6000ms
> time.
> 
> In the middle of a 40meg nfs copy, the driver hung at least 4 times - but
> at fairly random intervals.  Sometimes it will run for a long time -
> sometimes it locks up twice in a row fairly fast.
> 
> With the 2.2.14 and previous drivers I did not once notice an error with
> this card.  The only msg/error I see in the messages log is this:
> eth1: set Rx mode to 1 address.
> 
> But I've got that message for years now with the eepro driver.
> 
> If anyone familiar with this driver wishes to work with me in an attempt
> to reproduce this problem, I'd be happy to help.
> 
> Thanks,
> Brian Hayward
> 
> -
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to [EMAIL PROTECTED]
> Please read the FAQ at http://www.tux.org/lkml/
> 

-- 
Aris
---
Aristeu Sergio Rozanski Filho [EMAIL PROTECTED]
---

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
Please read the FAQ at http://www.tux.org/lkml/

Re: EEPRO Problems in 2.2.17 (sorry!)

2000-09-11 Thread aris


hi,
i'm working on this

On Wed, 6 Sep 2000 [EMAIL PROTECTED] wrote:

 
> Well,
> I thought the problems with the eepro driver from 2.2.16 were fixed in
> 2.2.17.  Apparently the problems really weren't fixed - it did seem to get
> more stable though.
> 
> I was copying some large files over a NFS mount and when it got to about 6 megs,
> the NFS mount hung with symptoms similar to the 2.2.16 problems I had.
> Which was: no errors in the messages log, a hung NFS mount and reloading
> the driver module clears the problem up.
> 
> So I started doing some tests with ping to see how long it would stay up.
> During those tests, I noticed some weird DUP packets:
> 
> 64 bytes from 1.2.3.4: icmp_seq=28 ttl=255 time=0.4 ms
> 64 bytes from 1.2.3.4: icmp_seq=18 ttl=255 time=10894.5 ms (DUP!)
> 64 bytes from 1.2.3.4: icmp_seq=29 ttl=255 time=0.4 ms
> 
> They were all between 10-20 behind the icmp_seq and had at least a 6000ms
> time.
> 
> In the middle of a 40meg nfs copy, the driver hung at least 4 times - but
> at fairly random intervals.  Sometimes it will run for a long time -
> sometimes it locks up twice in a row fairly fast.
> 
> With the 2.2.14 and previous drivers I did not once notice an error with
> this card.  The only msg/error I see in the messages log is this:
> eth1: set Rx mode to 1 address.
> 
> But I've got that message for years now with the eepro driver.
> 
> If anyone familiar with this driver wishes to work with me in an attempt
> to reproduce this problem, I'd be happy to help.
> 
> Thanks,
> Brian Hayward
> 
> -
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to [EMAIL PROTECTED]
> Please read the FAQ at http://www.tux.org/lkml/
 

-- 
    Aris
---
Aristeu Sergio Rozanski Filho [EMAIL PROTECTED]
---

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
Please read the FAQ at http://www.tux.org/lkml/

94 matches

Mail list logo