[PATCH v4 2/9] devcg: reorder device exception functions
In preparation for the next patch, reorder dev_exception_add() and dev_exception_rm(). This patch doesn't introduce any functional changes. Acked-by: Tejun Heo Cc: Tejun Heo Cc: Serge Hallyn Signed-off-by: Aristeu Rozanski --- security/device_cgroup.c | 44 ++-- 1 file changed, 22 insertions(+), 22 deletions(-) --- github.orig/security/device_cgroup.c2013-01-29 11:49:14.739657516 -0500 +++ github/security/device_cgroup.c 2013-01-29 11:49:14.987661210 -0500 @@ -104,18 +104,14 @@ free_and_exit: /* * called under devcgroup_mutex */ -static int dev_exception_add(struct list_head *exceptions, +static void dev_exception_rm(struct list_head *exceptions, struct dev_exception_item *ex) { - struct dev_exception_item *excopy, *walk; + struct dev_exception_item *walk, *tmp; lockdep_assert_held(_mutex); - excopy = kmemdup(ex, sizeof(*ex), GFP_KERNEL); - if (!excopy) - return -ENOMEM; - - list_for_each_entry(walk, exceptions, list) { + list_for_each_entry_safe(walk, tmp, exceptions, list) { if (walk->type != ex->type) continue; if (walk->major != ex->major) @@ -123,27 +119,29 @@ static int dev_exception_add(struct list if (walk->minor != ex->minor) continue; - walk->access |= ex->access; - kfree(excopy); - excopy = NULL; + walk->access &= ~ex->access; + if (!walk->access) { + list_del_rcu(>list); + kfree_rcu(walk, rcu); + } } - - if (excopy != NULL) - list_add_tail_rcu(>list, exceptions); - return 0; } /* * called under devcgroup_mutex */ -static void dev_exception_rm(struct list_head *exceptions, +static int dev_exception_add(struct list_head *exceptions, struct dev_exception_item *ex) { - struct dev_exception_item *walk, *tmp; + struct dev_exception_item *excopy, *walk; lockdep_assert_held(_mutex); - list_for_each_entry_safe(walk, tmp, exceptions, list) { + excopy = kmemdup(ex, sizeof(*ex), GFP_KERNEL); + if (!excopy) + return -ENOMEM; + + list_for_each_entry(walk, exceptions, list) { if (walk->type != ex->type) continue; if (walk->major != ex->major) @@ -151,12 +149,14 @@ 
static void dev_exception_rm(struct list if (walk->minor != ex->minor) continue; - walk->access &= ~ex->access; - if (!walk->access) { - list_del_rcu(>list); - kfree_rcu(walk, rcu); - } + walk->access |= ex->access; + kfree(excopy); + excopy = NULL; } + + if (excopy != NULL) + list_add_tail_rcu(>list, exceptions); + return 0; } static void __dev_exception_clean(struct dev_cgroup *dev_cgroup) -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v4 8/9] devcg: refactor dev_exception_clean()
This patch is in preparation for hierarchy support. This patch doesn't introduce any functional changes. Acked-by: Tejun Heo Cc: Tejun Heo Cc: Serge Hallyn Signed-off-by: Aristeu Rozanski --- security/device_cgroup.c | 34 -- 1 file changed, 16 insertions(+), 18 deletions(-) --- github.orig/security/device_cgroup.c2013-01-29 11:49:16.374681863 -0500 +++ github/security/device_cgroup.c 2013-01-29 11:49:16.653686016 -0500 @@ -212,32 +212,33 @@ static int dev_exception_add(struct dev_ return rc; } -static void __dev_exception_clean(struct dev_cgroup *dev_cgroup) +static void dev_exception_clean(struct list_head *exceptions) { struct dev_exception_item *ex, *tmp; - list_for_each_entry_safe(ex, tmp, _cgroup->exceptions, list) { - list_del_rcu(>list); - kfree_rcu(ex, rcu); - } - list_for_each_entry_safe(ex, tmp, _cgroup->local.exceptions, -list) { + list_for_each_entry_safe(ex, tmp, exceptions, list) { list_del_rcu(>list); kfree_rcu(ex, rcu); } } +static void __dev_exception_clean_all(struct dev_cgroup *dev_cgroup) +{ + dev_exception_clean(_cgroup->exceptions); + dev_exception_clean(_cgroup->local.exceptions); +} + /** - * dev_exception_clean - frees all entries of the exception list + * dev_exception_clean_all - frees all entries of the exception list * @dev_cgroup: dev_cgroup with the exception list to be cleaned * * called under devcgroup_mutex */ -static void dev_exception_clean(struct dev_cgroup *dev_cgroup) +static void dev_exception_clean_all(struct dev_cgroup *dev_cgroup) { lockdep_assert_held(_mutex); - __dev_exception_clean(dev_cgroup); + __dev_exception_clean_all(dev_cgroup); } /** @@ -303,7 +304,7 @@ static void devcgroup_css_free(struct cg struct dev_cgroup *dev_cgroup; dev_cgroup = cgroup_to_devcgroup(cgroup); - __dev_exception_clean(dev_cgroup); + __dev_exception_clean_all(dev_cgroup); kfree(dev_cgroup); } @@ -508,25 +509,22 @@ memset(, 0, sizeof(ex)); case DEVCG_ALLOW: if (!may_allow_all(parent)) return -EPERM; - dev_exception_clean(devcgroup); + 
dev_exception_clean_all(devcgroup); if (parent) rc = dev_exceptions_copy(>exceptions, >exceptions); devcgroup->behavior = DEVCG_DEFAULT_ALLOW; devcgroup->local.behavior = DEVCG_DEFAULT_ALLOW; - - if (rc) - return rc; break; case DEVCG_DENY: - dev_exception_clean(devcgroup); + dev_exception_clean_all(devcgroup); devcgroup->behavior = DEVCG_DEFAULT_DENY; devcgroup->local.behavior = DEVCG_DEFAULT_DENY; break; default: - return -EINVAL; + rc = -EINVAL; } - return 0; + return rc; case 'b': ex.type = DEV_BLOCK; break; -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v4 3/9] device_cgroup: keep track of local group settings
In preparation for better hierarchy support, it's needed to retain the local settings in order to try to reapply them after a propagated change if they're still valid. v2: split this patch in two, one to just move dev_exception_rm() before dev_exception_add() while keeping functional changes in this patch as requested by Tejun. Acked-by: Tejun Heo Cc: Tejun Heo Cc: Serge Hallyn Signed-off-by: Aristeu Rozanski --- security/device_cgroup.c | 83 +-- 1 file changed, 67 insertions(+), 16 deletions(-) --- github.orig/security/device_cgroup.c2013-01-29 11:49:14.987661210 -0500 +++ github/security/device_cgroup.c 2013-01-29 11:49:15.244665037 -0500 @@ -39,13 +39,27 @@ struct dev_exception_item { struct rcu_head rcu; }; +enum devcg_behavior { + DEVCG_DEFAULT_NONE, + DEVCG_DEFAULT_ALLOW, + DEVCG_DEFAULT_DENY, +}; + struct dev_cgroup { struct cgroup_subsys_state css; + + /* result of merging the parent's rules with local ones */ struct list_head exceptions; - enum { - DEVCG_DEFAULT_ALLOW, - DEVCG_DEFAULT_DENY, - } behavior; + enum devcg_behavior behavior; + + /* +* local set rules, saved so when a parent propagates new rules, the +* local preferences can be preserved +*/ + struct { + struct list_head exceptions; + enum devcg_behavior behavior; + } local; }; static inline struct dev_cgroup *css_to_devcgroup(struct cgroup_subsys_state *s) @@ -104,8 +118,8 @@ free_and_exit: /* * called under devcgroup_mutex */ -static void dev_exception_rm(struct list_head *exceptions, -struct dev_exception_item *ex) +static void __dev_exception_rm(struct list_head *exceptions, + struct dev_exception_item *ex) { struct dev_exception_item *walk, *tmp; @@ -127,11 +141,18 @@ static void dev_exception_rm(struct list } } +static void dev_exception_rm(struct dev_cgroup *devcgroup, +struct dev_exception_item *ex) +{ + __dev_exception_rm(>local.exceptions, ex); + __dev_exception_rm(>exceptions, ex); +} + /* * called under devcgroup_mutex */ -static int dev_exception_add(struct list_head *exceptions, 
-struct dev_exception_item *ex) +static int __dev_exception_add(struct list_head *exceptions, + struct dev_exception_item *ex) { struct dev_exception_item *excopy, *walk; @@ -159,6 +180,28 @@ static int dev_exception_add(struct list return 0; } +static int dev_exception_add(struct dev_cgroup *devcgroup, +struct dev_exception_item *ex) +{ + int rc; + + lockdep_assert_held(_mutex); + + /* +* we add to the local list so we can preserve local preferences if +* the parent propagates down new rules +*/ + rc = __dev_exception_add(>local.exceptions, ex); + if (rc) + return rc; + + rc = __dev_exception_add(>exceptions, ex); + if (rc) + __dev_exception_rm(>local.exceptions, ex); + + return rc; +} + static void __dev_exception_clean(struct dev_cgroup *dev_cgroup) { struct dev_exception_item *ex, *tmp; @@ -167,6 +210,11 @@ static void __dev_exception_clean(struct list_del_rcu(>list); kfree_rcu(ex, rcu); } + list_for_each_entry_safe(ex, tmp, _cgroup->local.exceptions, +list) { + list_del_rcu(>list); + kfree_rcu(ex, rcu); + } } /** @@ -195,6 +243,8 @@ static struct cgroup_subsys_state *devcg if (!dev_cgroup) return ERR_PTR(-ENOMEM); INIT_LIST_HEAD(_cgroup->exceptions); + INIT_LIST_HEAD(_cgroup->local.exceptions); + dev_cgroup->local.behavior = DEVCG_DEFAULT_NONE; parent_cgroup = cgroup->parent; if (parent_cgroup == NULL) @@ -413,18 +463,19 @@ memset(, 0, sizeof(ex)); if (!may_allow_all(parent)) return -EPERM; dev_exception_clean(devcgroup); + if (parent) + rc = dev_exceptions_copy(>exceptions, +>exceptions); devcgroup->behavior = DEVCG_DEFAULT_ALLOW; - if (!parent) - break; + devcgroup->local.behavior = DEVCG_DEFAULT_ALLOW; - rc = dev_exceptions_copy(>exceptions, ->exceptions); if (rc) return rc; break; case DEVCG_DENY:
[PATCH v4 7/9] devcg: split single exception copy from dev_exceptions_copy()
This patch is in preparation for hierarchy support. This patch doesn't introduce any functional changes. Acked-by: Tejun Heo Cc: Tejun Heo Cc: Serge Hallyn Signed-off-by: Aristeu Rozanski --- security/device_cgroup.c | 18 ++ 1 file changed, 14 insertions(+), 4 deletions(-) --- github.orig/security/device_cgroup.c 2013-01-29 11:49:16.076677425 -0500 +++ github/security/device_cgroup.c 2013-01-29 11:49:16.374681863 -0500 @@ -89,20 +89,30 @@ static int devcgroup_can_attach(struct c return 0; } +static int dev_exception_copy(struct list_head *dest, + struct dev_exception_item *ex) +{ + struct dev_exception_item *new; + + new = kmemdup(ex, sizeof(*ex), GFP_KERNEL); + if (!new) + return -ENOMEM; + list_add_tail(&new->list, dest); + return 0; +} + /* * called under devcgroup_mutex */ static int dev_exceptions_copy(struct list_head *dest, struct list_head *orig) { - struct dev_exception_item *ex, *tmp, *new; + struct dev_exception_item *ex, *tmp; lockdep_assert_held(&devcgroup_mutex); list_for_each_entry(ex, orig, list) { - new = kmemdup(ex, sizeof(*ex), GFP_KERNEL); - if (!new) + if (dev_exception_copy(dest, ex)) goto free_and_exit; - list_add_tail(&new->list, dest); } return 0; -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v4 1/9] device_cgroup: prepare exception list handling functions for two lists
In the following patches, device_cgroup structure will have two sets of behavior and exceptions list (actual one, another with the local settings) so rework the functions to use exception list, not a device_cgroup. Acked-by: Tejun Heo Cc: Tejun Heo Cc: Serge Hallyn Signed-off-by: Aristeu Rozanski --- security/device_cgroup.c | 18 +- 1 file changed, 9 insertions(+), 9 deletions(-) --- github.orig/security/device_cgroup.c2013-01-29 11:48:50.603298122 -0500 +++ github/security/device_cgroup.c 2013-01-29 11:49:14.739657516 -0500 @@ -104,7 +104,7 @@ free_and_exit: /* * called under devcgroup_mutex */ -static int dev_exception_add(struct dev_cgroup *dev_cgroup, +static int dev_exception_add(struct list_head *exceptions, struct dev_exception_item *ex) { struct dev_exception_item *excopy, *walk; @@ -115,7 +115,7 @@ static int dev_exception_add(struct dev_ if (!excopy) return -ENOMEM; - list_for_each_entry(walk, _cgroup->exceptions, list) { + list_for_each_entry(walk, exceptions, list) { if (walk->type != ex->type) continue; if (walk->major != ex->major) @@ -129,21 +129,21 @@ static int dev_exception_add(struct dev_ } if (excopy != NULL) - list_add_tail_rcu(>list, _cgroup->exceptions); + list_add_tail_rcu(>list, exceptions); return 0; } /* * called under devcgroup_mutex */ -static void dev_exception_rm(struct dev_cgroup *dev_cgroup, +static void dev_exception_rm(struct list_head *exceptions, struct dev_exception_item *ex) { struct dev_exception_item *walk, *tmp; lockdep_assert_held(_mutex); - list_for_each_entry_safe(walk, tmp, _cgroup->exceptions, list) { + list_for_each_entry_safe(walk, tmp, exceptions, list) { if (walk->type != ex->type) continue; if (walk->major != ex->major) @@ -514,10 +514,10 @@ case '\0': * don't want to break compatibility */ if (devcgroup->behavior == DEVCG_DEFAULT_ALLOW) { - dev_exception_rm(devcgroup, ); + dev_exception_rm(>exceptions, ); return 0; } - return dev_exception_add(devcgroup, ); + return dev_exception_add(>exceptions, ); case 
DEVCG_DENY: /* * If the default policy is to deny by default, try to remove @@ -525,10 +525,10 @@ return 0; * don't want to break compatibility */ if (devcgroup->behavior == DEVCG_DEFAULT_DENY) { - dev_exception_rm(devcgroup, ); + dev_exception_rm(>exceptions, ); return 0; } - return dev_exception_add(devcgroup, ); + return dev_exception_add(>exceptions, ); default: return -EINVAL; } -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v4 5/9] devcg: prepare may_access() for hierarchy support
Currently may_access() is only able to verify if an exception is valid for the current cgroup, which has the same behavior. With hierarchy, it'll be also used to verify if a cgroup local exception is valid towards its cgroup parent, which might have different behavior. v2: - updated patch description - rebased on top of a new patch to expand the may_access() logic to make it more clear - fixed argument description order in may_access() Acked-by: Tejun Heo Cc: Tejun Heo Cc: Serge Hallyn Signed-off-by: Aristeu Rozanski --- security/device_cgroup.c | 38 -- 1 file changed, 24 insertions(+), 14 deletions(-) --- github.orig/security/device_cgroup.c2013-01-30 08:58:02.0 -0500 +++ github/security/device_cgroup.c 2013-01-30 09:00:09.435351867 -0500 @@ -354,9 +354,11 @@return 0; * verify if a certain access is allowed. * @dev_cgroup: dev cgroup to be tested against * @refex: new exception + * @behavior: behavior of the exception */ static bool may_access(struct dev_cgroup *dev_cgroup, - struct dev_exception_item *refex) + struct dev_exception_item *refex, + enum devcg_behavior behavior) { struct dev_exception_item *ex; bool match = false; @@ -380,19 +382,27 @@ if (ex->minor != ~0 && ex->minor != re break; } - /* -* In two cases we'll consider this new exception valid: -* - the dev cgroup has its default policy to deny + exception list: -* the new exception *should* match the exceptions -* - the dev cgroup has its default policy to allow + exception list: -* the new exception should *not* match any of the exceptions -*/ - if (dev_cgroup->behavior == DEVCG_DEFAULT_DENY) { - if (match) + if (dev_cgroup->behavior == DEVCG_DEFAULT_ALLOW) { + if (behavior == DEVCG_DEFAULT_ALLOW) { + /* the exception will deny access to certain devices */ return true; + } else { + /* the exception will allow access to certain devices */ + if (match) + /* +* a new exception allowing access shouldn't +* match an parent's exception +*/ + return false; + return true; + } } else { - if (!match) + /* 
only behavior == DEVCG_DEFAULT_DENY allowed here */ + if (match) + /* parent has an exception that matches the proposed */ return true; + else + return false; } return false; } @@ -411,7 +421,7 @@ static int parent_has_perm(struct dev_cg if (!pcg) return 1; parent = cgroup_to_devcgroup(pcg); - return may_access(parent, ex); + return may_access(parent, ex, childcg->behavior); } /** @@ -445,7 +455,7 @@ static int devcgroup_update_access(struc { const char *b; char temp[12]; /* 11 + 1 characters needed for a u32 */ - int count, rc; + int count, rc = 0; struct dev_exception_item ex; struct cgroup *p = devcgroup->css.cgroup; struct dev_cgroup *parent = NULL; @@ -663,7 +673,7 @@ memset(, 0, sizeof(ex)); rcu_read_lock(); dev_cgroup = task_devcgroup(current); - rc = may_access(dev_cgroup, ); + rc = may_access(dev_cgroup, , dev_cgroup->behavior); rcu_read_unlock(); if (!rc) -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v4 0/9] devcg: introduce proper hierarchy support
This patchset implements device cgroup hierarchy. Behaviors and exceptions will be propagated down in the tree and local preferences will be re-evaluated every time a change in the parent occurs, reapplying them if it's still possible. git://github.com/aristeu/linux-2.6.git branch: devcg_hierarchy_review v4: - minor fixes pointed out by Tejun v3: - update documentation - move css_online/css_offline changes to a new patch - use cgroup_for_each_descendant_pre() instead of own descendant walk - move exception_copy rework to a separate patch - move exception_clean rework to a separate patch - new patch to just move dev_exception_rm() before dev_exception_add() as requested by Tejun. - updated patch description for may_access() changes - new patch to expand the may_access() logic before changing it - fixed argument description order in may_access() v2: - rebase on top of "device_cgroup: don't grab mutex in rcu callback" - in case parent changes behavior or exceptions and the local exceptions won't apply anymore, remove them instead of keeping them around. Cc: Tejun Heo Cc: Serge Hallyn Signed-off-by: Aristeu Rozanski -- Aristeu -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v4 4/9] devcg: expand may_access() logic
In order to make the next patch more clear, expand may_access() logic. v2: may_access() returns bool now Acked-by: Tejun Heo Cc: Tejun Heo Cc: Serge Hallyn Signed-off-by: Aristeu Rozanski --- security/device_cgroup.c | 21 - 1 file changed, 12 insertions(+), 9 deletions(-) --- github.orig/security/device_cgroup.c2013-01-30 08:56:29.532063723 -0500 +++ github/security/device_cgroup.c 2013-01-30 08:58:02.934460404 -0500 @@ -355,8 +355,8 @@ return 0; * @dev_cgroup: dev cgroup to be tested against * @refex: new exception */ -static int may_access(struct dev_cgroup *dev_cgroup, - struct dev_exception_item *refex) +static bool may_access(struct dev_cgroup *dev_cgroup, + struct dev_exception_item *refex) { struct dev_exception_item *ex; bool match = false; @@ -382,16 +382,19 @@ if (ex->minor != ~0 && ex->minor != re /* * In two cases we'll consider this new exception valid: -* - the dev cgroup has its default policy to allow + exception list: -* the new exception should *not* match any of the exceptions -* (behavior == DEVCG_DEFAULT_ALLOW, !match) * - the dev cgroup has its default policy to deny + exception list: * the new exception *should* match the exceptions -* (behavior == DEVCG_DEFAULT_DENY, match) +* - the dev cgroup has its default policy to allow + exception list: +* the new exception should *not* match any of the exceptions */ - if ((dev_cgroup->behavior == DEVCG_DEFAULT_DENY) == match) - return 1; - return 0; + if (dev_cgroup->behavior == DEVCG_DEFAULT_DENY) { + if (match) + return true; + } else { + if (!match) + return true; + } + return false; } /* -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v4 9/9] devcg: propagate local changes down the hierarchy
This patch makes all changes propagate down the hierarchy, respecting local configurations when possible. Behavior changes will clean up exceptions in all the children except when the parent changes the behavior from allow to deny and the child's behavior was already deny, in which case the local exceptions will be reused. The inverse is not possible: you can't have a parent with behavior deny and a child with behavior allow. New exceptions allowing additional access to devices won't be propagated, but it'll be possible to add an exception to access all or part of the newly allowed device(s). New exceptions disallowing access to devices will be propagated down and the local group's exceptions will be revalidated for the new situation. Example: A / \ B group behavior exceptions A allow "b 8:* rwm", "c 116:1 rw" B deny "c 1:3 rwm", "c 116:2 rwm", "b 3:* rwm" If a new exception is added to group A: # echo "c 116:* r" > A/devices.deny it'll propagate down and after revalidating B's local exceptions, the exception "c 116:2 rwm" will be removed. In case parent behavior or exceptions change and local settings are not allowed anymore, they'll be deleted. 
v4: - separated function to walk the tree and collect valid propagation targets v3: - update documentation - move css_online/css_offline changes to a new patch - use cgroup_for_each_descendant_pre() instead of own descendant walk - move exception_copy rework to a separared patch - move exception_clean rework to a separated patch v2: - instead of keeping the local settings that won't apply anymore, remove them Acked-by: Tejun Heo Cc: Tejun Heo Cc: Serge Hallyn Signed-off-by: Aristeu Rozanski --- Documentation/cgroups/devices.txt | 66 + security/device_cgroup.c | 186 -- 2 files changed, 246 insertions(+), 6 deletions(-) --- github.orig/security/device_cgroup.c2013-01-30 10:03:16.943873992 -0500 +++ github/security/device_cgroup.c 2013-01-30 10:44:23.693586209 -0500 @@ -60,6 +60,9 @@ struct dev_cgroup { struct list_head exceptions; enum devcg_behavior behavior; } local; + + /* temporary list for pending propagation operations */ + struct list_head propagate_pending; }; static inline struct dev_cgroup *css_to_devcgroup(struct cgroup_subsys_state *s) @@ -241,6 +244,11 @@ static void dev_exception_clean_all(stru __dev_exception_clean_all(dev_cgroup); } +static inline bool is_devcg_online(const struct dev_cgroup *devcg) +{ + return (devcg->behavior != DEVCG_DEFAULT_NONE); +} + /** * devcgroup_online - initializes devcgroup's behavior and exceptions based on * parent's @@ -292,6 +300,7 @@ static struct cgroup_subsys_state *devcg return ERR_PTR(-ENOMEM); INIT_LIST_HEAD(_cgroup->exceptions); INIT_LIST_HEAD(_cgroup->local.exceptions); + INIT_LIST_HEAD(_cgroup->propagate_pending); dev_cgroup->local.behavior = DEVCG_DEFAULT_NONE; dev_cgroup->behavior = DEVCG_DEFAULT_NONE; parent_cgroup = cgroup->parent; @@ -471,6 +480,155 @@ static inline int may_allow_all(struct d return parent->behavior == DEVCG_DEFAULT_ALLOW; } +/** + * revalidate_exceptions - walks through the exception list and revalidates + * the exceptions based on parents' behavior and + * exceptions. 
Called with devcgroup_mutex held. + * @devcg: cgroup which exceptions will be checked + * + * returns: 0 in success, -ENOMEM in case of out of memory + * + * This is one of the two key functions for hierarchy implementation. + * This function is responsible for re-evaluating all the cgroup's locally + * set exceptions due to a parent's behavior or exception change. + * Refer to Documentation/cgroups/devices.txt for more details. + */ +static int revalidate_exceptions(struct dev_cgroup *devcg) +{ + struct dev_exception_item *ex; + struct list_head *this, *tmp; + + list_for_each_safe(this, tmp, >local.exceptions) { + ex = container_of(this, struct dev_exception_item, list); + if (parent_has_perm(devcg, ex)) { + if (dev_exception_copy(>exceptions, ex)) + goto error; + } else + __dev_exception_rm(>local.exceptions, ex); + } + return 0; + +error: + dev_exception_clean(>exceptions); + return -ENOMEM; +} + +/** + * get_online_devcg - walks the cgroup tree and fills a list with the online + * groups + * @root: cgroup used as starting point + * @online: list that will be filled with online groups + * + * Must be called with devcgroup_mutex held. Grabs RCU lock. + * Because devcgroup_mutex is held, no devcg will become online or offline + * during the
[PATCH v4 6/9] devcg: use css_online and css_offline
Allocate resources and change behavior only when online. This is needed in order to determine if a node is suitable for hierarchy propagation or if it's being removed. Locking: Both functions take devcgroup_mutex to make changes to device_cgroup structure. Hierarchy propagation will also take devcgroup_mutex before walking the tree while walking the tree itself is protected by rcu lock. Acked-by: Tejun Heo Cc: Tejun Heo Cc: Serge Hallyn Signed-off-by: Aristeu Rozanski --- security/device_cgroup.c | 59 +-- 1 file changed, 42 insertions(+), 17 deletions(-) --- github.orig/security/device_cgroup.c2013-01-30 09:00:09.435351867 -0500 +++ github/security/device_cgroup.c 2013-01-30 09:09:12.572464122 -0500 @@ -230,14 +230,51 @@ static void dev_exception_clean(struct d __dev_exception_clean(dev_cgroup); } +/** + * devcgroup_online - initializes devcgroup's behavior and exceptions based on + * parent's + * @cgroup: cgroup getting online + * returns 0 in case of success, error code otherwise + */ +static int devcgroup_online(struct cgroup *cgroup) +{ + struct dev_cgroup *dev_cgroup, *parent_dev_cgroup = NULL; + int ret = 0; + + mutex_lock(_mutex); + dev_cgroup = cgroup_to_devcgroup(cgroup); + if (cgroup->parent) + parent_dev_cgroup = cgroup_to_devcgroup(cgroup->parent); + + if (parent_dev_cgroup == NULL) + dev_cgroup->behavior = DEVCG_DEFAULT_ALLOW; + else { + ret = dev_exceptions_copy(_cgroup->exceptions, + _dev_cgroup->exceptions); + if (!ret) + dev_cgroup->behavior = parent_dev_cgroup->behavior; + } + mutex_unlock(_mutex); + + return ret; +} + +static void devcgroup_offline(struct cgroup *cgroup) +{ + struct dev_cgroup *dev_cgroup = cgroup_to_devcgroup(cgroup); + + mutex_lock(_mutex); + dev_cgroup->behavior = DEVCG_DEFAULT_NONE; + mutex_unlock(_mutex); +} + /* * called from kernel/cgroup.c with cgroup_lock() held. 
*/ static struct cgroup_subsys_state *devcgroup_css_alloc(struct cgroup *cgroup) { - struct dev_cgroup *dev_cgroup, *parent_dev_cgroup; + struct dev_cgroup *dev_cgroup; struct cgroup *parent_cgroup; - int ret; dev_cgroup = kzalloc(sizeof(*dev_cgroup), GFP_KERNEL); if (!dev_cgroup) @@ -245,23 +282,9 @@ static struct cgroup_subsys_state *devcg INIT_LIST_HEAD(_cgroup->exceptions); INIT_LIST_HEAD(_cgroup->local.exceptions); dev_cgroup->local.behavior = DEVCG_DEFAULT_NONE; + dev_cgroup->behavior = DEVCG_DEFAULT_NONE; parent_cgroup = cgroup->parent; - if (parent_cgroup == NULL) - dev_cgroup->behavior = DEVCG_DEFAULT_ALLOW; - else { - parent_dev_cgroup = cgroup_to_devcgroup(parent_cgroup); - mutex_lock(_mutex); - ret = dev_exceptions_copy(_cgroup->exceptions, - _dev_cgroup->exceptions); - dev_cgroup->behavior = parent_dev_cgroup->behavior; - mutex_unlock(_mutex); - if (ret) { - kfree(dev_cgroup); - return ERR_PTR(ret); - } - } - return _cgroup->css; } @@ -635,6 +658,8 @@ struct cgroup_subsys devices_subsys = { .can_attach = devcgroup_can_attach, .css_alloc = devcgroup_css_alloc, .css_free = devcgroup_css_free, + .css_online = devcgroup_online, + .css_offline = devcgroup_offline, .subsys_id = devices_subsys_id, .base_cftypes = dev_cgroup_files, -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v4 6/9] devcg: use css_online and css_offline
Allocate resources and change behavior only when online. This is needed in order to determine if a node is suitable for hierarchy propagation or if it's being removed. Locking: Both functions take devcgroup_mutex to make changes to device_cgroup structure. Hierarchy propagation will also take devcgroup_mutex before walking the tree while walking the tree itself is protected by rcu lock. Acked-by: Tejun Heo t...@kernel.org Cc: Tejun Heo t...@kernel.org Cc: Serge Hallyn serge.hal...@canonical.com Signed-off-by: Aristeu Rozanski a...@redhat.com --- security/device_cgroup.c | 59 +-- 1 file changed, 42 insertions(+), 17 deletions(-) --- github.orig/security/device_cgroup.c2013-01-30 09:00:09.435351867 -0500 +++ github/security/device_cgroup.c 2013-01-30 09:09:12.572464122 -0500 @@ -230,14 +230,51 @@ static void dev_exception_clean(struct d __dev_exception_clean(dev_cgroup); } +/** + * devcgroup_online - initializes devcgroup's behavior and exceptions based on + * parent's + * @cgroup: cgroup getting online + * returns 0 in case of success, error code otherwise + */ +static int devcgroup_online(struct cgroup *cgroup) +{ + struct dev_cgroup *dev_cgroup, *parent_dev_cgroup = NULL; + int ret = 0; + + mutex_lock(devcgroup_mutex); + dev_cgroup = cgroup_to_devcgroup(cgroup); + if (cgroup-parent) + parent_dev_cgroup = cgroup_to_devcgroup(cgroup-parent); + + if (parent_dev_cgroup == NULL) + dev_cgroup-behavior = DEVCG_DEFAULT_ALLOW; + else { + ret = dev_exceptions_copy(dev_cgroup-exceptions, + parent_dev_cgroup-exceptions); + if (!ret) + dev_cgroup-behavior = parent_dev_cgroup-behavior; + } + mutex_unlock(devcgroup_mutex); + + return ret; +} + +static void devcgroup_offline(struct cgroup *cgroup) +{ + struct dev_cgroup *dev_cgroup = cgroup_to_devcgroup(cgroup); + + mutex_lock(devcgroup_mutex); + dev_cgroup-behavior = DEVCG_DEFAULT_NONE; + mutex_unlock(devcgroup_mutex); +} + /* * called from kernel/cgroup.c with cgroup_lock() held. 
*/ static struct cgroup_subsys_state *devcgroup_css_alloc(struct cgroup *cgroup) { - struct dev_cgroup *dev_cgroup, *parent_dev_cgroup; + struct dev_cgroup *dev_cgroup; struct cgroup *parent_cgroup; - int ret; dev_cgroup = kzalloc(sizeof(*dev_cgroup), GFP_KERNEL); if (!dev_cgroup) @@ -245,23 +282,9 @@ static struct cgroup_subsys_state *devcg INIT_LIST_HEAD(dev_cgroup-exceptions); INIT_LIST_HEAD(dev_cgroup-local.exceptions); dev_cgroup-local.behavior = DEVCG_DEFAULT_NONE; + dev_cgroup-behavior = DEVCG_DEFAULT_NONE; parent_cgroup = cgroup-parent; - if (parent_cgroup == NULL) - dev_cgroup-behavior = DEVCG_DEFAULT_ALLOW; - else { - parent_dev_cgroup = cgroup_to_devcgroup(parent_cgroup); - mutex_lock(devcgroup_mutex); - ret = dev_exceptions_copy(dev_cgroup-exceptions, - parent_dev_cgroup-exceptions); - dev_cgroup-behavior = parent_dev_cgroup-behavior; - mutex_unlock(devcgroup_mutex); - if (ret) { - kfree(dev_cgroup); - return ERR_PTR(ret); - } - } - return dev_cgroup-css; } @@ -635,6 +658,8 @@ struct cgroup_subsys devices_subsys = { .can_attach = devcgroup_can_attach, .css_alloc = devcgroup_css_alloc, .css_free = devcgroup_css_free, + .css_online = devcgroup_online, + .css_offline = devcgroup_offline, .subsys_id = devices_subsys_id, .base_cftypes = dev_cgroup_files, -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v4 9/9] devcg: propagate local changes down the hierarchy
This patch makes all changes propagate down the hierarchy, respecting local configurations when possible. Behavior changes will clean up exceptions in all the children except when the parent changes the behavior from allow to deny and the child's behavior was already deny, in which case the local exceptions will be reused. The inverse is not possible: you can't have a parent with behavior deny and a child with behavior allow. New exceptions allowing additional access to devices won't be propagated, but it'll be possible to add an exception to access all or part of the newly allowed device(s). New exceptions disallowing access to devices will be propagated down and the local group's exceptions will be revalidated for the new situation. Example: A / \ B group behavior exceptions A allow "b 8:* rwm", "c 116:1 rw" B deny "c 1:3 rwm", "c 116:2 rwm", "b 3:* rwm" If a new exception is added to group A: # echo "c 116:* r" > A/devices.deny it'll propagate down and after revalidating B's local exceptions, the exception "c 116:2 rwm" will be removed. In case parent behavior or exceptions change and local settings are not allowed anymore, they'll be deleted. 
v4: - separated function to walk the tree and collect valid propagation targets v3: - update documentation - move css_online/css_offline changes to a new patch - use cgroup_for_each_descendant_pre() instead of own descendant walk - move exception_copy rework to a separared patch - move exception_clean rework to a separated patch v2: - instead of keeping the local settings that won't apply anymore, remove them Acked-by: Tejun Heo t...@kernel.org Cc: Tejun Heo t...@kernel.org Cc: Serge Hallyn serge.hal...@canonical.com Signed-off-by: Aristeu Rozanski a...@redhat.com --- Documentation/cgroups/devices.txt | 66 + security/device_cgroup.c | 186 -- 2 files changed, 246 insertions(+), 6 deletions(-) --- github.orig/security/device_cgroup.c2013-01-30 10:03:16.943873992 -0500 +++ github/security/device_cgroup.c 2013-01-30 10:44:23.693586209 -0500 @@ -60,6 +60,9 @@ struct dev_cgroup { struct list_head exceptions; enum devcg_behavior behavior; } local; + + /* temporary list for pending propagation operations */ + struct list_head propagate_pending; }; static inline struct dev_cgroup *css_to_devcgroup(struct cgroup_subsys_state *s) @@ -241,6 +244,11 @@ static void dev_exception_clean_all(stru __dev_exception_clean_all(dev_cgroup); } +static inline bool is_devcg_online(const struct dev_cgroup *devcg) +{ + return (devcg-behavior != DEVCG_DEFAULT_NONE); +} + /** * devcgroup_online - initializes devcgroup's behavior and exceptions based on * parent's @@ -292,6 +300,7 @@ static struct cgroup_subsys_state *devcg return ERR_PTR(-ENOMEM); INIT_LIST_HEAD(dev_cgroup-exceptions); INIT_LIST_HEAD(dev_cgroup-local.exceptions); + INIT_LIST_HEAD(dev_cgroup-propagate_pending); dev_cgroup-local.behavior = DEVCG_DEFAULT_NONE; dev_cgroup-behavior = DEVCG_DEFAULT_NONE; parent_cgroup = cgroup-parent; @@ -471,6 +480,155 @@ static inline int may_allow_all(struct d return parent-behavior == DEVCG_DEFAULT_ALLOW; } +/** + * revalidate_exceptions - walks through the exception list and revalidates + * the 
exceptions based on parents' behavior and + * exceptions. Called with devcgroup_mutex held. + * @devcg: cgroup which exceptions will be checked + * + * returns: 0 in success, -ENOMEM in case of out of memory + * + * This is one of the two key functions for hierarchy implementation. + * This function is responsible for re-evaluating all the cgroup's locally + * set exceptions due to a parent's behavior or exception change. + * Refer to Documentation/cgroups/devices.txt for more details. + */ +static int revalidate_exceptions(struct dev_cgroup *devcg) +{ + struct dev_exception_item *ex; + struct list_head *this, *tmp; + + list_for_each_safe(this, tmp, devcg-local.exceptions) { + ex = container_of(this, struct dev_exception_item, list); + if (parent_has_perm(devcg, ex)) { + if (dev_exception_copy(devcg-exceptions, ex)) + goto error; + } else + __dev_exception_rm(devcg-local.exceptions, ex); + } + return 0; + +error: + dev_exception_clean(devcg-exceptions); + return -ENOMEM; +} + +/** + * get_online_devcg - walks the cgroup tree and fills a list with the online + * groups + * @root: cgroup used as starting point + * @online: list that will be filled with online groups + * + * Must be called with devcgroup_mutex held. Grabs RCU lock. + * Because
[PATCH v4 0/9] devcg: introduce proper hierarchy support
This patchset implements device cgroup hierarchy. Behaviors and exceptions will be propagated down the tree, and local preferences will be re-evaluated every time a change in the parent occurs, reapplying them if that is still possible. git://github.com/aristeu/linux-2.6.git branch: devcg_hierarchy_review v4: - minor fixes pointed out by Tejun v3: - update documentation - move css_online/css_offline changes to a new patch - use cgroup_for_each_descendant_pre() instead of own descendant walk - move exception_copy rework to a separate patch - move exception_clean rework to a separate patch - new patch to just move dev_exception_rm() before dev_exception_add() as requested by Tejun. - updated patch description for may_access() changes - new patch to expand the may_access() logic before changing it - fixed argument description order in may_access() v2: - rebase on top of "device_cgroup: don't grab mutex in rcu callback" - in case parent changes behavior or exceptions and the local exceptions won't apply anymore, remove them instead of keeping them around. Cc: Tejun Heo t...@kernel.org Cc: Serge Hallyn serge.hal...@canonical.com Signed-off-by: Aristeu Rozanski a...@redhat.com -- Aristeu -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v4 4/9] devcg: expand may_access() logic
In order to make the next patch more clear, expand may_access() logic. v2: may_access() returns bool now Acked-by: Tejun Heo t...@kernel.org Cc: Tejun Heo t...@kernel.org Cc: Serge Hallyn serge.hal...@canonical.com Signed-off-by: Aristeu Rozanski a...@redhat.com --- security/device_cgroup.c | 21 - 1 file changed, 12 insertions(+), 9 deletions(-) --- github.orig/security/device_cgroup.c2013-01-30 08:56:29.532063723 -0500 +++ github/security/device_cgroup.c 2013-01-30 08:58:02.934460404 -0500 @@ -355,8 +355,8 @@ return 0; * @dev_cgroup: dev cgroup to be tested against * @refex: new exception */ -static int may_access(struct dev_cgroup *dev_cgroup, - struct dev_exception_item *refex) +static bool may_access(struct dev_cgroup *dev_cgroup, + struct dev_exception_item *refex) { struct dev_exception_item *ex; bool match = false; @@ -382,16 +382,19 @@ if (ex-minor != ~0 ex-minor != re /* * In two cases we'll consider this new exception valid: -* - the dev cgroup has its default policy to allow + exception list: -* the new exception should *not* match any of the exceptions -* (behavior == DEVCG_DEFAULT_ALLOW, !match) * - the dev cgroup has its default policy to deny + exception list: * the new exception *should* match the exceptions -* (behavior == DEVCG_DEFAULT_DENY, match) +* - the dev cgroup has its default policy to allow + exception list: +* the new exception should *not* match any of the exceptions */ - if ((dev_cgroup-behavior == DEVCG_DEFAULT_DENY) == match) - return 1; - return 0; + if (dev_cgroup-behavior == DEVCG_DEFAULT_DENY) { + if (match) + return true; + } else { + if (!match) + return true; + } + return false; } /* -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v4 5/9] devcg: prepare may_access() for hierarchy support
Currently may_access() is only able to verify if an exception is valid for the current cgroup, which has the same behavior. With hierarchy, it'll be also used to verify if a cgroup local exception is valid towards its cgroup parent, which might have different behavior. v2: - updated patch description - rebased on top of a new patch to expand the may_access() logic to make it more clear - fixed argument description order in may_access() Acked-by: Tejun Heo t...@kernel.org Cc: Tejun Heo t...@kernel.org Cc: Serge Hallyn serge.hal...@canonical.com Signed-off-by: Aristeu Rozanski a...@redhat.com --- security/device_cgroup.c | 38 -- 1 file changed, 24 insertions(+), 14 deletions(-) --- github.orig/security/device_cgroup.c2013-01-30 08:58:02.0 -0500 +++ github/security/device_cgroup.c 2013-01-30 09:00:09.435351867 -0500 @@ -354,9 +354,11 @@return 0; * verify if a certain access is allowed. * @dev_cgroup: dev cgroup to be tested against * @refex: new exception + * @behavior: behavior of the exception */ static bool may_access(struct dev_cgroup *dev_cgroup, - struct dev_exception_item *refex) + struct dev_exception_item *refex, + enum devcg_behavior behavior) { struct dev_exception_item *ex; bool match = false; @@ -380,19 +382,27 @@ if (ex-minor != ~0 ex-minor != re break; } - /* -* In two cases we'll consider this new exception valid: -* - the dev cgroup has its default policy to deny + exception list: -* the new exception *should* match the exceptions -* - the dev cgroup has its default policy to allow + exception list: -* the new exception should *not* match any of the exceptions -*/ - if (dev_cgroup-behavior == DEVCG_DEFAULT_DENY) { - if (match) + if (dev_cgroup-behavior == DEVCG_DEFAULT_ALLOW) { + if (behavior == DEVCG_DEFAULT_ALLOW) { + /* the exception will deny access to certain devices */ return true; + } else { + /* the exception will allow access to certain devices */ + if (match) + /* +* a new exception allowing access shouldn't +* match an parent's exception 
+*/ + return false; + return true; + } } else { - if (!match) + /* only behavior == DEVCG_DEFAULT_DENY allowed here */ + if (match) + /* parent has an exception that matches the proposed */ return true; + else + return false; } return false; } @@ -411,7 +421,7 @@ static int parent_has_perm(struct dev_cg if (!pcg) return 1; parent = cgroup_to_devcgroup(pcg); - return may_access(parent, ex); + return may_access(parent, ex, childcg-behavior); } /** @@ -445,7 +455,7 @@ static int devcgroup_update_access(struc { const char *b; char temp[12]; /* 11 + 1 characters needed for a u32 */ - int count, rc; + int count, rc = 0; struct dev_exception_item ex; struct cgroup *p = devcgroup-css.cgroup; struct dev_cgroup *parent = NULL; @@ -663,7 +673,7 @@ memset(ex, 0, sizeof(ex)); rcu_read_lock(); dev_cgroup = task_devcgroup(current); - rc = may_access(dev_cgroup, ex); + rc = may_access(dev_cgroup, ex, dev_cgroup-behavior); rcu_read_unlock(); if (!rc) -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v4 7/9] devcg: split single exception copy from dev_exceptions_copy()
This patch is in preparation for hierarchy support. This patch doesn't introduce any functional changes. Acked-by: Tejun Heo t...@kernel.org Cc: Tejun Heo t...@kernel.org Cc: Serge Hallyn serge.hal...@canonical.com Signed-off-by: Aristeu Rozanski a...@redhat.com --- security/device_cgroup.c | 18 ++ 1 file changed, 14 insertions(+), 4 deletions(-) --- github.orig/security/device_cgroup.c2013-01-29 11:49:16.076677425 -0500 +++ github/security/device_cgroup.c 2013-01-29 11:49:16.374681863 -0500 @@ -89,20 +89,30 @@ static int devcgroup_can_attach(struct c return 0; } +static int dev_exception_copy(struct list_head *dest, + struct dev_exception_item *ex) +{ + struct dev_exception_item *new; + + new = kmemdup(ex, sizeof(*ex), GFP_KERNEL); + if (!new) + return -ENOMEM; + list_add_tail(new-list, dest); + return 0; +} + /* * called under devcgroup_mutex */ static int dev_exceptions_copy(struct list_head *dest, struct list_head *orig) { - struct dev_exception_item *ex, *tmp, *new; + struct dev_exception_item *ex, *tmp; lockdep_assert_held(devcgroup_mutex); list_for_each_entry(ex, orig, list) { - new = kmemdup(ex, sizeof(*ex), GFP_KERNEL); - if (!new) + if (dev_exception_copy(dest, ex)) goto free_and_exit; - list_add_tail(new-list, dest); } return 0; -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v4 1/9] device_cgroup: prepare exception list handling functions for two lists
In the following patches, device_cgroup structure will have two sets of behavior and exceptions list (actual one, another with the local settings) so rework the functions to use exception list, not a device_cgroup. Acked-by: Tejun Heo t...@kernel.org Cc: Tejun Heo t...@kernel.org Cc: Serge Hallyn serge.hal...@canonical.com Signed-off-by: Aristeu Rozanski a...@redhat.com --- security/device_cgroup.c | 18 +- 1 file changed, 9 insertions(+), 9 deletions(-) --- github.orig/security/device_cgroup.c2013-01-29 11:48:50.603298122 -0500 +++ github/security/device_cgroup.c 2013-01-29 11:49:14.739657516 -0500 @@ -104,7 +104,7 @@ free_and_exit: /* * called under devcgroup_mutex */ -static int dev_exception_add(struct dev_cgroup *dev_cgroup, +static int dev_exception_add(struct list_head *exceptions, struct dev_exception_item *ex) { struct dev_exception_item *excopy, *walk; @@ -115,7 +115,7 @@ static int dev_exception_add(struct dev_ if (!excopy) return -ENOMEM; - list_for_each_entry(walk, dev_cgroup-exceptions, list) { + list_for_each_entry(walk, exceptions, list) { if (walk-type != ex-type) continue; if (walk-major != ex-major) @@ -129,21 +129,21 @@ static int dev_exception_add(struct dev_ } if (excopy != NULL) - list_add_tail_rcu(excopy-list, dev_cgroup-exceptions); + list_add_tail_rcu(excopy-list, exceptions); return 0; } /* * called under devcgroup_mutex */ -static void dev_exception_rm(struct dev_cgroup *dev_cgroup, +static void dev_exception_rm(struct list_head *exceptions, struct dev_exception_item *ex) { struct dev_exception_item *walk, *tmp; lockdep_assert_held(devcgroup_mutex); - list_for_each_entry_safe(walk, tmp, dev_cgroup-exceptions, list) { + list_for_each_entry_safe(walk, tmp, exceptions, list) { if (walk-type != ex-type) continue; if (walk-major != ex-major) @@ -514,10 +514,10 @@ case '\0': * don't want to break compatibility */ if (devcgroup-behavior == DEVCG_DEFAULT_ALLOW) { - dev_exception_rm(devcgroup, ex); + dev_exception_rm(devcgroup-exceptions, ex); 
return 0; } - return dev_exception_add(devcgroup, ex); + return dev_exception_add(devcgroup-exceptions, ex); case DEVCG_DENY: /* * If the default policy is to deny by default, try to remove @@ -525,10 +525,10 @@ return 0; * don't want to break compatibility */ if (devcgroup-behavior == DEVCG_DEFAULT_DENY) { - dev_exception_rm(devcgroup, ex); + dev_exception_rm(devcgroup-exceptions, ex); return 0; } - return dev_exception_add(devcgroup, ex); + return dev_exception_add(devcgroup-exceptions, ex); default: return -EINVAL; } -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v4 8/9] devcg: refactor dev_exception_clean()
This patch is in preparation for hierarchy support. This patch doesn't introduce any functional changes. Acked-by: Tejun Heo t...@kernel.org Cc: Tejun Heo t...@kernel.org Cc: Serge Hallyn serge.hal...@canonical.com Signed-off-by: Aristeu Rozanski a...@redhat.com --- security/device_cgroup.c | 34 -- 1 file changed, 16 insertions(+), 18 deletions(-) --- github.orig/security/device_cgroup.c2013-01-29 11:49:16.374681863 -0500 +++ github/security/device_cgroup.c 2013-01-29 11:49:16.653686016 -0500 @@ -212,32 +212,33 @@ static int dev_exception_add(struct dev_ return rc; } -static void __dev_exception_clean(struct dev_cgroup *dev_cgroup) +static void dev_exception_clean(struct list_head *exceptions) { struct dev_exception_item *ex, *tmp; - list_for_each_entry_safe(ex, tmp, dev_cgroup-exceptions, list) { - list_del_rcu(ex-list); - kfree_rcu(ex, rcu); - } - list_for_each_entry_safe(ex, tmp, dev_cgroup-local.exceptions, -list) { + list_for_each_entry_safe(ex, tmp, exceptions, list) { list_del_rcu(ex-list); kfree_rcu(ex, rcu); } } +static void __dev_exception_clean_all(struct dev_cgroup *dev_cgroup) +{ + dev_exception_clean(dev_cgroup-exceptions); + dev_exception_clean(dev_cgroup-local.exceptions); +} + /** - * dev_exception_clean - frees all entries of the exception list + * dev_exception_clean_all - frees all entries of the exception list * @dev_cgroup: dev_cgroup with the exception list to be cleaned * * called under devcgroup_mutex */ -static void dev_exception_clean(struct dev_cgroup *dev_cgroup) +static void dev_exception_clean_all(struct dev_cgroup *dev_cgroup) { lockdep_assert_held(devcgroup_mutex); - __dev_exception_clean(dev_cgroup); + __dev_exception_clean_all(dev_cgroup); } /** @@ -303,7 +304,7 @@ static void devcgroup_css_free(struct cg struct dev_cgroup *dev_cgroup; dev_cgroup = cgroup_to_devcgroup(cgroup); - __dev_exception_clean(dev_cgroup); + __dev_exception_clean_all(dev_cgroup); kfree(dev_cgroup); } @@ -508,25 +509,22 @@ memset(ex, 0, sizeof(ex)); case 
DEVCG_ALLOW: if (!may_allow_all(parent)) return -EPERM; - dev_exception_clean(devcgroup); + dev_exception_clean_all(devcgroup); if (parent) rc = dev_exceptions_copy(devcgroup-exceptions, parent-exceptions); devcgroup-behavior = DEVCG_DEFAULT_ALLOW; devcgroup-local.behavior = DEVCG_DEFAULT_ALLOW; - - if (rc) - return rc; break; case DEVCG_DENY: - dev_exception_clean(devcgroup); + dev_exception_clean_all(devcgroup); devcgroup-behavior = DEVCG_DEFAULT_DENY; devcgroup-local.behavior = DEVCG_DEFAULT_DENY; break; default: - return -EINVAL; + rc = -EINVAL; } - return 0; + return rc; case 'b': ex.type = DEV_BLOCK; break; -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v4 3/9] device_cgroup: keep track of local group settings
In preparation for better hierarchy support, it's needed to retain the local settings in order to try to reapply them after a propagated change if they're still valid. v2: split this patch in two, one to just move dev_exception_rm() before dev_exception_add() while keeping functional changes in this patch as requested by Tejun. Acked-by: Tejun Heo t...@kernel.org Cc: Tejun Heo t...@kernel.org Cc: Serge Hallyn serge.hal...@canonical.com Signed-off-by: Aristeu Rozanski a...@redhat.com --- security/device_cgroup.c | 83 +-- 1 file changed, 67 insertions(+), 16 deletions(-) --- github.orig/security/device_cgroup.c2013-01-29 11:49:14.987661210 -0500 +++ github/security/device_cgroup.c 2013-01-29 11:49:15.244665037 -0500 @@ -39,13 +39,27 @@ struct dev_exception_item { struct rcu_head rcu; }; +enum devcg_behavior { + DEVCG_DEFAULT_NONE, + DEVCG_DEFAULT_ALLOW, + DEVCG_DEFAULT_DENY, +}; + struct dev_cgroup { struct cgroup_subsys_state css; + + /* result of merging the parent's rules with local ones */ struct list_head exceptions; - enum { - DEVCG_DEFAULT_ALLOW, - DEVCG_DEFAULT_DENY, - } behavior; + enum devcg_behavior behavior; + + /* +* local set rules, saved so when a parent propagates new rules, the +* local preferences can be preserved +*/ + struct { + struct list_head exceptions; + enum devcg_behavior behavior; + } local; }; static inline struct dev_cgroup *css_to_devcgroup(struct cgroup_subsys_state *s) @@ -104,8 +118,8 @@ free_and_exit: /* * called under devcgroup_mutex */ -static void dev_exception_rm(struct list_head *exceptions, -struct dev_exception_item *ex) +static void __dev_exception_rm(struct list_head *exceptions, + struct dev_exception_item *ex) { struct dev_exception_item *walk, *tmp; @@ -127,11 +141,18 @@ static void dev_exception_rm(struct list } } +static void dev_exception_rm(struct dev_cgroup *devcgroup, +struct dev_exception_item *ex) +{ + __dev_exception_rm(devcgroup-local.exceptions, ex); + __dev_exception_rm(devcgroup-exceptions, ex); +} + /* * 
called under devcgroup_mutex */ -static int dev_exception_add(struct list_head *exceptions, -struct dev_exception_item *ex) +static int __dev_exception_add(struct list_head *exceptions, + struct dev_exception_item *ex) { struct dev_exception_item *excopy, *walk; @@ -159,6 +180,28 @@ static int dev_exception_add(struct list return 0; } +static int dev_exception_add(struct dev_cgroup *devcgroup, +struct dev_exception_item *ex) +{ + int rc; + + lockdep_assert_held(devcgroup_mutex); + + /* +* we add to the local list so we can preserve local preferences if +* the parent propagates down new rules +*/ + rc = __dev_exception_add(devcgroup-local.exceptions, ex); + if (rc) + return rc; + + rc = __dev_exception_add(devcgroup-exceptions, ex); + if (rc) + __dev_exception_rm(devcgroup-local.exceptions, ex); + + return rc; +} + static void __dev_exception_clean(struct dev_cgroup *dev_cgroup) { struct dev_exception_item *ex, *tmp; @@ -167,6 +210,11 @@ static void __dev_exception_clean(struct list_del_rcu(ex-list); kfree_rcu(ex, rcu); } + list_for_each_entry_safe(ex, tmp, dev_cgroup-local.exceptions, +list) { + list_del_rcu(ex-list); + kfree_rcu(ex, rcu); + } } /** @@ -195,6 +243,8 @@ static struct cgroup_subsys_state *devcg if (!dev_cgroup) return ERR_PTR(-ENOMEM); INIT_LIST_HEAD(dev_cgroup-exceptions); + INIT_LIST_HEAD(dev_cgroup-local.exceptions); + dev_cgroup-local.behavior = DEVCG_DEFAULT_NONE; parent_cgroup = cgroup-parent; if (parent_cgroup == NULL) @@ -413,18 +463,19 @@ memset(ex, 0, sizeof(ex)); if (!may_allow_all(parent)) return -EPERM; dev_exception_clean(devcgroup); + if (parent) + rc = dev_exceptions_copy(devcgroup-exceptions, +parent-exceptions); devcgroup-behavior = DEVCG_DEFAULT_ALLOW; - if (!parent) - break; + devcgroup-local.behavior = DEVCG_DEFAULT_ALLOW; - rc = dev_exceptions_copy(devcgroup-exceptions, -parent-exceptions);
[PATCH v4 2/9] devcg: reorder device exception functions
In preparation for the next patch, reorder dev_exception_add() and dev_exception_rm(). This patch doesn't introduce any functional changes. Acked-by: Tejun Heo t...@kernel.org Cc: Tejun Heo t...@kernel.org Cc: Serge Hallyn serge.hal...@canonical.com Signed-off-by: Aristeu Rozanski a...@redhat.com --- security/device_cgroup.c | 44 ++-- 1 file changed, 22 insertions(+), 22 deletions(-) --- github.orig/security/device_cgroup.c2013-01-29 11:49:14.739657516 -0500 +++ github/security/device_cgroup.c 2013-01-29 11:49:14.987661210 -0500 @@ -104,18 +104,14 @@ free_and_exit: /* * called under devcgroup_mutex */ -static int dev_exception_add(struct list_head *exceptions, +static void dev_exception_rm(struct list_head *exceptions, struct dev_exception_item *ex) { - struct dev_exception_item *excopy, *walk; + struct dev_exception_item *walk, *tmp; lockdep_assert_held(devcgroup_mutex); - excopy = kmemdup(ex, sizeof(*ex), GFP_KERNEL); - if (!excopy) - return -ENOMEM; - - list_for_each_entry(walk, exceptions, list) { + list_for_each_entry_safe(walk, tmp, exceptions, list) { if (walk-type != ex-type) continue; if (walk-major != ex-major) @@ -123,27 +119,29 @@ static int dev_exception_add(struct list if (walk-minor != ex-minor) continue; - walk-access |= ex-access; - kfree(excopy); - excopy = NULL; + walk-access = ~ex-access; + if (!walk-access) { + list_del_rcu(walk-list); + kfree_rcu(walk, rcu); + } } - - if (excopy != NULL) - list_add_tail_rcu(excopy-list, exceptions); - return 0; } /* * called under devcgroup_mutex */ -static void dev_exception_rm(struct list_head *exceptions, +static int dev_exception_add(struct list_head *exceptions, struct dev_exception_item *ex) { - struct dev_exception_item *walk, *tmp; + struct dev_exception_item *excopy, *walk; lockdep_assert_held(devcgroup_mutex); - list_for_each_entry_safe(walk, tmp, exceptions, list) { + excopy = kmemdup(ex, sizeof(*ex), GFP_KERNEL); + if (!excopy) + return -ENOMEM; + + list_for_each_entry(walk, exceptions, list) { if 
(walk-type != ex-type) continue; if (walk-major != ex-major) @@ -151,12 +149,14 @@ static void dev_exception_rm(struct list if (walk-minor != ex-minor) continue; - walk-access = ~ex-access; - if (!walk-access) { - list_del_rcu(walk-list); - kfree_rcu(walk, rcu); - } + walk-access |= ex-access; + kfree(excopy); + excopy = NULL; } + + if (excopy != NULL) + list_add_tail_rcu(excopy-list, exceptions); + return 0; } static void __dev_exception_clean(struct dev_cgroup *dev_cgroup) -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v3 9/9] devcg: propagate local changes down the hierarchy
This patch makes all changes propagate down the hierarchy, respecting local configurations when possible. Behavior changes will clean up exceptions in all the children except when the parent changes the behavior from allow to deny and the child's behavior was already deny, in which case the local exceptions will be reused. The inverse is not possible: you can't have a parent with behavior deny and a child with behavior allow. New exceptions allowing additional access to devices won't be propagated, but it'll be possible to add an exception to access all or part of the newly allowed device(s). New exceptions disallowing access to devices will be propagated down and the local group's exceptions will be revalidated for the new situation. Example: A / \ B group behavior exceptions A allow "b 8:* rwm", "c 116:1 rw" B deny "c 1:3 rwm", "c 116:2 rwm", "b 3:* rwm" If a new exception is added to group A: # echo "c 116:* r" > A/devices.deny it'll propagate down and after revalidating B's local exceptions, the exception "c 116:2 rwm" will be removed. In case parent behavior or exceptions change and local settings are not allowed anymore, they'll be deleted. 
v3: - update documentation - move css_online/css_offline changes to a new patch - use cgroup_for_each_descendant_pre() instead of own descendant walk - move exception_copy rework to a separared patch - move exception_clean rework to a separated patch v2: - instead of keeping the local settings that won't apply anymore, remove them Cc: Tejun Heo Cc: Serge Hallyn Signed-off-by: Aristeu Rozanski --- Documentation/cgroups/devices.txt | 66 ++ security/device_cgroup.c | 170 -- 2 files changed, 230 insertions(+), 6 deletions(-) --- github.orig/security/device_cgroup.c2013-01-29 11:49:16.653686016 -0500 +++ github/security/device_cgroup.c 2013-01-29 13:58:07.537099962 -0500 @@ -60,6 +60,9 @@ struct dev_cgroup { struct list_head exceptions; enum devcg_behavior behavior; } local; + + /* temporary list for pending propagation operations */ + struct list_head propagate_pending; }; static inline struct dev_cgroup *css_to_devcgroup(struct cgroup_subsys_state *s) @@ -241,6 +244,11 @@ static void dev_exception_clean_all(stru __dev_exception_clean_all(dev_cgroup); } +static inline bool is_devcg_online(const struct dev_cgroup *devcg) +{ + return (devcg->behavior != DEVCG_DEFAULT_NONE); +} + /** * devcgroup_online - initializes devcgroup's behavior and exceptions based on * parent's @@ -292,6 +300,7 @@ static struct cgroup_subsys_state *devcg return ERR_PTR(-ENOMEM); INIT_LIST_HEAD(_cgroup->exceptions); INIT_LIST_HEAD(_cgroup->local.exceptions); + INIT_LIST_HEAD(_cgroup->propagate_pending); dev_cgroup->local.behavior = DEVCG_DEFAULT_NONE; dev_cgroup->behavior = DEVCG_DEFAULT_NONE; parent_cgroup = cgroup->parent; @@ -471,6 +480,139 @@ static inline int may_allow_all(struct d return parent->behavior == DEVCG_DEFAULT_ALLOW; } +/** + * revalidate_exceptions - walks through the exception list and revalidates + * the exceptions based on parents' behavior and + * exceptions. Called with devcgroup_mutex held. 
+ * @devcg: cgroup which exceptions will be checked + * + * returns: 0 in success, -ENOMEM in case of out of memory + * + * This is one of the two key functions for hierarchy implementation. + * This function is responsible for re-evaluating all the cgroup's locally + * set exceptions due to a parent's behavior or exception change. + * Refer to Documentation/cgroups/devices.txt for more details. + */ +static int revalidate_exceptions(struct dev_cgroup *devcg) +{ + struct dev_exception_item *ex; + struct list_head *this, *tmp; + + list_for_each_safe(this, tmp, >local.exceptions) { + ex = container_of(this, struct dev_exception_item, list); + if (parent_has_perm(devcg, ex)) { + if (dev_exception_copy(>exceptions, ex)) + goto error; + } else + __dev_exception_rm(>local.exceptions, ex); + } + return 0; + +error: + dev_exception_clean(>exceptions); + return -ENOMEM; +} + +/** + * propagate_behavior - propagates a change in the behavior down in hierarchy + * @devcg_root: device cgroup that changed behavior + * + * returns: 0 in case of success, != 0 in case of error + * + * This is one of the two key functions for hierarchy implementation. + * All cgroup's children recursively will have the behavior changed and + * exceptions copied from the parent then its local behavior and exceptions + * re-evaluated and applied if they're still
[PATCH v3 2/9] devcg: reorder device exception functions
In preparation for the next patch, reorder dev_exception_add() and dev_exception_rm(). Cc: Tejun Heo Cc: Serge Hallyn Signed-off-by: Aristeu Rozanski --- security/device_cgroup.c | 44 ++-- 1 file changed, 22 insertions(+), 22 deletions(-) --- github.orig/security/device_cgroup.c2013-01-29 11:49:14.739657516 -0500 +++ github/security/device_cgroup.c 2013-01-29 11:49:14.987661210 -0500 @@ -104,18 +104,14 @@ free_and_exit: /* * called under devcgroup_mutex */ -static int dev_exception_add(struct list_head *exceptions, +static void dev_exception_rm(struct list_head *exceptions, struct dev_exception_item *ex) { - struct dev_exception_item *excopy, *walk; + struct dev_exception_item *walk, *tmp; lockdep_assert_held(_mutex); - excopy = kmemdup(ex, sizeof(*ex), GFP_KERNEL); - if (!excopy) - return -ENOMEM; - - list_for_each_entry(walk, exceptions, list) { + list_for_each_entry_safe(walk, tmp, exceptions, list) { if (walk->type != ex->type) continue; if (walk->major != ex->major) @@ -123,27 +119,29 @@ static int dev_exception_add(struct list if (walk->minor != ex->minor) continue; - walk->access |= ex->access; - kfree(excopy); - excopy = NULL; + walk->access &= ~ex->access; + if (!walk->access) { + list_del_rcu(>list); + kfree_rcu(walk, rcu); + } } - - if (excopy != NULL) - list_add_tail_rcu(>list, exceptions); - return 0; } /* * called under devcgroup_mutex */ -static void dev_exception_rm(struct list_head *exceptions, +static int dev_exception_add(struct list_head *exceptions, struct dev_exception_item *ex) { - struct dev_exception_item *walk, *tmp; + struct dev_exception_item *excopy, *walk; lockdep_assert_held(_mutex); - list_for_each_entry_safe(walk, tmp, exceptions, list) { + excopy = kmemdup(ex, sizeof(*ex), GFP_KERNEL); + if (!excopy) + return -ENOMEM; + + list_for_each_entry(walk, exceptions, list) { if (walk->type != ex->type) continue; if (walk->major != ex->major) @@ -151,12 +149,14 @@ static void dev_exception_rm(struct list if (walk->minor != ex->minor) 
continue; - walk->access &= ~ex->access; - if (!walk->access) { - list_del_rcu(>list); - kfree_rcu(walk, rcu); - } + walk->access |= ex->access; + kfree(excopy); + excopy = NULL; } + + if (excopy != NULL) + list_add_tail_rcu(>list, exceptions); + return 0; } static void __dev_exception_clean(struct dev_cgroup *dev_cgroup) -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v3 8/9] devcg: refactor dev_exception_clean()
This patch is in preparation for hierarchy support. Cc: Tejun Heo Cc: Serge Hallyn Signed-off-by: Aristeu Rozanski --- security/device_cgroup.c | 34 -- 1 file changed, 16 insertions(+), 18 deletions(-) --- github.orig/security/device_cgroup.c2013-01-29 11:49:16.374681863 -0500 +++ github/security/device_cgroup.c 2013-01-29 11:49:16.653686016 -0500 @@ -212,32 +212,33 @@ static int dev_exception_add(struct dev_ return rc; } -static void __dev_exception_clean(struct dev_cgroup *dev_cgroup) +static void dev_exception_clean(struct list_head *exceptions) { struct dev_exception_item *ex, *tmp; - list_for_each_entry_safe(ex, tmp, _cgroup->exceptions, list) { - list_del_rcu(>list); - kfree_rcu(ex, rcu); - } - list_for_each_entry_safe(ex, tmp, _cgroup->local.exceptions, -list) { + list_for_each_entry_safe(ex, tmp, exceptions, list) { list_del_rcu(>list); kfree_rcu(ex, rcu); } } +static void __dev_exception_clean_all(struct dev_cgroup *dev_cgroup) +{ + dev_exception_clean(_cgroup->exceptions); + dev_exception_clean(_cgroup->local.exceptions); +} + /** - * dev_exception_clean - frees all entries of the exception list + * dev_exception_clean_all - frees all entries of the exception list * @dev_cgroup: dev_cgroup with the exception list to be cleaned * * called under devcgroup_mutex */ -static void dev_exception_clean(struct dev_cgroup *dev_cgroup) +static void dev_exception_clean_all(struct dev_cgroup *dev_cgroup) { lockdep_assert_held(_mutex); - __dev_exception_clean(dev_cgroup); + __dev_exception_clean_all(dev_cgroup); } /** @@ -303,7 +304,7 @@ static void devcgroup_css_free(struct cg struct dev_cgroup *dev_cgroup; dev_cgroup = cgroup_to_devcgroup(cgroup); - __dev_exception_clean(dev_cgroup); + __dev_exception_clean_all(dev_cgroup); kfree(dev_cgroup); } @@ -508,25 +509,22 @@ memset(, 0, sizeof(ex)); case DEVCG_ALLOW: if (!may_allow_all(parent)) return -EPERM; - dev_exception_clean(devcgroup); + dev_exception_clean_all(devcgroup); if (parent) rc = 
dev_exceptions_copy(>exceptions, >exceptions); devcgroup->behavior = DEVCG_DEFAULT_ALLOW; devcgroup->local.behavior = DEVCG_DEFAULT_ALLOW; - - if (rc) - return rc; break; case DEVCG_DENY: - dev_exception_clean(devcgroup); + dev_exception_clean_all(devcgroup); devcgroup->behavior = DEVCG_DEFAULT_DENY; devcgroup->local.behavior = DEVCG_DEFAULT_DENY; break; default: - return -EINVAL; + rc = -EINVAL; } - return 0; + return rc; case 'b': ex.type = DEV_BLOCK; break; -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v3 1/9] device_cgroup: prepare exception list handling functions for two lists
In the following patches, device_cgroup structure will have two sets of behavior and exceptions list (actual one, another with the local settings) so rework the functions to use exception list, not a device_cgroup. Cc: Tejun Heo Cc: Serge Hallyn Signed-off-by: Aristeu Rozanski --- security/device_cgroup.c | 18 +- 1 file changed, 9 insertions(+), 9 deletions(-) --- github.orig/security/device_cgroup.c2013-01-29 11:48:50.603298122 -0500 +++ github/security/device_cgroup.c 2013-01-29 11:49:14.739657516 -0500 @@ -104,7 +104,7 @@ free_and_exit: /* * called under devcgroup_mutex */ -static int dev_exception_add(struct dev_cgroup *dev_cgroup, +static int dev_exception_add(struct list_head *exceptions, struct dev_exception_item *ex) { struct dev_exception_item *excopy, *walk; @@ -115,7 +115,7 @@ static int dev_exception_add(struct dev_ if (!excopy) return -ENOMEM; - list_for_each_entry(walk, _cgroup->exceptions, list) { + list_for_each_entry(walk, exceptions, list) { if (walk->type != ex->type) continue; if (walk->major != ex->major) @@ -129,21 +129,21 @@ static int dev_exception_add(struct dev_ } if (excopy != NULL) - list_add_tail_rcu(>list, _cgroup->exceptions); + list_add_tail_rcu(>list, exceptions); return 0; } /* * called under devcgroup_mutex */ -static void dev_exception_rm(struct dev_cgroup *dev_cgroup, +static void dev_exception_rm(struct list_head *exceptions, struct dev_exception_item *ex) { struct dev_exception_item *walk, *tmp; lockdep_assert_held(_mutex); - list_for_each_entry_safe(walk, tmp, _cgroup->exceptions, list) { + list_for_each_entry_safe(walk, tmp, exceptions, list) { if (walk->type != ex->type) continue; if (walk->major != ex->major) @@ -514,10 +514,10 @@ case '\0': * don't want to break compatibility */ if (devcgroup->behavior == DEVCG_DEFAULT_ALLOW) { - dev_exception_rm(devcgroup, ); + dev_exception_rm(>exceptions, ); return 0; } - return dev_exception_add(devcgroup, ); + return dev_exception_add(>exceptions, ); case DEVCG_DENY: /* * If the 
default policy is to deny by default, try to remove @@ -525,10 +525,10 @@ return 0; * don't want to break compatibility */ if (devcgroup->behavior == DEVCG_DEFAULT_DENY) { - dev_exception_rm(devcgroup, ); + dev_exception_rm(>exceptions, ); return 0; } - return dev_exception_add(devcgroup, ); + return dev_exception_add(>exceptions, ); default: return -EINVAL; } -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v3 3/9] device_cgroup: keep track of local group settings
In preparation for better hierarchy support, it's needed to retain the local settings in order to try to reapply them after a propagated change if they're still valid. v2: split this patch in two, one to just move dev_exception_rm() before dev_exception_add() while keeping functional changes in this patch as requested by Tejun. Cc: Tejun Heo Cc: Serge Hallyn Signed-off-by: Aristeu Rozanski --- security/device_cgroup.c | 83 +-- 1 file changed, 67 insertions(+), 16 deletions(-) --- github.orig/security/device_cgroup.c2013-01-29 11:49:14.987661210 -0500 +++ github/security/device_cgroup.c 2013-01-29 11:49:15.244665037 -0500 @@ -39,13 +39,27 @@ struct dev_exception_item { struct rcu_head rcu; }; +enum devcg_behavior { + DEVCG_DEFAULT_NONE, + DEVCG_DEFAULT_ALLOW, + DEVCG_DEFAULT_DENY, +}; + struct dev_cgroup { struct cgroup_subsys_state css; + + /* result of merging the parent's rules with local ones */ struct list_head exceptions; - enum { - DEVCG_DEFAULT_ALLOW, - DEVCG_DEFAULT_DENY, - } behavior; + enum devcg_behavior behavior; + + /* +* local set rules, saved so when a parent propagates new rules, the +* local preferences can be preserved +*/ + struct { + struct list_head exceptions; + enum devcg_behavior behavior; + } local; }; static inline struct dev_cgroup *css_to_devcgroup(struct cgroup_subsys_state *s) @@ -104,8 +118,8 @@ free_and_exit: /* * called under devcgroup_mutex */ -static void dev_exception_rm(struct list_head *exceptions, -struct dev_exception_item *ex) +static void __dev_exception_rm(struct list_head *exceptions, + struct dev_exception_item *ex) { struct dev_exception_item *walk, *tmp; @@ -127,11 +141,18 @@ static void dev_exception_rm(struct list } } +static void dev_exception_rm(struct dev_cgroup *devcgroup, +struct dev_exception_item *ex) +{ + __dev_exception_rm(>local.exceptions, ex); + __dev_exception_rm(>exceptions, ex); +} + /* * called under devcgroup_mutex */ -static int dev_exception_add(struct list_head *exceptions, -struct 
dev_exception_item *ex) +static int __dev_exception_add(struct list_head *exceptions, + struct dev_exception_item *ex) { struct dev_exception_item *excopy, *walk; @@ -159,6 +180,28 @@ static int dev_exception_add(struct list return 0; } +static int dev_exception_add(struct dev_cgroup *devcgroup, +struct dev_exception_item *ex) +{ + int rc; + + lockdep_assert_held(_mutex); + + /* +* we add to the local list so we can preserve local preferences if +* the parent propagates down new rules +*/ + rc = __dev_exception_add(>local.exceptions, ex); + if (rc) + return rc; + + rc = __dev_exception_add(>exceptions, ex); + if (rc) + __dev_exception_rm(>local.exceptions, ex); + + return rc; +} + static void __dev_exception_clean(struct dev_cgroup *dev_cgroup) { struct dev_exception_item *ex, *tmp; @@ -167,6 +210,11 @@ static void __dev_exception_clean(struct list_del_rcu(>list); kfree_rcu(ex, rcu); } + list_for_each_entry_safe(ex, tmp, _cgroup->local.exceptions, +list) { + list_del_rcu(>list); + kfree_rcu(ex, rcu); + } } /** @@ -195,6 +243,8 @@ static struct cgroup_subsys_state *devcg if (!dev_cgroup) return ERR_PTR(-ENOMEM); INIT_LIST_HEAD(_cgroup->exceptions); + INIT_LIST_HEAD(_cgroup->local.exceptions); + dev_cgroup->local.behavior = DEVCG_DEFAULT_NONE; parent_cgroup = cgroup->parent; if (parent_cgroup == NULL) @@ -413,18 +463,19 @@ memset(, 0, sizeof(ex)); if (!may_allow_all(parent)) return -EPERM; dev_exception_clean(devcgroup); + if (parent) + rc = dev_exceptions_copy(>exceptions, +>exceptions); devcgroup->behavior = DEVCG_DEFAULT_ALLOW; - if (!parent) - break; + devcgroup->local.behavior = DEVCG_DEFAULT_ALLOW; - rc = dev_exceptions_copy(>exceptions, ->exceptions); if (rc) return rc; break; case DEVCG_DENY: dev_exception_clean(devcgroup);
[PATCH v3 0/9] devcg: introduce proper hierarchy support
This patchset implements device cgroup hierarchy. Behaviors and exceptions will be propagated down in the tree and local preferences will be re-evaluated every time a change in its parent occurs, reapplying them if it's still possible. git://github.com/aristeu/linux-2.6.git branch: devcg_hierarchy_review v3: - update documentation - move css_online/css_offline changes to a new patch - use cgroup_for_each_descendant_pre() instead of own descendant walk - move exception_copy rework to a separate patch - move exception_clean rework to a separate patch - new patch to just move dev_exception_rm() before dev_exception_add() as requested by Tejun. - updated patch description for may_access() changes - new patch to expand the may_access() logic before changing it - fixed argument description order in may_access() v2: - rebase on top of "device_cgroup: don't grab mutex in rcu callback" - in case parent changes behavior or exceptions and the local exceptions won't apply anymore, remove them instead of keeping them around. Cc: Tejun Heo Cc: Serge Hallyn Signed-off-by: Aristeu Rozanski -- Aristeu -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v3 7/9] devcg: split single exception copy from dev_exceptions_copy()
This patch is in preparation for hierarchy support Cc: Tejun Heo Cc: Serge Hallyn Signed-off-by: Aristeu Rozanski --- security/device_cgroup.c | 18 ++ 1 file changed, 14 insertions(+), 4 deletions(-) --- github.orig/security/device_cgroup.c2013-01-29 11:49:16.076677425 -0500 +++ github/security/device_cgroup.c 2013-01-29 11:49:16.374681863 -0500 @@ -89,20 +89,30 @@ static int devcgroup_can_attach(struct c return 0; } +static int dev_exception_copy(struct list_head *dest, + struct dev_exception_item *ex) +{ + struct dev_exception_item *new; + + new = kmemdup(ex, sizeof(*ex), GFP_KERNEL); + if (!new) + return -ENOMEM; + list_add_tail(>list, dest); + return 0; +} + /* * called under devcgroup_mutex */ static int dev_exceptions_copy(struct list_head *dest, struct list_head *orig) { - struct dev_exception_item *ex, *tmp, *new; + struct dev_exception_item *ex, *tmp; lockdep_assert_held(_mutex); list_for_each_entry(ex, orig, list) { - new = kmemdup(ex, sizeof(*ex), GFP_KERNEL); - if (!new) + if (dev_exception_copy(dest, ex)) goto free_and_exit; - list_add_tail(>list, dest); } return 0; -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v3 6/9] devcg: use css_online and css_offline
Allocate resources and change behavior only when online. This patch is in preparation for hierarchy support. Cc: Tejun Heo Cc: Serge Hallyn Signed-off-by: Aristeu Rozanski --- security/device_cgroup.c | 59 +-- 1 file changed, 42 insertions(+), 17 deletions(-) --- github.orig/security/device_cgroup.c2013-01-29 11:49:15.795673240 -0500 +++ github/security/device_cgroup.c 2013-01-29 11:49:16.076677425 -0500 @@ -230,14 +230,51 @@ static void dev_exception_clean(struct d __dev_exception_clean(dev_cgroup); } +/** + * devcgroup_online - initializes devcgroup's behavior and exceptions based on + * parent's + * @cgroup: cgroup getting online + * returns 0 in case of success, error code otherwise + */ +static int devcgroup_online(struct cgroup *cgroup) +{ + struct dev_cgroup *dev_cgroup, *parent_dev_cgroup = NULL; + int ret = 0; + + mutex_lock(_mutex); + dev_cgroup = cgroup_to_devcgroup(cgroup); + if (cgroup->parent) + parent_dev_cgroup = cgroup_to_devcgroup(cgroup->parent); + + if (parent_dev_cgroup == NULL) + dev_cgroup->behavior = DEVCG_DEFAULT_ALLOW; + else { + ret = dev_exceptions_copy(_cgroup->exceptions, + _dev_cgroup->exceptions); + if (!ret) + dev_cgroup->behavior = parent_dev_cgroup->behavior; + } + mutex_unlock(_mutex); + + return ret; +} + +static void devcgroup_offline(struct cgroup *cgroup) +{ + struct dev_cgroup *dev_cgroup = cgroup_to_devcgroup(cgroup); + + mutex_lock(_mutex); + dev_cgroup->behavior = DEVCG_DEFAULT_NONE; + mutex_unlock(_mutex); +} + /* * called from kernel/cgroup.c with cgroup_lock() held. 
*/ static struct cgroup_subsys_state *devcgroup_css_alloc(struct cgroup *cgroup) { - struct dev_cgroup *dev_cgroup, *parent_dev_cgroup; + struct dev_cgroup *dev_cgroup; struct cgroup *parent_cgroup; - int ret; dev_cgroup = kzalloc(sizeof(*dev_cgroup), GFP_KERNEL); if (!dev_cgroup) @@ -245,23 +282,9 @@ static struct cgroup_subsys_state *devcg INIT_LIST_HEAD(_cgroup->exceptions); INIT_LIST_HEAD(_cgroup->local.exceptions); dev_cgroup->local.behavior = DEVCG_DEFAULT_NONE; + dev_cgroup->behavior = DEVCG_DEFAULT_NONE; parent_cgroup = cgroup->parent; - if (parent_cgroup == NULL) - dev_cgroup->behavior = DEVCG_DEFAULT_ALLOW; - else { - parent_dev_cgroup = cgroup_to_devcgroup(parent_cgroup); - mutex_lock(_mutex); - ret = dev_exceptions_copy(_cgroup->exceptions, - _dev_cgroup->exceptions); - dev_cgroup->behavior = parent_dev_cgroup->behavior; - mutex_unlock(_mutex); - if (ret) { - kfree(dev_cgroup); - return ERR_PTR(ret); - } - } - return _cgroup->css; } @@ -635,6 +658,8 @@ struct cgroup_subsys devices_subsys = { .can_attach = devcgroup_can_attach, .css_alloc = devcgroup_css_alloc, .css_free = devcgroup_css_free, + .css_online = devcgroup_online, + .css_offline = devcgroup_offline, .subsys_id = devices_subsys_id, .base_cftypes = dev_cgroup_files, -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v3 5/9] devcg: prepare may_access() for hierarchy support
Currently may_access() is only able to verify if an exception is valid for the current cgroup, which has the same behavior. With hierarchy, it'll be also used to verify if a cgroup local exception is valid towards its cgroup parent, which might have different behavior. v2: - updated patch description - rebased on top of a new patch to expand the may_access() logic to make it more clear - fixed argument description order in may_access() Cc: Tejun Heo Cc: Serge Hallyn Signed-off-by: Aristeu Rozanski --- security/device_cgroup.c | 44 +++- 1 file changed, 27 insertions(+), 17 deletions(-) --- github.orig/security/device_cgroup.c2013-01-29 11:49:15.514669057 -0500 +++ github/security/device_cgroup.c 2013-01-29 11:49:15.795673240 -0500 @@ -354,9 +354,11 @@return 0; * verify if a certain access is allowed. * @dev_cgroup: dev cgroup to be tested against * @refex: new exception + * @behavior: behavior of the exception */ -static int may_access(struct dev_cgroup *dev_cgroup, - struct dev_exception_item *refex) +static bool may_access(struct dev_cgroup *dev_cgroup, + struct dev_exception_item *refex, + enum devcg_behavior behavior) { struct dev_exception_item *ex; bool match = false; @@ -380,19 +382,27 @@ if (ex->minor != ~0 && ex->minor != re break; } - /* -* In two cases we'll consider this new exception valid: -* - the dev cgroup has its default policy to deny + exception list: -* the new exception *should* match the exceptions -* - the dev cgroup has its default policy to allow + exception list: -* the new exception should *not* match any of the exceptions -*/ - if (dev_cgroup->behavior == DEVCG_DEFAULT_DENY) { - if (match) - return 1; + if (dev_cgroup->behavior == DEVCG_DEFAULT_ALLOW) { + if (behavior == DEVCG_DEFAULT_ALLOW) { + /* the exception will deny access to certain devices */ + return true; + } else { + /* the exception will allow access to certain devices */ + if (match) + /* +* a new exception allowing access shouldn't +* match an parent's exception +*/ + 
return false; + return true; + } } else { - if (!match) - return 1; + /* only behavior == DEVCG_DEFAULT_DENY allowed here */ + if (match) + /* parent has an exception that matches the proposed */ + return true; + else + return false; } return 0; } @@ -411,7 +421,7 @@ static int parent_has_perm(struct dev_cg if (!pcg) return 1; parent = cgroup_to_devcgroup(pcg); - return may_access(parent, ex); + return may_access(parent, ex, childcg->behavior); } /** @@ -445,7 +455,7 @@ static int devcgroup_update_access(struc { const char *b; char temp[12]; /* 11 + 1 characters needed for a u32 */ - int count, rc; + int count, rc = 0; struct dev_exception_item ex; struct cgroup *p = devcgroup->css.cgroup; struct dev_cgroup *parent = NULL; @@ -663,7 +673,7 @@ memset(, 0, sizeof(ex)); rcu_read_lock(); dev_cgroup = task_devcgroup(current); - rc = may_access(dev_cgroup, ); + rc = may_access(dev_cgroup, , dev_cgroup->behavior); rcu_read_unlock(); if (!rc) -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v3 4/9] devcg: expand may_access() logic
In order to make the next patch more clear, expand may_access() logic. Cc: Tejun Heo Cc: Serge Hallyn Signed-off-by: Aristeu Rozanski --- security/device_cgroup.c | 15 +-- 1 file changed, 9 insertions(+), 6 deletions(-) --- github.orig/security/device_cgroup.c2013-01-29 11:49:15.244665037 -0500 +++ github/security/device_cgroup.c 2013-01-29 11:49:15.514669057 -0500 @@ -382,15 +382,18 @@ if (ex->minor != ~0 && ex->minor != re /* * In two cases we'll consider this new exception valid: -* - the dev cgroup has its default policy to allow + exception list: -* the new exception should *not* match any of the exceptions -* (behavior == DEVCG_DEFAULT_ALLOW, !match) * - the dev cgroup has its default policy to deny + exception list: * the new exception *should* match the exceptions -* (behavior == DEVCG_DEFAULT_DENY, match) +* - the dev cgroup has its default policy to allow + exception list: +* the new exception should *not* match any of the exceptions */ - if ((dev_cgroup->behavior == DEVCG_DEFAULT_DENY) == match) - return 1; + if (dev_cgroup->behavior == DEVCG_DEFAULT_DENY) { + if (match) + return 1; + } else { + if (!match) + return 1; + } return 0; } -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v3 4/9] devcg: expand may_access() logic
In order to make the next patch more clear, expand may_access() logic. Cc: Tejun Heo t...@kernel.org Cc: Serge Hallyn serge.hal...@canonical.com Signed-off-by: Aristeu Rozanski a...@redhat.com --- security/device_cgroup.c | 15 +-- 1 file changed, 9 insertions(+), 6 deletions(-) --- github.orig/security/device_cgroup.c2013-01-29 11:49:15.244665037 -0500 +++ github/security/device_cgroup.c 2013-01-29 11:49:15.514669057 -0500 @@ -382,15 +382,18 @@ if (ex-minor != ~0 ex-minor != re /* * In two cases we'll consider this new exception valid: -* - the dev cgroup has its default policy to allow + exception list: -* the new exception should *not* match any of the exceptions -* (behavior == DEVCG_DEFAULT_ALLOW, !match) * - the dev cgroup has its default policy to deny + exception list: * the new exception *should* match the exceptions -* (behavior == DEVCG_DEFAULT_DENY, match) +* - the dev cgroup has its default policy to allow + exception list: +* the new exception should *not* match any of the exceptions */ - if ((dev_cgroup-behavior == DEVCG_DEFAULT_DENY) == match) - return 1; + if (dev_cgroup-behavior == DEVCG_DEFAULT_DENY) { + if (match) + return 1; + } else { + if (!match) + return 1; + } return 0; } -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v3 5/9] devcg: prepare may_access() for hierarchy support
Currently may_access() is only able to verify if an exception is valid for the current cgroup, which has the same behavior. With hierarchy, it'll be also used to verify if a cgroup local exception is valid towards its cgroup parent, which might have different behavior. v2: - updated patch description - rebased on top of a new patch to expand the may_access() logic to make it more clear - fixed argument description order in may_access() Cc: Tejun Heo t...@kernel.org Cc: Serge Hallyn serge.hal...@canonical.com Signed-off-by: Aristeu Rozanski a...@redhat.com --- security/device_cgroup.c | 44 +++- 1 file changed, 27 insertions(+), 17 deletions(-) --- github.orig/security/device_cgroup.c2013-01-29 11:49:15.514669057 -0500 +++ github/security/device_cgroup.c 2013-01-29 11:49:15.795673240 -0500 @@ -354,9 +354,11 @@return 0; * verify if a certain access is allowed. * @dev_cgroup: dev cgroup to be tested against * @refex: new exception + * @behavior: behavior of the exception */ -static int may_access(struct dev_cgroup *dev_cgroup, - struct dev_exception_item *refex) +static bool may_access(struct dev_cgroup *dev_cgroup, + struct dev_exception_item *refex, + enum devcg_behavior behavior) { struct dev_exception_item *ex; bool match = false; @@ -380,19 +382,27 @@ if (ex-minor != ~0 ex-minor != re break; } - /* -* In two cases we'll consider this new exception valid: -* - the dev cgroup has its default policy to deny + exception list: -* the new exception *should* match the exceptions -* - the dev cgroup has its default policy to allow + exception list: -* the new exception should *not* match any of the exceptions -*/ - if (dev_cgroup-behavior == DEVCG_DEFAULT_DENY) { - if (match) - return 1; + if (dev_cgroup-behavior == DEVCG_DEFAULT_ALLOW) { + if (behavior == DEVCG_DEFAULT_ALLOW) { + /* the exception will deny access to certain devices */ + return true; + } else { + /* the exception will allow access to certain devices */ + if (match) + /* +* a new exception allowing access 
shouldn't +* match an parent's exception +*/ + return false; + return true; + } } else { - if (!match) - return 1; + /* only behavior == DEVCG_DEFAULT_DENY allowed here */ + if (match) + /* parent has an exception that matches the proposed */ + return true; + else + return false; } return 0; } @@ -411,7 +421,7 @@ static int parent_has_perm(struct dev_cg if (!pcg) return 1; parent = cgroup_to_devcgroup(pcg); - return may_access(parent, ex); + return may_access(parent, ex, childcg-behavior); } /** @@ -445,7 +455,7 @@ static int devcgroup_update_access(struc { const char *b; char temp[12]; /* 11 + 1 characters needed for a u32 */ - int count, rc; + int count, rc = 0; struct dev_exception_item ex; struct cgroup *p = devcgroup-css.cgroup; struct dev_cgroup *parent = NULL; @@ -663,7 +673,7 @@ memset(ex, 0, sizeof(ex)); rcu_read_lock(); dev_cgroup = task_devcgroup(current); - rc = may_access(dev_cgroup, ex); + rc = may_access(dev_cgroup, ex, dev_cgroup-behavior); rcu_read_unlock(); if (!rc) -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v3 6/9] devcg: use css_online and css_offline
Allocate resources and change behavior only when online. This patch is in preparation for hierarchy support. Cc: Tejun Heo t...@kernel.org Cc: Serge Hallyn serge.hal...@canonical.com Signed-off-by: Aristeu Rozanski a...@redhat.com --- security/device_cgroup.c | 59 +-- 1 file changed, 42 insertions(+), 17 deletions(-) --- github.orig/security/device_cgroup.c2013-01-29 11:49:15.795673240 -0500 +++ github/security/device_cgroup.c 2013-01-29 11:49:16.076677425 -0500 @@ -230,14 +230,51 @@ static void dev_exception_clean(struct d __dev_exception_clean(dev_cgroup); } +/** + * devcgroup_online - initializes devcgroup's behavior and exceptions based on + * parent's + * @cgroup: cgroup getting online + * returns 0 in case of success, error code otherwise + */ +static int devcgroup_online(struct cgroup *cgroup) +{ + struct dev_cgroup *dev_cgroup, *parent_dev_cgroup = NULL; + int ret = 0; + + mutex_lock(devcgroup_mutex); + dev_cgroup = cgroup_to_devcgroup(cgroup); + if (cgroup-parent) + parent_dev_cgroup = cgroup_to_devcgroup(cgroup-parent); + + if (parent_dev_cgroup == NULL) + dev_cgroup-behavior = DEVCG_DEFAULT_ALLOW; + else { + ret = dev_exceptions_copy(dev_cgroup-exceptions, + parent_dev_cgroup-exceptions); + if (!ret) + dev_cgroup-behavior = parent_dev_cgroup-behavior; + } + mutex_unlock(devcgroup_mutex); + + return ret; +} + +static void devcgroup_offline(struct cgroup *cgroup) +{ + struct dev_cgroup *dev_cgroup = cgroup_to_devcgroup(cgroup); + + mutex_lock(devcgroup_mutex); + dev_cgroup-behavior = DEVCG_DEFAULT_NONE; + mutex_unlock(devcgroup_mutex); +} + /* * called from kernel/cgroup.c with cgroup_lock() held. 
*/ static struct cgroup_subsys_state *devcgroup_css_alloc(struct cgroup *cgroup) { - struct dev_cgroup *dev_cgroup, *parent_dev_cgroup; + struct dev_cgroup *dev_cgroup; struct cgroup *parent_cgroup; - int ret; dev_cgroup = kzalloc(sizeof(*dev_cgroup), GFP_KERNEL); if (!dev_cgroup) @@ -245,23 +282,9 @@ static struct cgroup_subsys_state *devcg INIT_LIST_HEAD(dev_cgroup-exceptions); INIT_LIST_HEAD(dev_cgroup-local.exceptions); dev_cgroup-local.behavior = DEVCG_DEFAULT_NONE; + dev_cgroup-behavior = DEVCG_DEFAULT_NONE; parent_cgroup = cgroup-parent; - if (parent_cgroup == NULL) - dev_cgroup-behavior = DEVCG_DEFAULT_ALLOW; - else { - parent_dev_cgroup = cgroup_to_devcgroup(parent_cgroup); - mutex_lock(devcgroup_mutex); - ret = dev_exceptions_copy(dev_cgroup-exceptions, - parent_dev_cgroup-exceptions); - dev_cgroup-behavior = parent_dev_cgroup-behavior; - mutex_unlock(devcgroup_mutex); - if (ret) { - kfree(dev_cgroup); - return ERR_PTR(ret); - } - } - return dev_cgroup-css; } @@ -635,6 +658,8 @@ struct cgroup_subsys devices_subsys = { .can_attach = devcgroup_can_attach, .css_alloc = devcgroup_css_alloc, .css_free = devcgroup_css_free, + .css_online = devcgroup_online, + .css_offline = devcgroup_offline, .subsys_id = devices_subsys_id, .base_cftypes = dev_cgroup_files, -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v3 7/9] devcg: split single exception copy from dev_exceptions_copy()
This patch is in preparation for hierarchy support Cc: Tejun Heo t...@kernel.org Cc: Serge Hallyn serge.hal...@canonical.com Signed-off-by: Aristeu Rozanski a...@redhat.com --- security/device_cgroup.c | 18 ++ 1 file changed, 14 insertions(+), 4 deletions(-) --- github.orig/security/device_cgroup.c2013-01-29 11:49:16.076677425 -0500 +++ github/security/device_cgroup.c 2013-01-29 11:49:16.374681863 -0500 @@ -89,20 +89,30 @@ static int devcgroup_can_attach(struct c return 0; } +static int dev_exception_copy(struct list_head *dest, + struct dev_exception_item *ex) +{ + struct dev_exception_item *new; + + new = kmemdup(ex, sizeof(*ex), GFP_KERNEL); + if (!new) + return -ENOMEM; + list_add_tail(new-list, dest); + return 0; +} + /* * called under devcgroup_mutex */ static int dev_exceptions_copy(struct list_head *dest, struct list_head *orig) { - struct dev_exception_item *ex, *tmp, *new; + struct dev_exception_item *ex, *tmp; lockdep_assert_held(devcgroup_mutex); list_for_each_entry(ex, orig, list) { - new = kmemdup(ex, sizeof(*ex), GFP_KERNEL); - if (!new) + if (dev_exception_copy(dest, ex)) goto free_and_exit; - list_add_tail(new-list, dest); } return 0; -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v3 0/9] devcg: introduce proper hierarchy support
This patchset implements device cgroup hierarchy. Behaviors and exceptions will be propagated down in the tree and local preferences will be re-evaluated every time a change in its parent occurs, reapplying them if it's still possible. git://github.com/aristeu/linux-2.6.git branch: devcg_hierarchy_review v3: - update documentation - move css_online/css_offline changes to a new patch - use cgroup_for_each_descendant_pre() instead of own descendant walk - move exception_copy rework to a separate patch - move exception_clean rework to a separate patch - new patch to just move dev_exception_rm() before dev_exception_add() as requested by Tejun. - updated patch description for may_access() changes - new patch to expand the may_access() logic before changing it - fixed argument description order in may_access() v2: - rebase on top of "device_cgroup: don't grab mutex in rcu callback" - in case parent changes behavior or exceptions and the local exceptions won't apply anymore, remove them instead of keeping them around. Cc: Tejun Heo t...@kernel.org Cc: Serge Hallyn serge.hal...@canonical.com Signed-off-by: Aristeu Rozanski a...@redhat.com -- Aristeu -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v3 3/9] device_cgroup: keep track of local group settings
In preparation for better hierarchy support, it's needed to retain the local settings in order to try to reapply them after a propagated change if they're still valid. v2: split this patch in two, one to just move dev_exception_rm() before dev_exception_add() while keeping functional changes in this patch as requested by Tejun. Cc: Tejun Heo t...@kernel.org Cc: Serge Hallyn serge.hal...@canonical.com Signed-off-by: Aristeu Rozanski a...@redhat.com --- security/device_cgroup.c | 83 +-- 1 file changed, 67 insertions(+), 16 deletions(-) --- github.orig/security/device_cgroup.c2013-01-29 11:49:14.987661210 -0500 +++ github/security/device_cgroup.c 2013-01-29 11:49:15.244665037 -0500 @@ -39,13 +39,27 @@ struct dev_exception_item { struct rcu_head rcu; }; +enum devcg_behavior { + DEVCG_DEFAULT_NONE, + DEVCG_DEFAULT_ALLOW, + DEVCG_DEFAULT_DENY, +}; + struct dev_cgroup { struct cgroup_subsys_state css; + + /* result of merging the parent's rules with local ones */ struct list_head exceptions; - enum { - DEVCG_DEFAULT_ALLOW, - DEVCG_DEFAULT_DENY, - } behavior; + enum devcg_behavior behavior; + + /* +* local set rules, saved so when a parent propagates new rules, the +* local preferences can be preserved +*/ + struct { + struct list_head exceptions; + enum devcg_behavior behavior; + } local; }; static inline struct dev_cgroup *css_to_devcgroup(struct cgroup_subsys_state *s) @@ -104,8 +118,8 @@ free_and_exit: /* * called under devcgroup_mutex */ -static void dev_exception_rm(struct list_head *exceptions, -struct dev_exception_item *ex) +static void __dev_exception_rm(struct list_head *exceptions, + struct dev_exception_item *ex) { struct dev_exception_item *walk, *tmp; @@ -127,11 +141,18 @@ static void dev_exception_rm(struct list } } +static void dev_exception_rm(struct dev_cgroup *devcgroup, +struct dev_exception_item *ex) +{ + __dev_exception_rm(devcgroup-local.exceptions, ex); + __dev_exception_rm(devcgroup-exceptions, ex); +} + /* * called under devcgroup_mutex */ 
-static int dev_exception_add(struct list_head *exceptions, -struct dev_exception_item *ex) +static int __dev_exception_add(struct list_head *exceptions, + struct dev_exception_item *ex) { struct dev_exception_item *excopy, *walk; @@ -159,6 +180,28 @@ static int dev_exception_add(struct list return 0; } +static int dev_exception_add(struct dev_cgroup *devcgroup, +struct dev_exception_item *ex) +{ + int rc; + + lockdep_assert_held(devcgroup_mutex); + + /* +* we add to the local list so we can preserve local preferences if +* the parent propagates down new rules +*/ + rc = __dev_exception_add(devcgroup-local.exceptions, ex); + if (rc) + return rc; + + rc = __dev_exception_add(devcgroup-exceptions, ex); + if (rc) + __dev_exception_rm(devcgroup-local.exceptions, ex); + + return rc; +} + static void __dev_exception_clean(struct dev_cgroup *dev_cgroup) { struct dev_exception_item *ex, *tmp; @@ -167,6 +210,11 @@ static void __dev_exception_clean(struct list_del_rcu(ex-list); kfree_rcu(ex, rcu); } + list_for_each_entry_safe(ex, tmp, dev_cgroup-local.exceptions, +list) { + list_del_rcu(ex-list); + kfree_rcu(ex, rcu); + } } /** @@ -195,6 +243,8 @@ static struct cgroup_subsys_state *devcg if (!dev_cgroup) return ERR_PTR(-ENOMEM); INIT_LIST_HEAD(dev_cgroup-exceptions); + INIT_LIST_HEAD(dev_cgroup-local.exceptions); + dev_cgroup-local.behavior = DEVCG_DEFAULT_NONE; parent_cgroup = cgroup-parent; if (parent_cgroup == NULL) @@ -413,18 +463,19 @@ memset(ex, 0, sizeof(ex)); if (!may_allow_all(parent)) return -EPERM; dev_exception_clean(devcgroup); + if (parent) + rc = dev_exceptions_copy(devcgroup-exceptions, +parent-exceptions); devcgroup-behavior = DEVCG_DEFAULT_ALLOW; - if (!parent) - break; + devcgroup-local.behavior = DEVCG_DEFAULT_ALLOW; - rc = dev_exceptions_copy(devcgroup-exceptions, -parent-exceptions); if (rc)
[PATCH v3 1/9] device_cgroup: prepare exception list handling functions for two lists
In the following patches, device_cgroup structure will have two sets of behavior and exceptions list (actual one, another with the local settings) so rework the functions to use exception list, not a device_cgroup. Cc: Tejun Heo t...@kernel.org Cc: Serge Hallyn serge.hal...@canonical.com Signed-off-by: Aristeu Rozanski a...@redhat.com --- security/device_cgroup.c | 18 +- 1 file changed, 9 insertions(+), 9 deletions(-) --- github.orig/security/device_cgroup.c2013-01-29 11:48:50.603298122 -0500 +++ github/security/device_cgroup.c 2013-01-29 11:49:14.739657516 -0500 @@ -104,7 +104,7 @@ free_and_exit: /* * called under devcgroup_mutex */ -static int dev_exception_add(struct dev_cgroup *dev_cgroup, +static int dev_exception_add(struct list_head *exceptions, struct dev_exception_item *ex) { struct dev_exception_item *excopy, *walk; @@ -115,7 +115,7 @@ static int dev_exception_add(struct dev_ if (!excopy) return -ENOMEM; - list_for_each_entry(walk, dev_cgroup-exceptions, list) { + list_for_each_entry(walk, exceptions, list) { if (walk-type != ex-type) continue; if (walk-major != ex-major) @@ -129,21 +129,21 @@ static int dev_exception_add(struct dev_ } if (excopy != NULL) - list_add_tail_rcu(excopy-list, dev_cgroup-exceptions); + list_add_tail_rcu(excopy-list, exceptions); return 0; } /* * called under devcgroup_mutex */ -static void dev_exception_rm(struct dev_cgroup *dev_cgroup, +static void dev_exception_rm(struct list_head *exceptions, struct dev_exception_item *ex) { struct dev_exception_item *walk, *tmp; lockdep_assert_held(devcgroup_mutex); - list_for_each_entry_safe(walk, tmp, dev_cgroup-exceptions, list) { + list_for_each_entry_safe(walk, tmp, exceptions, list) { if (walk-type != ex-type) continue; if (walk-major != ex-major) @@ -514,10 +514,10 @@ case '\0': * don't want to break compatibility */ if (devcgroup-behavior == DEVCG_DEFAULT_ALLOW) { - dev_exception_rm(devcgroup, ex); + dev_exception_rm(devcgroup-exceptions, ex); return 0; } - return 
dev_exception_add(devcgroup, ex); + return dev_exception_add(devcgroup-exceptions, ex); case DEVCG_DENY: /* * If the default policy is to deny by default, try to remove @@ -525,10 +525,10 @@ return 0; * don't want to break compatibility */ if (devcgroup-behavior == DEVCG_DEFAULT_DENY) { - dev_exception_rm(devcgroup, ex); + dev_exception_rm(devcgroup-exceptions, ex); return 0; } - return dev_exception_add(devcgroup, ex); + return dev_exception_add(devcgroup-exceptions, ex); default: return -EINVAL; } -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v3 8/9] devcg: refactor dev_exception_clean()
This patch is in preparation for hierarchy support. Cc: Tejun Heo t...@kernel.org Cc: Serge Hallyn serge.hal...@canonical.com Signed-off-by: Aristeu Rozanski a...@redhat.com --- security/device_cgroup.c | 34 -- 1 file changed, 16 insertions(+), 18 deletions(-) --- github.orig/security/device_cgroup.c2013-01-29 11:49:16.374681863 -0500 +++ github/security/device_cgroup.c 2013-01-29 11:49:16.653686016 -0500 @@ -212,32 +212,33 @@ static int dev_exception_add(struct dev_ return rc; } -static void __dev_exception_clean(struct dev_cgroup *dev_cgroup) +static void dev_exception_clean(struct list_head *exceptions) { struct dev_exception_item *ex, *tmp; - list_for_each_entry_safe(ex, tmp, dev_cgroup-exceptions, list) { - list_del_rcu(ex-list); - kfree_rcu(ex, rcu); - } - list_for_each_entry_safe(ex, tmp, dev_cgroup-local.exceptions, -list) { + list_for_each_entry_safe(ex, tmp, exceptions, list) { list_del_rcu(ex-list); kfree_rcu(ex, rcu); } } +static void __dev_exception_clean_all(struct dev_cgroup *dev_cgroup) +{ + dev_exception_clean(dev_cgroup-exceptions); + dev_exception_clean(dev_cgroup-local.exceptions); +} + /** - * dev_exception_clean - frees all entries of the exception list + * dev_exception_clean_all - frees all entries of the exception list * @dev_cgroup: dev_cgroup with the exception list to be cleaned * * called under devcgroup_mutex */ -static void dev_exception_clean(struct dev_cgroup *dev_cgroup) +static void dev_exception_clean_all(struct dev_cgroup *dev_cgroup) { lockdep_assert_held(devcgroup_mutex); - __dev_exception_clean(dev_cgroup); + __dev_exception_clean_all(dev_cgroup); } /** @@ -303,7 +304,7 @@ static void devcgroup_css_free(struct cg struct dev_cgroup *dev_cgroup; dev_cgroup = cgroup_to_devcgroup(cgroup); - __dev_exception_clean(dev_cgroup); + __dev_exception_clean_all(dev_cgroup); kfree(dev_cgroup); } @@ -508,25 +509,22 @@ memset(ex, 0, sizeof(ex)); case DEVCG_ALLOW: if (!may_allow_all(parent)) return -EPERM; - dev_exception_clean(devcgroup); + 
dev_exception_clean_all(devcgroup); if (parent) rc = dev_exceptions_copy(devcgroup-exceptions, parent-exceptions); devcgroup-behavior = DEVCG_DEFAULT_ALLOW; devcgroup-local.behavior = DEVCG_DEFAULT_ALLOW; - - if (rc) - return rc; break; case DEVCG_DENY: - dev_exception_clean(devcgroup); + dev_exception_clean_all(devcgroup); devcgroup-behavior = DEVCG_DEFAULT_DENY; devcgroup-local.behavior = DEVCG_DEFAULT_DENY; break; default: - return -EINVAL; + rc = -EINVAL; } - return 0; + return rc; case 'b': ex.type = DEV_BLOCK; break; -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v3 2/9] devcg: reorder device exception functions
In preparation for the next patch, reorder dev_exception_add() and dev_exception_rm(). Cc: Tejun Heo t...@kernel.org Cc: Serge Hallyn serge.hal...@canonical.com Signed-off-by: Aristeu Rozanski a...@redhat.com --- security/device_cgroup.c | 44 ++-- 1 file changed, 22 insertions(+), 22 deletions(-) --- github.orig/security/device_cgroup.c2013-01-29 11:49:14.739657516 -0500 +++ github/security/device_cgroup.c 2013-01-29 11:49:14.987661210 -0500 @@ -104,18 +104,14 @@ free_and_exit: /* * called under devcgroup_mutex */ -static int dev_exception_add(struct list_head *exceptions, +static void dev_exception_rm(struct list_head *exceptions, struct dev_exception_item *ex) { - struct dev_exception_item *excopy, *walk; + struct dev_exception_item *walk, *tmp; lockdep_assert_held(devcgroup_mutex); - excopy = kmemdup(ex, sizeof(*ex), GFP_KERNEL); - if (!excopy) - return -ENOMEM; - - list_for_each_entry(walk, exceptions, list) { + list_for_each_entry_safe(walk, tmp, exceptions, list) { if (walk-type != ex-type) continue; if (walk-major != ex-major) @@ -123,27 +119,29 @@ static int dev_exception_add(struct list if (walk-minor != ex-minor) continue; - walk-access |= ex-access; - kfree(excopy); - excopy = NULL; + walk-access = ~ex-access; + if (!walk-access) { + list_del_rcu(walk-list); + kfree_rcu(walk, rcu); + } } - - if (excopy != NULL) - list_add_tail_rcu(excopy-list, exceptions); - return 0; } /* * called under devcgroup_mutex */ -static void dev_exception_rm(struct list_head *exceptions, +static int dev_exception_add(struct list_head *exceptions, struct dev_exception_item *ex) { - struct dev_exception_item *walk, *tmp; + struct dev_exception_item *excopy, *walk; lockdep_assert_held(devcgroup_mutex); - list_for_each_entry_safe(walk, tmp, exceptions, list) { + excopy = kmemdup(ex, sizeof(*ex), GFP_KERNEL); + if (!excopy) + return -ENOMEM; + + list_for_each_entry(walk, exceptions, list) { if (walk-type != ex-type) continue; if (walk-major != ex-major) @@ -151,12 +149,14 @@ static 
void dev_exception_rm(struct list if (walk-minor != ex-minor) continue; - walk-access = ~ex-access; - if (!walk-access) { - list_del_rcu(walk-list); - kfree_rcu(walk, rcu); - } + walk-access |= ex-access; + kfree(excopy); + excopy = NULL; } + + if (excopy != NULL) + list_add_tail_rcu(excopy-list, exceptions); + return 0; } static void __dev_exception_clean(struct dev_cgroup *dev_cgroup) -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v3 9/9] devcg: propagate local changes down the hierarchy
This patch makes all changes propagate down the hierarchy, respecting local configurations when possible. Behavior changes will clean up exceptions in all the children, except when the parent changes its behavior from allow to deny and the child's behavior was already deny, in which case the local exceptions will be reused. The inverse is not possible: you can't have a parent with behavior deny and a child with behavior allow. New exceptions allowing additional access to devices won't be propagated, but it'll be possible to add an exception to access all or part of the newly allowed device(s). New exceptions disallowing access to devices will be propagated down and the local group's exceptions will be revalidated for the new situation. Example: A / \ B group behavior exceptions A allow "b 8:* rwm", "c 116:1 rw" B deny "c 1:3 rwm", "c 116:2 rwm", "b 3:* rwm" If a new exception is added to group A: # echo "c 116:* r" > A/devices.deny it'll propagate down and, after revalidating B's local exceptions, the exception "c 116:2 rwm" will be removed. If the parent's behavior or exceptions change and local settings are no longer allowed, they'll be deleted. 
v3: - update documentation - move css_online/css_offline changes to a new patch - use cgroup_for_each_descendant_pre() instead of own descendant walk - move exception_copy rework to a separared patch - move exception_clean rework to a separated patch v2: - instead of keeping the local settings that won't apply anymore, remove them Cc: Tejun Heo t...@kernel.org Cc: Serge Hallyn serge.hal...@canonical.com Signed-off-by: Aristeu Rozanski a...@redhat.com --- Documentation/cgroups/devices.txt | 66 ++ security/device_cgroup.c | 170 -- 2 files changed, 230 insertions(+), 6 deletions(-) --- github.orig/security/device_cgroup.c2013-01-29 11:49:16.653686016 -0500 +++ github/security/device_cgroup.c 2013-01-29 13:58:07.537099962 -0500 @@ -60,6 +60,9 @@ struct dev_cgroup { struct list_head exceptions; enum devcg_behavior behavior; } local; + + /* temporary list for pending propagation operations */ + struct list_head propagate_pending; }; static inline struct dev_cgroup *css_to_devcgroup(struct cgroup_subsys_state *s) @@ -241,6 +244,11 @@ static void dev_exception_clean_all(stru __dev_exception_clean_all(dev_cgroup); } +static inline bool is_devcg_online(const struct dev_cgroup *devcg) +{ + return (devcg-behavior != DEVCG_DEFAULT_NONE); +} + /** * devcgroup_online - initializes devcgroup's behavior and exceptions based on * parent's @@ -292,6 +300,7 @@ static struct cgroup_subsys_state *devcg return ERR_PTR(-ENOMEM); INIT_LIST_HEAD(dev_cgroup-exceptions); INIT_LIST_HEAD(dev_cgroup-local.exceptions); + INIT_LIST_HEAD(dev_cgroup-propagate_pending); dev_cgroup-local.behavior = DEVCG_DEFAULT_NONE; dev_cgroup-behavior = DEVCG_DEFAULT_NONE; parent_cgroup = cgroup-parent; @@ -471,6 +480,139 @@ static inline int may_allow_all(struct d return parent-behavior == DEVCG_DEFAULT_ALLOW; } +/** + * revalidate_exceptions - walks through the exception list and revalidates + * the exceptions based on parents' behavior and + * exceptions. Called with devcgroup_mutex held. 
+ * @devcg: cgroup which exceptions will be checked + * + * returns: 0 in success, -ENOMEM in case of out of memory + * + * This is one of the two key functions for hierarchy implementation. + * This function is responsible for re-evaluating all the cgroup's locally + * set exceptions due to a parent's behavior or exception change. + * Refer to Documentation/cgroups/devices.txt for more details. + */ +static int revalidate_exceptions(struct dev_cgroup *devcg) +{ + struct dev_exception_item *ex; + struct list_head *this, *tmp; + + list_for_each_safe(this, tmp, devcg-local.exceptions) { + ex = container_of(this, struct dev_exception_item, list); + if (parent_has_perm(devcg, ex)) { + if (dev_exception_copy(devcg-exceptions, ex)) + goto error; + } else + __dev_exception_rm(devcg-local.exceptions, ex); + } + return 0; + +error: + dev_exception_clean(devcg-exceptions); + return -ENOMEM; +} + +/** + * propagate_behavior - propagates a change in the behavior down in hierarchy + * @devcg_root: device cgroup that changed behavior + * + * returns: 0 in case of success, != 0 in case of error + * + * This is one of the two key functions for hierarchy implementation. + * All cgroup's children recursively will have the behavior changed and + * exceptions copied from the parent then its local
[PATCH v2 3/4] device_cgroup: make may_access() stronger
In order to revalidate local exceptions for the hierarchy change propagation, make may_access() stronger. Cc: Tejun Heo Cc: Serge Hallyn Signed-off-by: Aristeu Rozanski --- security/device_cgroup.c | 48 +-- 1 file changed, 30 insertions(+), 18 deletions(-) --- github.orig/security/device_cgroup.c2013-01-24 10:40:46.384253615 -0500 +++ github/security/device_cgroup.c 2013-01-24 10:41:07.513567697 -0500 @@ -353,13 +353,15 @@ return 0; * won't have more privileges than its parent or to * verify if a certain access is allowed. * @dev_cgroup: dev cgroup to be tested against + * @behavior: behavior of the exception * @refex: new exception */ -static int may_access(struct dev_cgroup *dev_cgroup, - struct dev_exception_item *refex) +static bool may_access(struct dev_cgroup *dev_cgroup, + struct dev_exception_item *refex, + enum devcg_behavior behavior) { struct dev_exception_item *ex; - bool match = false; + int match = false; rcu_lockdep_assert(rcu_read_lock_held() || lockdep_is_held(_mutex), @@ -380,18 +382,28 @@ if (ex->minor != ~0 && ex->minor != re break; } - /* -* In two cases we'll consider this new exception valid: -* - the dev cgroup has its default policy to allow + exception list: -* the new exception should *not* match any of the exceptions -* (behavior == DEVCG_DEFAULT_ALLOW, !match) -* - the dev cgroup has its default policy to deny + exception list: -* the new exception *should* match the exceptions -* (behavior == DEVCG_DEFAULT_DENY, match) -*/ - if ((dev_cgroup->behavior == DEVCG_DEFAULT_DENY) == match) - return 1; - return 0; + if (dev_cgroup->behavior == DEVCG_DEFAULT_ALLOW) { + if (behavior == DEVCG_DEFAULT_ALLOW) { + /* the exception will deny access to certain devices */ + return true; + } else { + /* the exception will allow access to certain devices */ + if (match) + /* +* a new exception allowing access shouldn't +* match an parent's exception +*/ + return false; + return true; + } + } else { + /* only behavior == DEVCG_DEFAULT_DENY allowed here */ 
+ if (match) + /* parent has an exception that matches the proposed */ + return true; + else + return false; + } } /* @@ -408,7 +420,7 @@ static int parent_has_perm(struct dev_cg if (!pcg) return 1; parent = cgroup_to_devcgroup(pcg); - return may_access(parent, ex); + return may_access(parent, ex, childcg->behavior); } /** @@ -442,7 +454,7 @@ static int devcgroup_update_access(struc { const char *b; char temp[12]; /* 11 + 1 characters needed for a u32 */ - int count, rc; + int count, rc = 0; struct dev_exception_item ex; struct cgroup *p = devcgroup->css.cgroup; struct dev_cgroup *parent = NULL; @@ -660,7 +672,7 @@ memset(, 0, sizeof(ex)); rcu_read_lock(); dev_cgroup = task_devcgroup(current); - rc = may_access(dev_cgroup, ); + rc = may_access(dev_cgroup, , dev_cgroup->behavior); rcu_read_unlock(); if (!rc) -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v2 4/4] device_cgroup: propagate local changes down the hierarchy
This patch makes all changes propagate down the hierarchy, respecting local configurations when possible. Behavior changes will clean up exceptions in all the children, except when the parent changes its behavior from allow to deny and the child's behavior was already deny, in which case the local exceptions will be reused. The inverse is not possible: you can't have a parent with behavior deny and a child with behavior allow. New exceptions allowing additional access to devices won't be propagated, but it'll be possible to add an exception to access all or part of the newly allowed device(s). New exceptions disallowing access to devices will be propagated down and the local group's exceptions will be revalidated for the new situation. Example: A / \ B group behavior exceptions A allow "b 8:* rwm", "c 116:1 rw" B deny "c 1:3 rwm", "c 116:2 rwm", "b 3:* rwm" If a new exception is added to group A: # echo "c 116:* r" > A/devices.deny it'll propagate down and, after revalidating B's local exceptions, the exception "c 116:2 rwm" will be removed. If the parent's behavior or exceptions change and local settings are no longer allowed, they'll be deleted. 
v2: instead of keeping the local settings that won't apply anymore, remove them Cc: Tejun Heo Cc: Serge Hallyn Signed-off-by: Aristeu Rozanski --- security/device_cgroup.c | 296 --- 1 file changed, 252 insertions(+), 44 deletions(-) --- github.orig/security/device_cgroup.c2013-01-24 10:41:07.513567697 -0500 +++ github/security/device_cgroup.c 2013-01-24 10:41:15.545687094 -0500 @@ -89,28 +89,38 @@ static int devcgroup_can_attach(struct c return 0; } +static int dev_exception_copy(struct list_head *dest, + struct dev_exception_item *ex) +{ + struct dev_exception_item *new; + + new = kmemdup(ex, sizeof(*ex), GFP_KERNEL); + if (!new) + return -ENOMEM; + list_add_tail_rcu(>list, dest); + return 0; +} + /* * called under devcgroup_mutex */ static int dev_exceptions_copy(struct list_head *dest, struct list_head *orig) { - struct dev_exception_item *ex, *tmp, *new; + struct dev_exception_item *ex, *tmp; lockdep_assert_held(_mutex); list_for_each_entry(ex, orig, list) { - new = kmemdup(ex, sizeof(*ex), GFP_KERNEL); - if (!new) + if (dev_exception_copy(dest, ex)) goto free_and_exit; - list_add_tail(>list, dest); } return 0; free_and_exit: list_for_each_entry_safe(ex, tmp, dest, list) { - list_del(>list); - kfree(ex); + list_del_rcu(>list); + kfree_rcu(ex, rcu); } return -ENOMEM; } @@ -202,32 +212,80 @@ static int dev_exception_add(struct dev_ return rc; } -static void __dev_exception_clean(struct dev_cgroup *dev_cgroup) +static void dev_exception_clean(struct list_head *exceptions) { struct dev_exception_item *ex, *tmp; - list_for_each_entry_safe(ex, tmp, _cgroup->exceptions, list) { - list_del_rcu(>list); - kfree_rcu(ex, rcu); - } - list_for_each_entry_safe(ex, tmp, _cgroup->local.exceptions, -list) { + list_for_each_entry_safe(ex, tmp, exceptions, list) { list_del_rcu(>list); kfree_rcu(ex, rcu); } } +static void __dev_exception_clean_all(struct dev_cgroup *dev_cgroup) +{ + dev_exception_clean(_cgroup->exceptions); + dev_exception_clean(_cgroup->local.exceptions); +} + /** 
* dev_exception_clean - frees all entries of the exception list * @dev_cgroup: dev_cgroup with the exception list to be cleaned * * called under devcgroup_mutex */ -static void dev_exception_clean(struct dev_cgroup *dev_cgroup) +static void dev_exception_clean_all(struct dev_cgroup *dev_cgroup) { lockdep_assert_held(_mutex); - __dev_exception_clean(dev_cgroup); + __dev_exception_clean_all(dev_cgroup); +} + +static inline bool is_devcg_online(const struct dev_cgroup *devcg) +{ + return (devcg->behavior != DEVCG_DEFAULT_NONE); +} + +/** + * devcg_for_each_child - traverse online children of a device cgroup + * @child_cs: loop cursor pointing to the current child + * @pos_cgrp: used for iteration + * @parent_cs: target device cgroup to walk children of + * + * Walk @child_cs through the online children of @parent_cs. Must be used + * with RCU read locked. + */ +#define devcg_for_each_child(pos_cgrp, root) \ + cgroup_for_each_child((pos_cgrp), (root)) \ + if (is_devcg_online(cgroup_to_devcgroup((pos_cgrp + +static int devcgroup_online(struct cgroup *cgroup) +{ +
[PATCH v2 2/4] device_cgroup: keep track of local group settings
In preparation for better hierarchy support, it's needed to retain the local settings in order to try to reapply them after a propagated change if they're still valid. Cc: Tejun Heo Cc: Serge Hallyn Signed-off-by: Aristeu Rozanski --- security/device_cgroup.c | 115 +-- 1 file changed, 83 insertions(+), 32 deletions(-) --- github.orig/security/device_cgroup.c2013-01-24 10:40:33.913068234 -0500 +++ github/security/device_cgroup.c 2013-01-24 10:40:46.384253615 -0500 @@ -39,13 +39,27 @@ struct dev_exception_item { struct rcu_head rcu; }; +enum devcg_behavior { + DEVCG_DEFAULT_NONE, + DEVCG_DEFAULT_ALLOW, + DEVCG_DEFAULT_DENY, +}; + struct dev_cgroup { struct cgroup_subsys_state css; + + /* result of merging the parent's rules with local ones */ struct list_head exceptions; - enum { - DEVCG_DEFAULT_ALLOW, - DEVCG_DEFAULT_DENY, - } behavior; + enum devcg_behavior behavior; + + /* +* local set rules, saved so when a parent propagates new rules, the +* local preferences can be preserved +*/ + struct { + struct list_head exceptions; + enum devcg_behavior behavior; + } local; }; static inline struct dev_cgroup *css_to_devcgroup(struct cgroup_subsys_state *s) @@ -104,8 +118,41 @@ free_and_exit: /* * called under devcgroup_mutex */ -static int dev_exception_add(struct list_head *exceptions, -struct dev_exception_item *ex) +static void __dev_exception_rm(struct list_head *exceptions, + struct dev_exception_item *ex) +{ + struct dev_exception_item *walk, *tmp; + + lockdep_assert_held(_mutex); + + list_for_each_entry_safe(walk, tmp, exceptions, list) { + if (walk->type != ex->type) + continue; + if (walk->major != ex->major) + continue; + if (walk->minor != ex->minor) + continue; + + walk->access &= ~ex->access; + if (!walk->access) { + list_del_rcu(>list); + kfree_rcu(walk, rcu); + } + } +} + +static void dev_exception_rm(struct dev_cgroup *devcgroup, + struct dev_exception_item *ex) +{ + __dev_exception_rm(>local.exceptions, ex); + __dev_exception_rm(>exceptions, ex); +} + +/* 
+ * called under devcgroup_mutex + */ +static int __dev_exception_add(struct list_head *exceptions, + struct dev_exception_item *ex) { struct dev_exception_item *excopy, *walk; @@ -133,30 +180,26 @@ static int dev_exception_add(struct list return 0; } -/* - * called under devcgroup_mutex - */ -static void dev_exception_rm(struct list_head *exceptions, +static int dev_exception_add(struct dev_cgroup *devcgroup, struct dev_exception_item *ex) { - struct dev_exception_item *walk, *tmp; + int rc; lockdep_assert_held(_mutex); - list_for_each_entry_safe(walk, tmp, exceptions, list) { - if (walk->type != ex->type) - continue; - if (walk->major != ex->major) - continue; - if (walk->minor != ex->minor) - continue; + /* +* we add to the local list so we can preserve local preferences if +* the parent propagates down new rules +*/ + rc = __dev_exception_add(>local.exceptions, ex); + if (rc) + return rc; + + rc = __dev_exception_add(>exceptions, ex); + if (rc) + __dev_exception_rm(>local.exceptions, ex); - walk->access &= ~ex->access; - if (!walk->access) { - list_del_rcu(>list); - kfree_rcu(walk, rcu); - } - } + return rc; } static void __dev_exception_clean(struct dev_cgroup *dev_cgroup) @@ -167,6 +210,11 @@ static void __dev_exception_clean(struct list_del_rcu(>list); kfree_rcu(ex, rcu); } + list_for_each_entry_safe(ex, tmp, _cgroup->local.exceptions, +list) { + list_del_rcu(>list); + kfree_rcu(ex, rcu); + } } /** @@ -195,6 +243,8 @@ static struct cgroup_subsys_state *devcg if (!dev_cgroup) return ERR_PTR(-ENOMEM); INIT_LIST_HEAD(_cgroup->exceptions); + INIT_LIST_HEAD(_cgroup->local.exceptions); + dev_cgroup->local.behavior = DEVCG_DEFAULT_NONE; parent_cgroup = cgroup->parent; if (parent_cgroup == NULL) @@ -413,18 +463,19 @@ memset(, 0, sizeof(ex)); if (!may_allow_all(parent))
[PATCH v2 1/4] device_cgroup: prepare exception list handling functions for two lists
In the following patches, device_cgroup structure will have two sets of behavior and exceptions list (actual one, another with the local settings) so rework the functions to use exception list, not a device_cgroup. Cc: Tejun Heo Cc: Serge Hallyn Signed-off-by: Aristeu Rozanski --- security/device_cgroup.c | 18 +- 1 file changed, 9 insertions(+), 9 deletions(-) --- github.orig/security/device_cgroup.c2013-01-24 10:40:32.814051897 -0500 +++ github/security/device_cgroup.c 2013-01-24 10:40:33.913068234 -0500 @@ -104,7 +104,7 @@ free_and_exit: /* * called under devcgroup_mutex */ -static int dev_exception_add(struct dev_cgroup *dev_cgroup, +static int dev_exception_add(struct list_head *exceptions, struct dev_exception_item *ex) { struct dev_exception_item *excopy, *walk; @@ -115,7 +115,7 @@ static int dev_exception_add(struct dev_ if (!excopy) return -ENOMEM; - list_for_each_entry(walk, _cgroup->exceptions, list) { + list_for_each_entry(walk, exceptions, list) { if (walk->type != ex->type) continue; if (walk->major != ex->major) @@ -129,21 +129,21 @@ static int dev_exception_add(struct dev_ } if (excopy != NULL) - list_add_tail_rcu(>list, _cgroup->exceptions); + list_add_tail_rcu(>list, exceptions); return 0; } /* * called under devcgroup_mutex */ -static void dev_exception_rm(struct dev_cgroup *dev_cgroup, +static void dev_exception_rm(struct list_head *exceptions, struct dev_exception_item *ex) { struct dev_exception_item *walk, *tmp; lockdep_assert_held(_mutex); - list_for_each_entry_safe(walk, tmp, _cgroup->exceptions, list) { + list_for_each_entry_safe(walk, tmp, exceptions, list) { if (walk->type != ex->type) continue; if (walk->major != ex->major) @@ -514,10 +514,10 @@ case '\0': * don't want to break compatibility */ if (devcgroup->behavior == DEVCG_DEFAULT_ALLOW) { - dev_exception_rm(devcgroup, ); + dev_exception_rm(>exceptions, ); return 0; } - return dev_exception_add(devcgroup, ); + return dev_exception_add(>exceptions, ); case DEVCG_DENY: /* * If the 
default policy is to deny by default, try to remove @@ -525,10 +525,10 @@ return 0; * don't want to break compatibility */ if (devcgroup->behavior == DEVCG_DEFAULT_DENY) { - dev_exception_rm(devcgroup, ); + dev_exception_rm(>exceptions, ); return 0; } - return dev_exception_add(devcgroup, ); + return dev_exception_add(>exceptions, ); default: return -EINVAL; } -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v2 0/4] devcg: introduce proper hierarchy support
This patchset implements device cgroup hierarchy. Behaviors and exceptions will be propagated down the tree, and a group's local preferences will be re-evaluated every time a change occurs in its parent, reapplying them if that is still possible. v2: - rebase on top of "device_cgroup: don't grab mutex in rcu callback" - in case the parent changes behavior or exceptions and the local exceptions won't apply anymore, remove them instead of keeping them around. Cc: Tejun Heo Cc: Serge Hallyn Signed-off-by: Aristeu Rozanski -- Aristeu -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v2 0/4] devcg: introduce proper hierarchy support
This patchset implements device cgroup hierarchy. Behaviors and exceptions will be propagated down the tree, and a group's local preferences will be re-evaluated every time a change occurs in its parent, reapplying them if that is still possible. v2: - rebase on top of "device_cgroup: don't grab mutex in rcu callback" - in case the parent changes behavior or exceptions and the local exceptions won't apply anymore, remove them instead of keeping them around. Cc: Tejun Heo t...@kernel.org Cc: Serge Hallyn serge.hal...@canonical.com Signed-off-by: Aristeu Rozanski a...@redhat.com -- Aristeu -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v2 1/4] device_cgroup: prepare exception list handling functions for two lists
In the following patches, device_cgroup structure will have two sets of behavior and exceptions list (actual one, another with the local settings) so rework the functions to use exception list, not a device_cgroup. Cc: Tejun Heo t...@kernel.org Cc: Serge Hallyn serge.hal...@canonical.com Signed-off-by: Aristeu Rozanski a...@redhat.com --- security/device_cgroup.c | 18 +- 1 file changed, 9 insertions(+), 9 deletions(-) --- github.orig/security/device_cgroup.c2013-01-24 10:40:32.814051897 -0500 +++ github/security/device_cgroup.c 2013-01-24 10:40:33.913068234 -0500 @@ -104,7 +104,7 @@ free_and_exit: /* * called under devcgroup_mutex */ -static int dev_exception_add(struct dev_cgroup *dev_cgroup, +static int dev_exception_add(struct list_head *exceptions, struct dev_exception_item *ex) { struct dev_exception_item *excopy, *walk; @@ -115,7 +115,7 @@ static int dev_exception_add(struct dev_ if (!excopy) return -ENOMEM; - list_for_each_entry(walk, dev_cgroup-exceptions, list) { + list_for_each_entry(walk, exceptions, list) { if (walk-type != ex-type) continue; if (walk-major != ex-major) @@ -129,21 +129,21 @@ static int dev_exception_add(struct dev_ } if (excopy != NULL) - list_add_tail_rcu(excopy-list, dev_cgroup-exceptions); + list_add_tail_rcu(excopy-list, exceptions); return 0; } /* * called under devcgroup_mutex */ -static void dev_exception_rm(struct dev_cgroup *dev_cgroup, +static void dev_exception_rm(struct list_head *exceptions, struct dev_exception_item *ex) { struct dev_exception_item *walk, *tmp; lockdep_assert_held(devcgroup_mutex); - list_for_each_entry_safe(walk, tmp, dev_cgroup-exceptions, list) { + list_for_each_entry_safe(walk, tmp, exceptions, list) { if (walk-type != ex-type) continue; if (walk-major != ex-major) @@ -514,10 +514,10 @@ case '\0': * don't want to break compatibility */ if (devcgroup-behavior == DEVCG_DEFAULT_ALLOW) { - dev_exception_rm(devcgroup, ex); + dev_exception_rm(devcgroup-exceptions, ex); return 0; } - return 
dev_exception_add(devcgroup, ex); + return dev_exception_add(devcgroup-exceptions, ex); case DEVCG_DENY: /* * If the default policy is to deny by default, try to remove @@ -525,10 +525,10 @@ return 0; * don't want to break compatibility */ if (devcgroup-behavior == DEVCG_DEFAULT_DENY) { - dev_exception_rm(devcgroup, ex); + dev_exception_rm(devcgroup-exceptions, ex); return 0; } - return dev_exception_add(devcgroup, ex); + return dev_exception_add(devcgroup-exceptions, ex); default: return -EINVAL; } -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v2 2/4] device_cgroup: keep track of local group settings
In preparation for better hierarchy support, it's needed to retain the local settings in order to try to reapply them after a propagated change if they're still valid. Cc: Tejun Heo t...@kernel.org Cc: Serge Hallyn serge.hal...@canonical.com Signed-off-by: Aristeu Rozanski a...@redhat.com --- security/device_cgroup.c | 115 +-- 1 file changed, 83 insertions(+), 32 deletions(-) --- github.orig/security/device_cgroup.c2013-01-24 10:40:33.913068234 -0500 +++ github/security/device_cgroup.c 2013-01-24 10:40:46.384253615 -0500 @@ -39,13 +39,27 @@ struct dev_exception_item { struct rcu_head rcu; }; +enum devcg_behavior { + DEVCG_DEFAULT_NONE, + DEVCG_DEFAULT_ALLOW, + DEVCG_DEFAULT_DENY, +}; + struct dev_cgroup { struct cgroup_subsys_state css; + + /* result of merging the parent's rules with local ones */ struct list_head exceptions; - enum { - DEVCG_DEFAULT_ALLOW, - DEVCG_DEFAULT_DENY, - } behavior; + enum devcg_behavior behavior; + + /* +* local set rules, saved so when a parent propagates new rules, the +* local preferences can be preserved +*/ + struct { + struct list_head exceptions; + enum devcg_behavior behavior; + } local; }; static inline struct dev_cgroup *css_to_devcgroup(struct cgroup_subsys_state *s) @@ -104,8 +118,41 @@ free_and_exit: /* * called under devcgroup_mutex */ -static int dev_exception_add(struct list_head *exceptions, -struct dev_exception_item *ex) +static void __dev_exception_rm(struct list_head *exceptions, + struct dev_exception_item *ex) +{ + struct dev_exception_item *walk, *tmp; + + lockdep_assert_held(devcgroup_mutex); + + list_for_each_entry_safe(walk, tmp, exceptions, list) { + if (walk-type != ex-type) + continue; + if (walk-major != ex-major) + continue; + if (walk-minor != ex-minor) + continue; + + walk-access = ~ex-access; + if (!walk-access) { + list_del_rcu(walk-list); + kfree_rcu(walk, rcu); + } + } +} + +static void dev_exception_rm(struct dev_cgroup *devcgroup, + struct dev_exception_item *ex) +{ + 
__dev_exception_rm(devcgroup-local.exceptions, ex); + __dev_exception_rm(devcgroup-exceptions, ex); +} + +/* + * called under devcgroup_mutex + */ +static int __dev_exception_add(struct list_head *exceptions, + struct dev_exception_item *ex) { struct dev_exception_item *excopy, *walk; @@ -133,30 +180,26 @@ static int dev_exception_add(struct list return 0; } -/* - * called under devcgroup_mutex - */ -static void dev_exception_rm(struct list_head *exceptions, +static int dev_exception_add(struct dev_cgroup *devcgroup, struct dev_exception_item *ex) { - struct dev_exception_item *walk, *tmp; + int rc; lockdep_assert_held(devcgroup_mutex); - list_for_each_entry_safe(walk, tmp, exceptions, list) { - if (walk-type != ex-type) - continue; - if (walk-major != ex-major) - continue; - if (walk-minor != ex-minor) - continue; + /* +* we add to the local list so we can preserve local preferences if +* the parent propagates down new rules +*/ + rc = __dev_exception_add(devcgroup-local.exceptions, ex); + if (rc) + return rc; + + rc = __dev_exception_add(devcgroup-exceptions, ex); + if (rc) + __dev_exception_rm(devcgroup-local.exceptions, ex); - walk-access = ~ex-access; - if (!walk-access) { - list_del_rcu(walk-list); - kfree_rcu(walk, rcu); - } - } + return rc; } static void __dev_exception_clean(struct dev_cgroup *dev_cgroup) @@ -167,6 +210,11 @@ static void __dev_exception_clean(struct list_del_rcu(ex-list); kfree_rcu(ex, rcu); } + list_for_each_entry_safe(ex, tmp, dev_cgroup-local.exceptions, +list) { + list_del_rcu(ex-list); + kfree_rcu(ex, rcu); + } } /** @@ -195,6 +243,8 @@ static struct cgroup_subsys_state *devcg if (!dev_cgroup) return ERR_PTR(-ENOMEM); INIT_LIST_HEAD(dev_cgroup-exceptions); + INIT_LIST_HEAD(dev_cgroup-local.exceptions); + dev_cgroup-local.behavior = DEVCG_DEFAULT_NONE; parent_cgroup = cgroup-parent; if (parent_cgroup == NULL) @@ -413,18 +463,19
[PATCH v2 4/4] device_cgroup: propagate local changes down the hierarchy
This patch makes all changes propagate down the hierarchy, respecting local configurations when possible. Behavior changes will clean up exceptions in all the children except when the parent changes the behavior from allow to deny and the child's behavior was already deny, in which case the local exceptions will be reused. The inverse is not possible: you can't have a parent with behavior deny and a child with behavior allow. New exceptions allowing additional access to devices won't be propagated, but it'll be possible to add an exception to access all or part of the newly allowed device(s). New exceptions disallowing access to devices will be propagated down and the local group's exceptions will be revalidated for the new situation. Example: A / \ B; group, behavior, exceptions: A, allow, "b 8:* rwm, c 116:1 rw"; B, deny, "c 1:3 rwm, c 116:2 rwm, b 3:* rwm". If a new exception is added to group A: # echo "c 116:* r" > A/devices.deny it'll propagate down and, after revalidating B's local exceptions, the exception c 116:2 rwm will be removed. In case parent behavior or exceptions change and local settings are not allowed anymore, they'll be deleted. 
v2: instead of keeping the local settings that won't apply anymore, remove them Cc: Tejun Heo t...@kernel.org Cc: Serge Hallyn serge.hal...@canonical.com Signed-off-by: Aristeu Rozanski a...@redhat.com --- security/device_cgroup.c | 296 --- 1 file changed, 252 insertions(+), 44 deletions(-) --- github.orig/security/device_cgroup.c2013-01-24 10:41:07.513567697 -0500 +++ github/security/device_cgroup.c 2013-01-24 10:41:15.545687094 -0500 @@ -89,28 +89,38 @@ static int devcgroup_can_attach(struct c return 0; } +static int dev_exception_copy(struct list_head *dest, + struct dev_exception_item *ex) +{ + struct dev_exception_item *new; + + new = kmemdup(ex, sizeof(*ex), GFP_KERNEL); + if (!new) + return -ENOMEM; + list_add_tail_rcu(new-list, dest); + return 0; +} + /* * called under devcgroup_mutex */ static int dev_exceptions_copy(struct list_head *dest, struct list_head *orig) { - struct dev_exception_item *ex, *tmp, *new; + struct dev_exception_item *ex, *tmp; lockdep_assert_held(devcgroup_mutex); list_for_each_entry(ex, orig, list) { - new = kmemdup(ex, sizeof(*ex), GFP_KERNEL); - if (!new) + if (dev_exception_copy(dest, ex)) goto free_and_exit; - list_add_tail(new-list, dest); } return 0; free_and_exit: list_for_each_entry_safe(ex, tmp, dest, list) { - list_del(ex-list); - kfree(ex); + list_del_rcu(ex-list); + kfree_rcu(ex, rcu); } return -ENOMEM; } @@ -202,32 +212,80 @@ static int dev_exception_add(struct dev_ return rc; } -static void __dev_exception_clean(struct dev_cgroup *dev_cgroup) +static void dev_exception_clean(struct list_head *exceptions) { struct dev_exception_item *ex, *tmp; - list_for_each_entry_safe(ex, tmp, dev_cgroup-exceptions, list) { - list_del_rcu(ex-list); - kfree_rcu(ex, rcu); - } - list_for_each_entry_safe(ex, tmp, dev_cgroup-local.exceptions, -list) { + list_for_each_entry_safe(ex, tmp, exceptions, list) { list_del_rcu(ex-list); kfree_rcu(ex, rcu); } } +static void __dev_exception_clean_all(struct dev_cgroup *dev_cgroup) +{ + 
dev_exception_clean(dev_cgroup-exceptions); + dev_exception_clean(dev_cgroup-local.exceptions); +} + /** * dev_exception_clean - frees all entries of the exception list * @dev_cgroup: dev_cgroup with the exception list to be cleaned * * called under devcgroup_mutex */ -static void dev_exception_clean(struct dev_cgroup *dev_cgroup) +static void dev_exception_clean_all(struct dev_cgroup *dev_cgroup) { lockdep_assert_held(devcgroup_mutex); - __dev_exception_clean(dev_cgroup); + __dev_exception_clean_all(dev_cgroup); +} + +static inline bool is_devcg_online(const struct dev_cgroup *devcg) +{ + return (devcg-behavior != DEVCG_DEFAULT_NONE); +} + +/** + * devcg_for_each_child - traverse online children of a device cgroup + * @child_cs: loop cursor pointing to the current child + * @pos_cgrp: used for iteration + * @parent_cs: target device cgroup to walk children of + * + * Walk @child_cs through the online children of @parent_cs. Must be used + * with RCU read locked. + */ +#define devcg_for_each_child(pos_cgrp, root) \ + cgroup_for_each_child((pos_cgrp), (root)) \ + if
[PATCH v2 3/4] device_cgroup: make may_access() stronger
In order to revalidate local exceptions for the hierarchy change propagation, make may_access() stronger. Cc: Tejun Heo t...@kernel.org Cc: Serge Hallyn serge.hal...@canonical.com Signed-off-by: Aristeu Rozanski a...@redhat.com --- security/device_cgroup.c | 48 +-- 1 file changed, 30 insertions(+), 18 deletions(-) --- github.orig/security/device_cgroup.c2013-01-24 10:40:46.384253615 -0500 +++ github/security/device_cgroup.c 2013-01-24 10:41:07.513567697 -0500 @@ -353,13 +353,15 @@ return 0; * won't have more privileges than its parent or to * verify if a certain access is allowed. * @dev_cgroup: dev cgroup to be tested against + * @behavior: behavior of the exception * @refex: new exception */ -static int may_access(struct dev_cgroup *dev_cgroup, - struct dev_exception_item *refex) +static bool may_access(struct dev_cgroup *dev_cgroup, + struct dev_exception_item *refex, + enum devcg_behavior behavior) { struct dev_exception_item *ex; - bool match = false; + int match = false; rcu_lockdep_assert(rcu_read_lock_held() || lockdep_is_held(devcgroup_mutex), @@ -380,18 +382,28 @@ if (ex-minor != ~0 ex-minor != re break; } - /* -* In two cases we'll consider this new exception valid: -* - the dev cgroup has its default policy to allow + exception list: -* the new exception should *not* match any of the exceptions -* (behavior == DEVCG_DEFAULT_ALLOW, !match) -* - the dev cgroup has its default policy to deny + exception list: -* the new exception *should* match the exceptions -* (behavior == DEVCG_DEFAULT_DENY, match) -*/ - if ((dev_cgroup-behavior == DEVCG_DEFAULT_DENY) == match) - return 1; - return 0; + if (dev_cgroup-behavior == DEVCG_DEFAULT_ALLOW) { + if (behavior == DEVCG_DEFAULT_ALLOW) { + /* the exception will deny access to certain devices */ + return true; + } else { + /* the exception will allow access to certain devices */ + if (match) + /* +* a new exception allowing access shouldn't +* match an parent's exception +*/ + return false; + return true; + } + } 
else { + /* only behavior == DEVCG_DEFAULT_DENY allowed here */ + if (match) + /* parent has an exception that matches the proposed */ + return true; + else + return false; + } } /* @@ -408,7 +420,7 @@ static int parent_has_perm(struct dev_cg if (!pcg) return 1; parent = cgroup_to_devcgroup(pcg); - return may_access(parent, ex); + return may_access(parent, ex, childcg-behavior); } /** @@ -442,7 +454,7 @@ static int devcgroup_update_access(struc { const char *b; char temp[12]; /* 11 + 1 characters needed for a u32 */ - int count, rc; + int count, rc = 0; struct dev_exception_item ex; struct cgroup *p = devcgroup-css.cgroup; struct dev_cgroup *parent = NULL; @@ -660,7 +672,7 @@ memset(ex, 0, sizeof(ex)); rcu_read_lock(); dev_cgroup = task_devcgroup(current); - rc = may_access(dev_cgroup, ex); + rc = may_access(dev_cgroup, ex, dev_cgroup-behavior); rcu_read_unlock(); if (!rc) -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v7 3/4] cgroup: add xattr support
From: Li Zefan This is one of the items in the plumber's wish list. For use cases: >> What would the use case be for this? > > Attaching meta information to services, in an easily discoverable > way. For example, in systemd we create one cgroup for each service, and > could then store data like the main pid of the specific service as an > xattr on the cgroup itself. That way we'd have almost all service state > in the cgroupfs, which would make it possible to terminate systemd and > later restart it without losing any state information. But there's more: > for example, some very peculiar services cannot be terminated on > shutdown (i.e. fakeraid DM stuff) and it would be really nice if the > services in question could just mark that on their cgroup, by setting an > xattr. On the more desktopy side of things there are other > possibilities: for example there are plans defining what an application > is along the lines of a cgroup (i.e. an app being a collection of > processes). With xattrs one could then attach an icon or human readable > program name on the cgroup. > > The key idea is that this would allow attaching runtime meta information > to cgroups and everything they model (services, apps, vms), that doesn't > need any complex userspace infrastructure, has good access control > (i.e. because the file system enforces that anyway, and there's the > "trusted." xattr namespace), notifications (inotify), and can easily be > shared among applications. 
> > Lennart v7: - no changes v6: - remove user xattr namespace, only allow trusted and security v5: - check for capabilities before setting/removing xattrs v4: - no changes v3: - instead of config option, use mount option to enable xattr support Cc: Li Zefan Cc: Tejun Heo Cc: Hugh Dickins Cc: Hillf Danton Cc: Lennart Poettering Signed-off-by: Li Zefan Signed-off-by: Aristeu Rozanski --- include/linux/cgroup.h | 13 -- kernel/cgroup.c| 100 + 2 files changed, 103 insertions(+), 10 deletions(-) Index: github/include/linux/cgroup.h === --- github.orig/include/linux/cgroup.h 2012-08-23 15:46:12.465169082 -0400 +++ github/include/linux/cgroup.h 2012-08-23 15:48:42.485258910 -0400 @@ -17,6 +17,7 @@ #include #include #include +#include #ifdef CONFIG_CGROUPS @@ -216,6 +217,9 @@ /* List of events which userspace want to receive */ struct list_head event_list; spinlock_t event_list_lock; + + /* directory xattrs */ + struct simple_xattrs xattrs; }; /* @@ -309,6 +313,9 @@ /* CFTYPE_* flags */ unsigned int flags; + /* file xattrs */ + struct simple_xattrs xattrs; + int (*open)(struct inode *inode, struct file *file); ssize_t (*read)(struct cgroup *cgrp, struct cftype *cft, struct file *file, @@ -394,7 +401,7 @@ */ struct cftype_set { struct list_headnode; /* chained at subsys->cftsets */ - const struct cftype *cfts; + struct cftype *cfts; }; struct cgroup_scanner { @@ -406,8 +413,8 @@ void *data; }; -int cgroup_add_cftypes(struct cgroup_subsys *ss, const struct cftype *cfts); -int cgroup_rm_cftypes(struct cgroup_subsys *ss, const struct cftype *cfts); +int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts); +int cgroup_rm_cftypes(struct cgroup_subsys *ss, struct cftype *cfts); int cgroup_is_removed(const struct cgroup *cgrp); Index: github/kernel/cgroup.c === --- github.orig/kernel/cgroup.c 2012-08-23 15:48:33.577016061 -0400 +++ github/kernel/cgroup.c 2012-08-23 15:48:42.485258910 -0400 @@ -276,7 +276,8 @@ /* bits in struct cgroupfs_root flags field */ enum { - 
ROOT_NOPREFIX, /* mounted subsystems have no named prefix */ + ROOT_NOPREFIX, /* mounted subsystems have no named prefix */ + ROOT_XATTR, /* supports extended attributes */ }; static int cgroup_is_releasable(const struct cgroup *cgrp) @@ -913,15 +914,19 @@ */ BUG_ON(!list_empty(>pidlists)); + simple_xattrs_free(>xattrs); + kfree_rcu(cgrp, rcu_head); } else { struct cfent *cfe = __d_cfe(dentry); struct cgroup *cgrp = dentry->d_parent->d_fsdata; + struct cftype *cft = cfe->type; WARN_ONCE(!list_empty(>node) && cgrp != >root->top_cgroup, "cfe still linked for %s\n", cfe->type->name); kfree(cfe); + simple_xattrs_free(>xattrs); } iput(inode); } @@ -1140,6 +1145,8 @@ seq_printf(seq, ",%s", ss->name); if (test_bit(ROOT_NOPREFIX, >flags)) seq_puts(seq, ",noprefix"); + if
[PATCH v7 2/4] cgroup: revise how we re-populate root directory
From: Li Zefan When remounting cgroupfs with some subsystems added to it and some removed, cgroup will remove all the files in root directory and then re-popluate it. What I'm doing here is, only remove files which belong to subsystems that are to be unbinded, and only create files for newly-added subsystems. The purpose is to have all other files untouched. This is a preparation for cgroup xattr support. v7: - checkpatch warnings fixed v6: - no changes v5: - no changes v4: - refactored cgroup_clear_directory() to not use cgroup_rm_file() - instead of going thru the list of files, get the file list using the subsystems - use 'subsys_mask' instead of {added,removed}_bits and made cgroup_populate_dir() to match the parameters with cgroup_clear_directory() v3: - refresh patches after recent refactoring Cc: Li Zefan Cc: Tejun Heo Cc: Hugh Dickins Cc: Hillf Danton Cc: Lennart Poettering Signed-off-by: Li Zefan Signed-off-by: Aristeu Rozanski --- kernel/cgroup.c | 61 1 file changed, 48 insertions(+), 13 deletions(-) Index: github/kernel/cgroup.c === --- github.orig/kernel/cgroup.c 2012-08-23 15:46:12.557171590 -0400 +++ github/kernel/cgroup.c 2012-08-23 15:48:33.577016061 -0400 @@ -824,7 +824,8 @@ static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode); static struct dentry *cgroup_lookup(struct inode *, struct dentry *, unsigned int); static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry); -static int cgroup_populate_dir(struct cgroup *cgrp); +static int cgroup_populate_dir(struct cgroup *cgrp, bool base_files, + unsigned long subsys_mask); static const struct inode_operations cgroup_dir_inode_operations; static const struct file_operations proc_cgroupstats_operations; @@ -963,12 +964,29 @@ return -ENOENT; } -static void cgroup_clear_directory(struct dentry *dir) +/** + * cgroup_clear_directory - selective removal of base and subsystem files + * @dir: directory containing the files + * @base_files: true if the base files should 
be removed + * @subsys_mask: mask of the subsystem ids whose files should be removed + */ +static void cgroup_clear_directory(struct dentry *dir, bool base_files, + unsigned long subsys_mask) { struct cgroup *cgrp = __d_cgrp(dir); + struct cgroup_subsys *ss; - while (!list_empty(>files)) - cgroup_rm_file(cgrp, NULL); + for_each_subsys(cgrp->root, ss) { + struct cftype_set *set; + if (!test_bit(ss->subsys_id, _mask)) + continue; + list_for_each_entry(set, >cftsets, node) + cgroup_rm_file(cgrp, set->cfts); + } + if (base_files) { + while (!list_empty(>files)) + cgroup_rm_file(cgrp, NULL); + } } /* @@ -977,8 +995,9 @@ static void cgroup_d_remove_dir(struct dentry *dentry) { struct dentry *parent; + struct cgroupfs_root *root = dentry->d_sb->s_fs_info; - cgroup_clear_directory(dentry); + cgroup_clear_directory(dentry, true, root->subsys_bits); parent = dentry->d_parent; spin_lock(>d_lock); @@ -1339,6 +1358,7 @@ struct cgroupfs_root *root = sb->s_fs_info; struct cgroup *cgrp = >top_cgroup; struct cgroup_sb_opts opts; + unsigned long added_bits, removed_bits; mutex_lock(>dentry->d_inode->i_mutex); mutex_lock(_mutex); @@ -1354,6 +1374,9 @@ pr_warning("cgroup: option changes via remount are deprecated (pid=%d comm=%s)\n", task_tgid_nr(current), current->comm); + added_bits = opts.subsys_bits & ~root->subsys_bits; + removed_bits = root->subsys_bits & ~opts.subsys_bits; + /* Don't allow flags or name to change at remount */ if (opts.flags != root->flags || (opts.name && strcmp(opts.name, root->name))) { @@ -1369,8 +1392,9 @@ } /* clear out any existing files and repopulate subsystem files */ - cgroup_clear_directory(cgrp->dentry); - cgroup_populate_dir(cgrp); + cgroup_clear_directory(cgrp->dentry, false, removed_bits); + /* re-populate subsystem files */ + cgroup_populate_dir(cgrp, false, added_bits); if (opts.release_agent) strcpy(root->release_agent_path, opts.release_agent); @@ -1669,7 +1693,7 @@ BUG_ON(root->number_of_cgroups != 1); cred = override_creds(_cred); - 
cgroup_populate_dir(root_cgrp); + cgroup_populate_dir(root_cgrp, true, root->subsys_bits); revert_creds(cred); mutex_unlock(_root_mutex); mutex_unlock(_mutex); @@ -3843,18 +3867,29 @@ { } /* terminate */ };
[PATCH v7 1/4] xattr: extract simple_xattr code from tmpfs
From: Li Zefan Extract in-memory xattr APIs from tmpfs. Will be used by cgroup. $ size vmlinux.o textdata bss dec hex filename 4658782 880729 5195032 10734543 a3cbcf vmlinux.o $ size vmlinux.o textdata bss dec hex filename 4658957 880729 5195032 10734718 a3cc7e vmlinux.o v7: - checkpatch warnings fixed - Implement the changes requested by Hugh Dickins: - make simple_xattrs_init and simple_xattrs_free inline - get rid of locking and list reinitialization in simple_xattrs_free, they're not needed v6: - no changes v5: - no changes v4: - move simple_xattrs_free() to fs/xattr.c v3: - in kmem_xattrs_free(), reinitialize the list - use simple_xattr_* prefix - introduce simple_xattr_add() to prevent direct list usage Cc: Li Zefan Cc: Tejun Heo Cc: Hugh Dickins Cc: Hillf Danton Cc: Lennart Poettering Signed-off-by: Li Zefan Signed-off-by: Aristeu Rozanski --- fs/xattr.c | 167 + include/linux/shmem_fs.h |3 include/linux/xattr.h| 48 + mm/shmem.c | 171 +++ 4 files changed, 230 insertions(+), 159 deletions(-) Index: github/fs/xattr.c === --- github.orig/fs/xattr.c 2012-08-23 15:43:44.305129707 -0400 +++ github/fs/xattr.c 2012-08-23 15:43:45.477161661 -0400 @@ -791,3 +791,170 @@ EXPORT_SYMBOL(generic_listxattr); EXPORT_SYMBOL(generic_setxattr); EXPORT_SYMBOL(generic_removexattr); + +/* + * Allocate new xattr and copy in the value; but leave the name to callers. + */ +struct simple_xattr *simple_xattr_alloc(const void *value, size_t size) +{ + struct simple_xattr *new_xattr; + size_t len; + + /* wrap around? 
*/ + len = sizeof(*new_xattr) + size; + if (len <= sizeof(*new_xattr)) + return NULL; + + new_xattr = kmalloc(len, GFP_KERNEL); + if (!new_xattr) + return NULL; + + new_xattr->size = size; + memcpy(new_xattr->value, value, size); + return new_xattr; +} + +/* + * xattr GET operation for in-memory/pseudo filesystems + */ +int simple_xattr_get(struct simple_xattrs *xattrs, const char *name, +void *buffer, size_t size) +{ + struct simple_xattr *xattr; + int ret = -ENODATA; + + spin_lock(>lock); + list_for_each_entry(xattr, >head, list) { + if (strcmp(name, xattr->name)) + continue; + + ret = xattr->size; + if (buffer) { + if (size < xattr->size) + ret = -ERANGE; + else + memcpy(buffer, xattr->value, xattr->size); + } + break; + } + spin_unlock(>lock); + return ret; +} + +static int __simple_xattr_set(struct simple_xattrs *xattrs, const char *name, + const void *value, size_t size, int flags) +{ + struct simple_xattr *xattr; + struct simple_xattr *new_xattr = NULL; + int err = 0; + + /* value == NULL means remove */ + if (value) { + new_xattr = simple_xattr_alloc(value, size); + if (!new_xattr) + return -ENOMEM; + + new_xattr->name = kstrdup(name, GFP_KERNEL); + if (!new_xattr->name) { + kfree(new_xattr); + return -ENOMEM; + } + } + + spin_lock(>lock); + list_for_each_entry(xattr, >head, list) { + if (!strcmp(name, xattr->name)) { + if (flags & XATTR_CREATE) { + xattr = new_xattr; + err = -EEXIST; + } else if (new_xattr) { + list_replace(>list, _xattr->list); + } else { + list_del(>list); + } + goto out; + } + } + if (flags & XATTR_REPLACE) { + xattr = new_xattr; + err = -ENODATA; + } else { + list_add(_xattr->list, >head); + xattr = NULL; + } +out: + spin_unlock(>lock); + if (xattr) { + kfree(xattr->name); + kfree(xattr); + } + return err; + +} + +/* + * xattr SET operation for in-memory/pseudo filesystems + */ +int simple_xattr_set(struct simple_xattrs *xattrs, const char *name, +const void *value, size_t size, int flags) +{ + if (size == 0) + value = ""; /* empty EA, 
do not remove */ + return __simple_xattr_set(xattrs, name, value, size, flags); +} + +/* + * xattr REMOVE operation for in-memory/pseudo filesystems + */ +int
[PATCH v7 4/4] cgroup: rename subsys_bits to subsys_mask
In a previous discussion, Tejun Heo suggested to rename references to subsys_bits (added_bits, removed_bits, etc) by something more meaningful. Cc: Li Zefan Cc: Tejun Heo Cc: Hugh Dickins Cc: Hillf Danton Cc: Lennart Poettering Signed-off-by: Aristeu Rozanski --- kernel/cgroup.c | 84 1 file changed, 42 insertions(+), 42 deletions(-) Index: github/kernel/cgroup.c === --- github.orig/kernel/cgroup.c 2012-08-16 11:33:43.276337899 -0400 +++ github/kernel/cgroup.c 2012-08-16 11:33:43.292338339 -0400 @@ -111,13 +111,13 @@ * The bitmask of subsystems intended to be attached to this * hierarchy */ - unsigned long subsys_bits; + unsigned long subsys_mask; /* Unique id for this hierarchy. */ int hierarchy_id; /* The bitmask of subsystems currently attached to this hierarchy */ - unsigned long actual_subsys_bits; + unsigned long actual_subsys_mask; /* A list running through the attached subsystems */ struct list_head subsys_list; @@ -557,7 +557,7 @@ * won't change, so no need for locking. */ for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { - if (root->subsys_bits & (1UL << i)) { + if (root->subsys_mask & (1UL << i)) { /* Subsystem is in this hierarchy. So we want * the subsystem state from the new * cgroup */ @@ -1002,7 +1002,7 @@ struct dentry *parent; struct cgroupfs_root *root = dentry->d_sb->s_fs_info; - cgroup_clear_directory(dentry, true, root->subsys_bits); + cgroup_clear_directory(dentry, true, root->subsys_mask); parent = dentry->d_parent; spin_lock(>d_lock); @@ -1046,22 +1046,22 @@ * returns an error, no reference counts are touched. 
*/ static int rebind_subsystems(struct cgroupfs_root *root, - unsigned long final_bits) + unsigned long final_subsys_mask) { - unsigned long added_bits, removed_bits; + unsigned long added_mask, removed_mask; struct cgroup *cgrp = >top_cgroup; int i; BUG_ON(!mutex_is_locked(_mutex)); BUG_ON(!mutex_is_locked(_root_mutex)); - removed_bits = root->actual_subsys_bits & ~final_bits; - added_bits = final_bits & ~root->actual_subsys_bits; + removed_mask = root->actual_subsys_mask & ~final_subsys_mask; + added_mask = final_subsys_mask & ~root->actual_subsys_mask; /* Check that any added subsystems are currently free */ for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { unsigned long bit = 1UL << i; struct cgroup_subsys *ss = subsys[i]; - if (!(bit & added_bits)) + if (!(bit & added_mask)) continue; /* * Nobody should tell us to do a subsys that doesn't exist: @@ -1086,7 +1086,7 @@ for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { struct cgroup_subsys *ss = subsys[i]; unsigned long bit = 1UL << i; - if (bit & added_bits) { + if (bit & added_mask) { /* We're binding this subsystem to this hierarchy */ BUG_ON(ss == NULL); BUG_ON(cgrp->subsys[i]); @@ -1099,7 +1099,7 @@ if (ss->bind) ss->bind(cgrp); /* refcount was already taken, and we're keeping it */ - } else if (bit & removed_bits) { + } else if (bit & removed_mask) { /* We're removing this subsystem */ BUG_ON(ss == NULL); BUG_ON(cgrp->subsys[i] != dummytop->subsys[i]); @@ -1112,7 +1112,7 @@ list_move(>sibling, _list); /* subsystem is now free - drop reference on module */ module_put(ss->module); - } else if (bit & final_bits) { + } else if (bit & final_subsys_mask) { /* Subsystem state should already exist */ BUG_ON(ss == NULL); BUG_ON(!cgrp->subsys[i]); @@ -1129,7 +1129,7 @@ BUG_ON(cgrp->subsys[i]); } } - root->subsys_bits = root->actual_subsys_bits = final_bits; + root->subsys_mask = root->actual_subsys_mask = final_subsys_mask; synchronize_rcu(); return 0; @@ -1158,7 +1158,7 @@ } struct cgroup_sb_opts { - unsigned long subsys_bits; 
+ unsigned long subsys_mask; unsigned long flags; char *release_agent; bool clone_children; @@ -1267,7 +1267,7 @@ /* Mutually
[PATCH v7 0/4] cgroup: add xattr support
This series is a refreshed version of a patchset submitted by Li Zefan back in March: https://lkml.org/lkml/2012/3/1/13 With Li's permission, I refreshed the patches to apply over the latest upstream and added the modifications suggested by others in the thread: - using a mount option instead of a config option to enable the xattr support - reinitialize the list in kmem_xattrs_free() - renamed functions to simple_xattr_*() There are two users for this patchset: - SELinux: to be able to control access to cgroupfs inside containers - systemd: to store meta information such as the main PID in a service cgroup, and to set special options for specific services in the cgroup. While the xattrs will use kernel memory like tmpfs, they're restricted to 'security' (which controls the format of the value) and 'trusted' (which requires CAP_SYS_ADMIN). If kernel memory usage is still a concern, we're not far from having memcg account for kernel memory. v7: - fix checkpatch.pl warnings - Implement the changes requested by Hugh Dickins: - make simple_xattrs_init and simple_xattrs_free inline - get rid of locking and list reinitialization in simple_xattrs_free, they're not needed v6: - only allow trusted and security - replace subsys_bits with something more meaningful v5: - check for permissions for user xattr namespace v4: - implemented changes requested by Tejun Heo in patch #2 Cc: Li Zefan Cc: Tejun Heo Cc: Hugh Dickins Cc: Hillf Danton Cc: Lennart Poettering Signed-off-by: Li Zefan Signed-off-by: Aristeu Rozanski -- Aristeu -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v7 0/4] cgroup: add xattr support
This series is a refreshed version of a patchset submitted by Li Zefan back in March: https://lkml.org/lkml/2012/3/1/13 With Li's permission, I refreshed the patches to apply over the latest upstream and added the modifications suggested by others in the thread: - using a mount option instead of a config option to enable the xattr support - reinitialize the list in kmem_xattrs_free() - renamed functions to simple_xattr_*() There are two users for this patchset: - SELinux: to be able to control access to cgroupfs inside containers - systemd: to store meta information such as the main PID in a service cgroup, and to set special options for specific services in the cgroup. While the xattrs will use kernel memory like tmpfs, they're restricted to 'security' (which controls the format of the value) and 'trusted' (which requires CAP_SYS_ADMIN). If kernel memory usage is still a concern, we're not far from having memcg account for kernel memory. v7: - fix checkpatch.pl warnings - Implement the changes requested by Hugh Dickins: - make simple_xattrs_init and simple_xattrs_free inline - get rid of locking and list reinitialization in simple_xattrs_free, they're not needed v6: - only allow trusted and security - replace subsys_bits with something more meaningful v5: - check for permissions for user xattr namespace v4: - implemented changes requested by Tejun Heo in patch #2 Cc: Li Zefan lize...@huawei.com Cc: Tejun Heo t...@kernel.org Cc: Hugh Dickins hu...@google.com Cc: Hillf Danton dhi...@gmail.com Cc: Lennart Poettering lpoet...@redhat.com Signed-off-by: Li Zefan lize...@huawei.com Signed-off-by: Aristeu Rozanski a...@redhat.com -- Aristeu -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v7 4/4] cgroup: rename subsys_bits to subsys_mask
In a previous discussion, Tejun Heo suggested to rename references to subsys_bits (added_bits, removed_bits, etc) by something more meaningful. Cc: Li Zefan lize...@huawei.com Cc: Tejun Heo t...@kernel.org Cc: Hugh Dickins hu...@google.com Cc: Hillf Danton dhi...@gmail.com Cc: Lennart Poettering lpoet...@redhat.com Signed-off-by: Aristeu Rozanski a...@redhat.com --- kernel/cgroup.c | 84 1 file changed, 42 insertions(+), 42 deletions(-) Index: github/kernel/cgroup.c === --- github.orig/kernel/cgroup.c 2012-08-16 11:33:43.276337899 -0400 +++ github/kernel/cgroup.c 2012-08-16 11:33:43.292338339 -0400 @@ -111,13 +111,13 @@ * The bitmask of subsystems intended to be attached to this * hierarchy */ - unsigned long subsys_bits; + unsigned long subsys_mask; /* Unique id for this hierarchy. */ int hierarchy_id; /* The bitmask of subsystems currently attached to this hierarchy */ - unsigned long actual_subsys_bits; + unsigned long actual_subsys_mask; /* A list running through the attached subsystems */ struct list_head subsys_list; @@ -557,7 +557,7 @@ * won't change, so no need for locking. */ for (i = 0; i CGROUP_SUBSYS_COUNT; i++) { - if (root-subsys_bits (1UL i)) { + if (root-subsys_mask (1UL i)) { /* Subsystem is in this hierarchy. So we want * the subsystem state from the new * cgroup */ @@ -1002,7 +1002,7 @@ struct dentry *parent; struct cgroupfs_root *root = dentry-d_sb-s_fs_info; - cgroup_clear_directory(dentry, true, root-subsys_bits); + cgroup_clear_directory(dentry, true, root-subsys_mask); parent = dentry-d_parent; spin_lock(parent-d_lock); @@ -1046,22 +1046,22 @@ * returns an error, no reference counts are touched. 
*/ static int rebind_subsystems(struct cgroupfs_root *root, - unsigned long final_bits) + unsigned long final_subsys_mask) { - unsigned long added_bits, removed_bits; + unsigned long added_mask, removed_mask; struct cgroup *cgrp = root-top_cgroup; int i; BUG_ON(!mutex_is_locked(cgroup_mutex)); BUG_ON(!mutex_is_locked(cgroup_root_mutex)); - removed_bits = root-actual_subsys_bits ~final_bits; - added_bits = final_bits ~root-actual_subsys_bits; + removed_mask = root-actual_subsys_mask ~final_subsys_mask; + added_mask = final_subsys_mask ~root-actual_subsys_mask; /* Check that any added subsystems are currently free */ for (i = 0; i CGROUP_SUBSYS_COUNT; i++) { unsigned long bit = 1UL i; struct cgroup_subsys *ss = subsys[i]; - if (!(bit added_bits)) + if (!(bit added_mask)) continue; /* * Nobody should tell us to do a subsys that doesn't exist: @@ -1086,7 +1086,7 @@ for (i = 0; i CGROUP_SUBSYS_COUNT; i++) { struct cgroup_subsys *ss = subsys[i]; unsigned long bit = 1UL i; - if (bit added_bits) { + if (bit added_mask) { /* We're binding this subsystem to this hierarchy */ BUG_ON(ss == NULL); BUG_ON(cgrp-subsys[i]); @@ -1099,7 +1099,7 @@ if (ss-bind) ss-bind(cgrp); /* refcount was already taken, and we're keeping it */ - } else if (bit removed_bits) { + } else if (bit removed_mask) { /* We're removing this subsystem */ BUG_ON(ss == NULL); BUG_ON(cgrp-subsys[i] != dummytop-subsys[i]); @@ -1112,7 +1112,7 @@ list_move(ss-sibling, rootnode.subsys_list); /* subsystem is now free - drop reference on module */ module_put(ss-module); - } else if (bit final_bits) { + } else if (bit final_subsys_mask) { /* Subsystem state should already exist */ BUG_ON(ss == NULL); BUG_ON(!cgrp-subsys[i]); @@ -1129,7 +1129,7 @@ BUG_ON(cgrp-subsys[i]); } } - root-subsys_bits = root-actual_subsys_bits = final_bits; + root-subsys_mask = root-actual_subsys_mask = final_subsys_mask; synchronize_rcu(); return 0; @@ -1158,7 +1158,7 @@ } struct cgroup_sb_opts { - unsigned long subsys_bits; + unsigned long 
subsys_mask; unsigned long flags; char *release_agent;
[PATCH v7 1/4] xattr: extract simple_xattr code from tmpfs
From: Li Zefan lize...@huawei.com Extract in-memory xattr APIs from tmpfs. Will be used by cgroup. $ size vmlinux.o textdata bss dec hex filename 4658782 880729 5195032 10734543 a3cbcf vmlinux.o $ size vmlinux.o textdata bss dec hex filename 4658957 880729 5195032 10734718 a3cc7e vmlinux.o v7: - checkpatch warnings fixed - Implement the changes requested by Hugh Dickins: - make simple_xattrs_init and simple_xattrs_free inline - get rid of locking and list reinitialization in simple_xattrs_free, they're not needed v6: - no changes v5: - no changes v4: - move simple_xattrs_free() to fs/xattr.c v3: - in kmem_xattrs_free(), reinitialize the list - use simple_xattr_* prefix - introduce simple_xattr_add() to prevent direct list usage Cc: Li Zefan lize...@huawei.com Cc: Tejun Heo t...@kernel.org Cc: Hugh Dickins hu...@google.com Cc: Hillf Danton dhi...@gmail.com Cc: Lennart Poettering lpoet...@redhat.com Signed-off-by: Li Zefan lize...@huawei.com Signed-off-by: Aristeu Rozanski a...@redhat.com --- fs/xattr.c | 167 + include/linux/shmem_fs.h |3 include/linux/xattr.h| 48 + mm/shmem.c | 171 +++ 4 files changed, 230 insertions(+), 159 deletions(-) Index: github/fs/xattr.c === --- github.orig/fs/xattr.c 2012-08-23 15:43:44.305129707 -0400 +++ github/fs/xattr.c 2012-08-23 15:43:45.477161661 -0400 @@ -791,3 +791,170 @@ EXPORT_SYMBOL(generic_listxattr); EXPORT_SYMBOL(generic_setxattr); EXPORT_SYMBOL(generic_removexattr); + +/* + * Allocate new xattr and copy in the value; but leave the name to callers. + */ +struct simple_xattr *simple_xattr_alloc(const void *value, size_t size) +{ + struct simple_xattr *new_xattr; + size_t len; + + /* wrap around? 
*/ + len = sizeof(*new_xattr) + size; + if (len = sizeof(*new_xattr)) + return NULL; + + new_xattr = kmalloc(len, GFP_KERNEL); + if (!new_xattr) + return NULL; + + new_xattr-size = size; + memcpy(new_xattr-value, value, size); + return new_xattr; +} + +/* + * xattr GET operation for in-memory/pseudo filesystems + */ +int simple_xattr_get(struct simple_xattrs *xattrs, const char *name, +void *buffer, size_t size) +{ + struct simple_xattr *xattr; + int ret = -ENODATA; + + spin_lock(xattrs-lock); + list_for_each_entry(xattr, xattrs-head, list) { + if (strcmp(name, xattr-name)) + continue; + + ret = xattr-size; + if (buffer) { + if (size xattr-size) + ret = -ERANGE; + else + memcpy(buffer, xattr-value, xattr-size); + } + break; + } + spin_unlock(xattrs-lock); + return ret; +} + +static int __simple_xattr_set(struct simple_xattrs *xattrs, const char *name, + const void *value, size_t size, int flags) +{ + struct simple_xattr *xattr; + struct simple_xattr *new_xattr = NULL; + int err = 0; + + /* value == NULL means remove */ + if (value) { + new_xattr = simple_xattr_alloc(value, size); + if (!new_xattr) + return -ENOMEM; + + new_xattr-name = kstrdup(name, GFP_KERNEL); + if (!new_xattr-name) { + kfree(new_xattr); + return -ENOMEM; + } + } + + spin_lock(xattrs-lock); + list_for_each_entry(xattr, xattrs-head, list) { + if (!strcmp(name, xattr-name)) { + if (flags XATTR_CREATE) { + xattr = new_xattr; + err = -EEXIST; + } else if (new_xattr) { + list_replace(xattr-list, new_xattr-list); + } else { + list_del(xattr-list); + } + goto out; + } + } + if (flags XATTR_REPLACE) { + xattr = new_xattr; + err = -ENODATA; + } else { + list_add(new_xattr-list, xattrs-head); + xattr = NULL; + } +out: + spin_unlock(xattrs-lock); + if (xattr) { + kfree(xattr-name); + kfree(xattr); + } + return err; + +} + +/* + * xattr SET operation for in-memory/pseudo filesystems + */ +int simple_xattr_set(struct simple_xattrs *xattrs, const char *name, +const void *value, size_t size, int flags) +{ + if 
(size == 0) + value = ; /* empty EA,
[PATCH v7 2/4] cgroup: revise how we re-populate root directory
From: Li Zefan lize...@huawei.com When remounting cgroupfs with some subsystems added to it and some removed, cgroup will remove all the files in root directory and then re-popluate it. What I'm doing here is, only remove files which belong to subsystems that are to be unbinded, and only create files for newly-added subsystems. The purpose is to have all other files untouched. This is a preparation for cgroup xattr support. v7: - checkpatch warnings fixed v6: - no changes v5: - no changes v4: - refactored cgroup_clear_directory() to not use cgroup_rm_file() - instead of going thru the list of files, get the file list using the subsystems - use 'subsys_mask' instead of {added,removed}_bits and made cgroup_populate_dir() to match the parameters with cgroup_clear_directory() v3: - refresh patches after recent refactoring Cc: Li Zefan lize...@huawei.com Cc: Tejun Heo t...@kernel.org Cc: Hugh Dickins hu...@google.com Cc: Hillf Danton dhi...@gmail.com Cc: Lennart Poettering lpoet...@redhat.com Signed-off-by: Li Zefan lize...@huawei.com Signed-off-by: Aristeu Rozanski a...@redhat.com --- kernel/cgroup.c | 61 1 file changed, 48 insertions(+), 13 deletions(-) Index: github/kernel/cgroup.c === --- github.orig/kernel/cgroup.c 2012-08-23 15:46:12.557171590 -0400 +++ github/kernel/cgroup.c 2012-08-23 15:48:33.577016061 -0400 @@ -824,7 +824,8 @@ static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode); static struct dentry *cgroup_lookup(struct inode *, struct dentry *, unsigned int); static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry); -static int cgroup_populate_dir(struct cgroup *cgrp); +static int cgroup_populate_dir(struct cgroup *cgrp, bool base_files, + unsigned long subsys_mask); static const struct inode_operations cgroup_dir_inode_operations; static const struct file_operations proc_cgroupstats_operations; @@ -963,12 +964,29 @@ return -ENOENT; } -static void cgroup_clear_directory(struct dentry *dir) +/** + * 
cgroup_clear_directory - selective removal of base and subsystem files + * @dir: directory containing the files + * @base_files: true if the base files should be removed + * @subsys_mask: mask of the subsystem ids whose files should be removed + */ +static void cgroup_clear_directory(struct dentry *dir, bool base_files, + unsigned long subsys_mask) { struct cgroup *cgrp = __d_cgrp(dir); + struct cgroup_subsys *ss; - while (!list_empty(cgrp-files)) - cgroup_rm_file(cgrp, NULL); + for_each_subsys(cgrp-root, ss) { + struct cftype_set *set; + if (!test_bit(ss-subsys_id, subsys_mask)) + continue; + list_for_each_entry(set, ss-cftsets, node) + cgroup_rm_file(cgrp, set-cfts); + } + if (base_files) { + while (!list_empty(cgrp-files)) + cgroup_rm_file(cgrp, NULL); + } } /* @@ -977,8 +995,9 @@ static void cgroup_d_remove_dir(struct dentry *dentry) { struct dentry *parent; + struct cgroupfs_root *root = dentry-d_sb-s_fs_info; - cgroup_clear_directory(dentry); + cgroup_clear_directory(dentry, true, root-subsys_bits); parent = dentry-d_parent; spin_lock(parent-d_lock); @@ -1339,6 +1358,7 @@ struct cgroupfs_root *root = sb-s_fs_info; struct cgroup *cgrp = root-top_cgroup; struct cgroup_sb_opts opts; + unsigned long added_bits, removed_bits; mutex_lock(cgrp-dentry-d_inode-i_mutex); mutex_lock(cgroup_mutex); @@ -1354,6 +1374,9 @@ pr_warning(cgroup: option changes via remount are deprecated (pid=%d comm=%s)\n, task_tgid_nr(current), current-comm); + added_bits = opts.subsys_bits ~root-subsys_bits; + removed_bits = root-subsys_bits ~opts.subsys_bits; + /* Don't allow flags or name to change at remount */ if (opts.flags != root-flags || (opts.name strcmp(opts.name, root-name))) { @@ -1369,8 +1392,9 @@ } /* clear out any existing files and repopulate subsystem files */ - cgroup_clear_directory(cgrp-dentry); - cgroup_populate_dir(cgrp); + cgroup_clear_directory(cgrp-dentry, false, removed_bits); + /* re-populate subsystem files */ + cgroup_populate_dir(cgrp, false, added_bits); if 
(opts.release_agent) strcpy(root-release_agent_path, opts.release_agent); @@ -1669,7 +1693,7 @@ BUG_ON(root-number_of_cgroups != 1); cred = override_creds(init_cred); - cgroup_populate_dir(root_cgrp); + cgroup_populate_dir(root_cgrp, true, root-subsys_bits);
[PATCH v7 3/4] cgroup: add xattr support
From: Li Zefan lize...@huawei.com This is one of the items in the plumber's wish list. For use cases: >> What would the use case be for this? > > Attaching meta information to services, in an easily discoverable > way. For example, in systemd we create one cgroup for each service, and > could then store data like the main pid of the specific service as an > xattr on the cgroup itself. That way we'd have almost all service state > in the cgroupfs, which would make it possible to terminate systemd and > later restart it without losing any state information. But there's more: > for example, some very peculiar services cannot be terminated on > shutdown (i.e. fakeraid DM stuff) and it would be really nice if the > services in question could just mark that on their cgroup, by setting an > xattr. On the more desktopy side of things there are other > possibilities: for example there are plans defining what an application > is along the lines of a cgroup (i.e. an app being a collection of > processes). With xattrs one could then attach an icon or human readable > program name on the cgroup. > > The key idea is that this would allow attaching runtime meta information > to cgroups and everything they model (services, apps, vms), that doesn't > need any complex userspace infrastructure, has good access control > (i.e. because the file system enforces that anyway, and there's the > "trusted." xattr namespace), notifications (inotify), and can easily be > shared among applications.
Lennart v7: - no changes v6: - remove user xattr namespace, only allow trusted and security v5: - check for capabilities before setting/removing xattrs v4: - no changes v3: - instead of config option, use mount option to enable xattr support Cc: Li Zefan lize...@huawei.com Cc: Tejun Heo t...@kernel.org Cc: Hugh Dickins hu...@google.com Cc: Hillf Danton dhi...@gmail.com Cc: Lennart Poettering lpoet...@redhat.com Signed-off-by: Li Zefan lize...@huawei.com Signed-off-by: Aristeu Rozanski a...@redhat.com --- include/linux/cgroup.h | 13 -- kernel/cgroup.c| 100 + 2 files changed, 103 insertions(+), 10 deletions(-) Index: github/include/linux/cgroup.h === --- github.orig/include/linux/cgroup.h 2012-08-23 15:46:12.465169082 -0400 +++ github/include/linux/cgroup.h 2012-08-23 15:48:42.485258910 -0400 @@ -17,6 +17,7 @@ #include linux/rwsem.h #include linux/idr.h #include linux/workqueue.h +#include linux/xattr.h #ifdef CONFIG_CGROUPS @@ -216,6 +217,9 @@ /* List of events which userspace want to receive */ struct list_head event_list; spinlock_t event_list_lock; + + /* directory xattrs */ + struct simple_xattrs xattrs; }; /* @@ -309,6 +313,9 @@ /* CFTYPE_* flags */ unsigned int flags; + /* file xattrs */ + struct simple_xattrs xattrs; + int (*open)(struct inode *inode, struct file *file); ssize_t (*read)(struct cgroup *cgrp, struct cftype *cft, struct file *file, @@ -394,7 +401,7 @@ */ struct cftype_set { struct list_headnode; /* chained at subsys-cftsets */ - const struct cftype *cfts; + struct cftype *cfts; }; struct cgroup_scanner { @@ -406,8 +413,8 @@ void *data; }; -int cgroup_add_cftypes(struct cgroup_subsys *ss, const struct cftype *cfts); -int cgroup_rm_cftypes(struct cgroup_subsys *ss, const struct cftype *cfts); +int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts); +int cgroup_rm_cftypes(struct cgroup_subsys *ss, struct cftype *cfts); int cgroup_is_removed(const struct cgroup *cgrp); Index: github/kernel/cgroup.c === --- github.orig/kernel/cgroup.c 
2012-08-23 15:48:33.577016061 -0400 +++ github/kernel/cgroup.c 2012-08-23 15:48:42.485258910 -0400 @@ -276,7 +276,8 @@ /* bits in struct cgroupfs_root flags field */ enum { - ROOT_NOPREFIX, /* mounted subsystems have no named prefix */ + ROOT_NOPREFIX, /* mounted subsystems have no named prefix */ + ROOT_XATTR, /* supports extended attributes */ }; static int cgroup_is_releasable(const struct cgroup *cgrp) @@ -913,15 +914,19 @@ */ BUG_ON(!list_empty(cgrp-pidlists)); + simple_xattrs_free(cgrp-xattrs); + kfree_rcu(cgrp, rcu_head); } else { struct cfent *cfe = __d_cfe(dentry); struct cgroup *cgrp = dentry-d_parent-d_fsdata; + struct cftype *cft = cfe-type; WARN_ONCE(!list_empty(cfe-node) cgrp != cgrp-root-top_cgroup, cfe still linked for %s\n, cfe-type-name); kfree(cfe); + simple_xattrs_free(cft-xattrs); } iput(inode); } @@
[PATCH v6 1/4] xattr: extract simple_xattr code from tmpfs
From: Li Zefan Extract in-memory xattr APIs from tmpfs. Will be used by cgroup. $ size vmlinux.o textdata bss dec hex filename 4658782 880729 5195032 10734543 a3cbcf vmlinux.o $ size vmlinux.o textdata bss dec hex filename 4658957 880729 5195032 10734718 a3cc7e vmlinux.o v6: - no changes v5: - no changes v4: - move simple_xattrs_free() to fs/xattr.c v3: - in kmem_xattrs_free(), reinitialize the list - use simple_xattr_* prefix - introduce simple_xattr_add() to prevent direct list usage Cc: Li Zefan Cc: Tejun Heo Cc: Hugh Dickins Cc: Hillf Danton Cc: Lennart Poettering Signed-off-by: Li Zefan Signed-off-by: Aristeu Rozanski --- fs/xattr.c | 200 +++ include/linux/shmem_fs.h |3 include/linux/xattr.h| 25 + mm/shmem.c | 171 +++- 4 files changed, 240 insertions(+), 159 deletions(-) Index: github/fs/xattr.c === --- github.orig/fs/xattr.c 2012-08-16 11:28:12.719273435 -0400 +++ github/fs/xattr.c 2012-08-16 11:28:13.975307743 -0400 @@ -791,3 +791,203 @@ EXPORT_SYMBOL(generic_listxattr); EXPORT_SYMBOL(generic_setxattr); EXPORT_SYMBOL(generic_removexattr); + +/* + * initialize the simple_xattrs structure + */ +void simple_xattrs_init(struct simple_xattrs *xattrs) +{ + INIT_LIST_HEAD(>head); + spin_lock_init(>lock); +} + +/* + * Allocate new xattr and copy in the value; but leave the name to callers. + */ +struct simple_xattr *simple_xattr_alloc(const void *value, size_t size) +{ + struct simple_xattr *new_xattr; + size_t len; + + /* wrap around? 
*/ + len = sizeof(*new_xattr) + size; + if (len <= sizeof(*new_xattr)) + return NULL; + + new_xattr = kmalloc(len, GFP_KERNEL); + if (!new_xattr) + return NULL; + + new_xattr->size = size; + memcpy(new_xattr->value, value, size); + return new_xattr; +} + +/* + * free all the xattrs + */ +void simple_xattrs_free(struct simple_xattrs *xattrs) +{ + struct simple_xattr *xattr, *node; + + spin_lock(>lock); + list_for_each_entry_safe(xattr, node, >head, list) { + kfree(xattr->name); + kfree(xattr); + } + INIT_LIST_HEAD(>head); + spin_unlock(>lock); +} + +/* + * xattr GET operation for in-memory/pseudo filesystems + */ +int simple_xattr_get(struct simple_xattrs *xattrs, const char *name, +void *buffer, size_t size) +{ + struct simple_xattr *xattr; + int ret = -ENODATA; + + spin_lock(>lock); + list_for_each_entry(xattr, >head, list) { + if (strcmp(name, xattr->name)) + continue; + + ret = xattr->size; + if (buffer) { + if (size < xattr->size) + ret = -ERANGE; + else + memcpy(buffer, xattr->value, xattr->size); + } + break; + } + spin_unlock(>lock); + return ret; +} + +static int __simple_xattr_set(struct simple_xattrs *xattrs, const char *name, + const void *value, size_t size, int flags) +{ + struct simple_xattr *xattr; + struct simple_xattr *new_xattr = NULL; + size_t len; + int err = 0; + + /* value == NULL means remove */ + if (value) { + /* wrap around? 
*/ + len = sizeof(*new_xattr) + size; + if (len <= sizeof(*new_xattr)) + return -ENOMEM; + + new_xattr = kmalloc(len, GFP_KERNEL); + if (!new_xattr) + return -ENOMEM; + + new_xattr->name = kstrdup(name, GFP_KERNEL); + if (!new_xattr->name) { + kfree(new_xattr); + return -ENOMEM; + } + + new_xattr->size = size; + memcpy(new_xattr->value, value, size); + } + + spin_lock(>lock); + list_for_each_entry(xattr, >head, list) { + if (!strcmp(name, xattr->name)) { + if (flags & XATTR_CREATE) { + xattr = new_xattr; + err = -EEXIST; + } else if (new_xattr) { + list_replace(>list, _xattr->list); + } else { + list_del(>list); + } + goto out; + } + } + if (flags & XATTR_REPLACE) { + xattr = new_xattr; + err = -ENODATA; + } else { + list_add(_xattr->list, >head); + xattr = NULL; + } +out: + spin_unlock(>lock); + if
[PATCH v6 2/4] cgroup: revise how we re-populate root directory
From: Li Zefan When remounting cgroupfs with some subsystems added to it and some removed, cgroup will remove all the files in root directory and then re-popluate it. What I'm doing here is, only remove files which belong to subsystems that are to be unbinded, and only create files for newly-added subsystems. The purpose is to have all other files untouched. This is a preparation for cgroup xattr support. v6: - no changes v5: - no changes v4: - refactored cgroup_clear_directory() to not use cgroup_rm_file() - instead of going thru the list of files, get the file list using the subsystems - use 'subsys_mask' instead of {added,removed}_bits and made cgroup_populate_dir() to match the parameters with cgroup_clear_directory() v3: - refresh patches after recent refactoring Cc: Li Zefan Cc: Tejun Heo Cc: Hugh Dickins Cc: Hillf Danton Cc: Lennart Poettering Signed-off-by: Li Zefan Signed-off-by: Aristeu Rozanski --- kernel/cgroup.c | 61 1 file changed, 48 insertions(+), 13 deletions(-) Index: github/kernel/cgroup.c === --- github.orig/kernel/cgroup.c 2012-08-16 11:28:12.683272452 -0400 +++ github/kernel/cgroup.c 2012-08-16 11:28:42.592089681 -0400 @@ -824,7 +824,8 @@ static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode); static struct dentry *cgroup_lookup(struct inode *, struct dentry *, unsigned int); static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry); -static int cgroup_populate_dir(struct cgroup *cgrp); +static int cgroup_populate_dir(struct cgroup *cgrp, bool base_files, + unsigned long subsys_mask); static const struct inode_operations cgroup_dir_inode_operations; static const struct file_operations proc_cgroupstats_operations; @@ -963,12 +964,29 @@ return -ENOENT; } -static void cgroup_clear_directory(struct dentry *dir) +/** + * cgroup_clear_directory - selective removal of base and subsystem files + * @dir: directory containing the files + * @base_files: true if the base files should be removed + * @subsys_mask: 
mask of the subsystem ids whose files should be removed + */ +static void cgroup_clear_directory(struct dentry *dir, bool base_files, + unsigned long subsys_mask) { struct cgroup *cgrp = __d_cgrp(dir); + struct cgroup_subsys *ss; - while (!list_empty(>files)) - cgroup_rm_file(cgrp, NULL); + for_each_subsys(cgrp->root, ss) { + struct cftype_set *set; + if (!test_bit(ss->subsys_id, _mask)) + continue; + list_for_each_entry(set, >cftsets, node) + cgroup_rm_file(cgrp, set->cfts); + } + if (base_files) { + while (!list_empty(>files)) + cgroup_rm_file(cgrp, NULL); + } } /* @@ -977,8 +995,9 @@ static void cgroup_d_remove_dir(struct dentry *dentry) { struct dentry *parent; + struct cgroupfs_root *root = dentry->d_sb->s_fs_info; - cgroup_clear_directory(dentry); + cgroup_clear_directory(dentry, true, root->subsys_bits); parent = dentry->d_parent; spin_lock(>d_lock); @@ -1339,6 +1358,7 @@ struct cgroupfs_root *root = sb->s_fs_info; struct cgroup *cgrp = >top_cgroup; struct cgroup_sb_opts opts; + unsigned long added_bits, removed_bits; mutex_lock(>dentry->d_inode->i_mutex); mutex_lock(_mutex); @@ -1354,6 +1374,9 @@ pr_warning("cgroup: option changes via remount are deprecated (pid=%d comm=%s)\n", task_tgid_nr(current), current->comm); + added_bits = opts.subsys_bits & ~root->subsys_bits; + removed_bits = root->subsys_bits & ~opts.subsys_bits; + /* Don't allow flags or name to change at remount */ if (opts.flags != root->flags || (opts.name && strcmp(opts.name, root->name))) { @@ -1369,8 +1392,9 @@ } /* clear out any existing files and repopulate subsystem files */ - cgroup_clear_directory(cgrp->dentry); - cgroup_populate_dir(cgrp); + cgroup_clear_directory(cgrp->dentry, false, removed_bits); + /* re-populate subsystem files */ + cgroup_populate_dir(cgrp, false, added_bits); if (opts.release_agent) strcpy(root->release_agent_path, opts.release_agent); @@ -1669,7 +1693,7 @@ BUG_ON(root->number_of_cgroups != 1); cred = override_creds(_cred); - cgroup_populate_dir(root_cgrp); + 
cgroup_populate_dir(root_cgrp, true, root->subsys_bits); revert_creds(cred); mutex_unlock(_root_mutex); mutex_unlock(_mutex); @@ -3843,18 +3867,29 @@ { } /* terminate */ }; -static int
[PATCH v6 0/4] cgroup: add xattr support
This series is a refreshed version of a patchset submitted by Li Zefan back in March: https://lkml.org/lkml/2012/3/1/13 With Li's permission, I refreshed the patches to apply over the latest upstream and added the modifications suggested by others in the thread: - use a mount option instead of a config option to enable the xattr support - reinitialize the list in kmem_xattrs_free() - rename functions to simple_xattr_*() There are two users for this patchset: - SELinux: to be able to control access to cgroupfs inside containers - systemd: to store meta information such as the main PID in a service cgroup, and to set special options for specific services in the cgroup. While the xattrs will use kernel memory like tmpfs, they're restricted to 'security' (which controls the format of the value) and 'trusted' (which requires CAP_SYS_ADMIN). If kernel memory usage is still a concern, we're not far from having memcg account for kernel memory. v6: - only allow trusted and security - replace subsys_bits with something more meaningful v5: - check permissions for the user xattr namespace v4: - implemented the changes requested by Tejun Heo in patch #2 Cc: Li Zefan Cc: Tejun Heo Cc: Hugh Dickins Cc: Hillf Danton Cc: Lennart Poettering Signed-off-by: Li Zefan Signed-off-by: Aristeu Rozanski -- Aristeu -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v6 3/4] cgroup: add xattr support
From: Li Zefan This is one of the items in the plumber's wish list. For use cases: >> What would the use case be for this? > > Attaching meta information to services, in an easily discoverable > way. For example, in systemd we create one cgroup for each service, and > could then store data like the main pid of the specific service as an > xattr on the cgroup itself. That way we'd have almost all service state > in the cgroupfs, which would make it possible to terminate systemd and > later restart it without losing any state information. But there's more: > for example, some very peculiar services cannot be terminated on > shutdown (i.e. fakeraid DM stuff) and it would be really nice if the > services in question could just mark that on their cgroup, by setting an > xattr. On the more desktopy side of things there are other > possibilities: for example there are plans defining what an application > is along the lines of a cgroup (i.e. an app being a collection of > processes). With xattrs one could then attach an icon or human readable > program name on the cgroup. > > The key idea is that this would allow attaching runtime meta information > to cgroups and everything they model (services, apps, vms), that doesn't > need any complex userspace infrastructure, has good access control > (i.e. because the file system enforces that anyway, and there's the > "trusted." xattr namespace), notifications (inotify), and can easily be > shared among applications. 
> > Lennart v6: - remove user xattr namespace, only allow trusted and security v5: - check for capabilities before setting/removing xattrs v4: - no changes v3: - instead of config option, use mount option to enable xattr support Cc: Li Zefan Cc: Tejun Heo Cc: Hugh Dickins Cc: Hillf Danton Cc: Lennart Poettering Signed-off-by: Li Zefan Signed-off-by: Aristeu Rozanski --- include/linux/cgroup.h | 13 -- kernel/cgroup.c| 100 + 2 files changed, 103 insertions(+), 10 deletions(-) Index: github/include/linux/cgroup.h === --- github.orig/include/linux/cgroup.h 2012-08-16 10:24:50.0 -0400 +++ github/include/linux/cgroup.h 2012-08-16 10:27:53.975223786 -0400 @@ -17,6 +17,7 @@ #include #include #include +#include #ifdef CONFIG_CGROUPS @@ -216,6 +217,9 @@ /* List of events which userspace want to receive */ struct list_head event_list; spinlock_t event_list_lock; + + /* directory xattrs */ + struct simple_xattrs xattrs; }; /* @@ -309,6 +313,9 @@ /* CFTYPE_* flags */ unsigned int flags; + /* file xattrs */ + struct simple_xattrs xattrs; + int (*open)(struct inode *inode, struct file *file); ssize_t (*read)(struct cgroup *cgrp, struct cftype *cft, struct file *file, @@ -394,7 +401,7 @@ */ struct cftype_set { struct list_headnode; /* chained at subsys->cftsets */ - const struct cftype *cfts; + struct cftype *cfts; }; struct cgroup_scanner { @@ -406,8 +413,8 @@ void *data; }; -int cgroup_add_cftypes(struct cgroup_subsys *ss, const struct cftype *cfts); -int cgroup_rm_cftypes(struct cgroup_subsys *ss, const struct cftype *cfts); +int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts); +int cgroup_rm_cftypes(struct cgroup_subsys *ss, struct cftype *cfts); int cgroup_is_removed(const struct cgroup *cgrp); Index: github/kernel/cgroup.c === --- github.orig/kernel/cgroup.c 2012-08-16 10:27:45.0 -0400 +++ github/kernel/cgroup.c 2012-08-16 11:10:37.470765933 -0400 @@ -276,7 +276,8 @@ /* bits in struct cgroupfs_root flags field */ enum { - ROOT_NOPREFIX, /* mounted 
subsystems have no named prefix */ + ROOT_NOPREFIX, /* mounted subsystems have no named prefix */ + ROOT_XATTR, /* supports extended attributes */ }; static int cgroup_is_releasable(const struct cgroup *cgrp) @@ -913,15 +914,19 @@ */ BUG_ON(!list_empty(>pidlists)); + simple_xattrs_free(>xattrs); + kfree_rcu(cgrp, rcu_head); } else { struct cfent *cfe = __d_cfe(dentry); struct cgroup *cgrp = dentry->d_parent->d_fsdata; + struct cftype *cft = cfe->type; WARN_ONCE(!list_empty(>node) && cgrp != >root->top_cgroup, "cfe still linked for %s\n", cfe->type->name); kfree(cfe); + simple_xattrs_free(>xattrs); } iput(inode); } @@ -1140,6 +1145,8 @@ seq_printf(seq, ",%s", ss->name); if (test_bit(ROOT_NOPREFIX, >flags)) seq_puts(seq, ",noprefix"); + if (test_bit(ROOT_XATTR,
[PATCH v6 4/4] cgroup: rename subsys_bits to subsys_mask
In a previous discussion, Tejun Heo suggested to rename references to subsys_bits (added_bits, removed_bits, etc) by something more meaningful. Cc: Li Zefan Cc: Tejun Heo Cc: Hugh Dickins Cc: Hillf Danton Cc: Lennart Poettering Signed-off-by: Aristeu Rozanski subsys_bits & (1UL << i)) { + if (root->subsys_mask & (1UL << i)) { /* Subsystem is in this hierarchy. So we want * the subsystem state from the new * cgroup */ @@ -1002,7 +1002,7 @@ struct dentry *parent; struct cgroupfs_root *root = dentry->d_sb->s_fs_info; - cgroup_clear_directory(dentry, true, root->subsys_bits); + cgroup_clear_directory(dentry, true, root->subsys_mask); parent = dentry->d_parent; spin_lock(>d_lock); @@ -1046,22 +1046,22 @@ * returns an error, no reference counts are touched. */ static int rebind_subsystems(struct cgroupfs_root *root, - unsigned long final_bits) + unsigned long final_subsys_mask) { - unsigned long added_bits, removed_bits; + unsigned long added_mask, removed_mask; struct cgroup *cgrp = >top_cgroup; int i; BUG_ON(!mutex_is_locked(_mutex)); BUG_ON(!mutex_is_locked(_root_mutex)); - removed_bits = root->actual_subsys_bits & ~final_bits; - added_bits = final_bits & ~root->actual_subsys_bits; + removed_mask = root->actual_subsys_mask & ~final_subsys_mask; + added_mask = final_subsys_mask & ~root->actual_subsys_mask; /* Check that any added subsystems are currently free */ for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { unsigned long bit = 1UL << i; struct cgroup_subsys *ss = subsys[i]; - if (!(bit & added_bits)) + if (!(bit & added_mask)) continue; /* * Nobody should tell us to do a subsys that doesn't exist: @@ -1086,7 +1086,7 @@ for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { struct cgroup_subsys *ss = subsys[i]; unsigned long bit = 1UL << i; - if (bit & added_bits) { + if (bit & added_mask) { /* We're binding this subsystem to this hierarchy */ BUG_ON(ss == NULL); BUG_ON(cgrp->subsys[i]); @@ -1099,7 +1099,7 @@ if (ss->bind) ss->bind(cgrp); /* refcount was already taken, and we're 
keeping it */ - } else if (bit & removed_bits) { + } else if (bit & removed_mask) { /* We're removing this subsystem */ BUG_ON(ss == NULL); BUG_ON(cgrp->subsys[i] != dummytop->subsys[i]); @@ -1112,7 +1112,7 @@ list_move(>sibling, _list); /* subsystem is now free - drop reference on module */ module_put(ss->module); - } else if (bit & final_bits) { + } else if (bit & final_subsys_mask) { /* Subsystem state should already exist */ BUG_ON(ss == NULL); BUG_ON(!cgrp->subsys[i]); @@ -1129,7 +1129,7 @@ BUG_ON(cgrp->subsys[i]); } } - root->subsys_bits = root->actual_subsys_bits = final_bits; + root->subsys_mask = root->actual_subsys_mask = final_subsys_mask; synchronize_rcu(); return 0; @@ -1158,7 +1158,7 @@ } struct cgroup_sb_opts { - unsigned long subsys_bits; + unsigned long subsys_mask; unsigned long flags; char *release_agent; bool clone_children; @@ -1267,7 +1267,7 @@ /* Mutually exclusive option 'all' + subsystem name */ if (all_ss) return -EINVAL; - set_bit(i, >subsys_bits); + set_bit(i, >subsys_mask); one_ss = true; break; @@ -1288,7 +1288,7 @@ continue; if (ss->disabled) continue; - set_bit(i, >subsys_bits); + set_bit(i, >subsys_mask); } } @@ -1300,19 +1300,19 @@ * the cpuset subsystem. */ if (test_bit(ROOT_NOPREFIX, >flags) && - (opts->subsys_bits & mask)) + (opts->subsys_mask & mask)) return -EINVAL; /* Can't specify "none" and some subsystems */ - if (opts->subsys_bits && opts->none) + if (opts->subsys_mask && opts->none) return -EINVAL; /* * We either have to specify by name or by subsystems. (So
[PATCH v6 4/4] cgroup: rename subsys_bits to subsys_mask
In a previous discussion, Tejun Heo suggested to rename references to subsys_bits (added_bits, removed_bits, etc) by something more meaningful. Cc: Li Zefan lize...@huawei.com Cc: Tejun Heo t...@kernel.org Cc: Hugh Dickins hu...@google.com Cc: Hillf Danton dhi...@gmail.com Cc: Lennart Poettering lpoet...@redhat.com Signed-off-by: Aristeu Rozanski a...@redhat.com --- kernel/cgroup.c | 84 1 file changed, 42 insertions(+), 42 deletions(-) Index: github/kernel/cgroup.c === --- github.orig/kernel/cgroup.c 2012-08-16 11:33:43.276337899 -0400 +++ github/kernel/cgroup.c 2012-08-16 11:33:43.292338339 -0400 @@ -111,13 +111,13 @@ * The bitmask of subsystems intended to be attached to this * hierarchy */ - unsigned long subsys_bits; + unsigned long subsys_mask; /* Unique id for this hierarchy. */ int hierarchy_id; /* The bitmask of subsystems currently attached to this hierarchy */ - unsigned long actual_subsys_bits; + unsigned long actual_subsys_mask; /* A list running through the attached subsystems */ struct list_head subsys_list; @@ -557,7 +557,7 @@ * won't change, so no need for locking. */ for (i = 0; i CGROUP_SUBSYS_COUNT; i++) { - if (root-subsys_bits (1UL i)) { + if (root-subsys_mask (1UL i)) { /* Subsystem is in this hierarchy. So we want * the subsystem state from the new * cgroup */ @@ -1002,7 +1002,7 @@ struct dentry *parent; struct cgroupfs_root *root = dentry-d_sb-s_fs_info; - cgroup_clear_directory(dentry, true, root-subsys_bits); + cgroup_clear_directory(dentry, true, root-subsys_mask); parent = dentry-d_parent; spin_lock(parent-d_lock); @@ -1046,22 +1046,22 @@ * returns an error, no reference counts are touched. 
*/ static int rebind_subsystems(struct cgroupfs_root *root, - unsigned long final_bits) + unsigned long final_subsys_mask) { - unsigned long added_bits, removed_bits; + unsigned long added_mask, removed_mask; struct cgroup *cgrp = root-top_cgroup; int i; BUG_ON(!mutex_is_locked(cgroup_mutex)); BUG_ON(!mutex_is_locked(cgroup_root_mutex)); - removed_bits = root-actual_subsys_bits ~final_bits; - added_bits = final_bits ~root-actual_subsys_bits; + removed_mask = root-actual_subsys_mask ~final_subsys_mask; + added_mask = final_subsys_mask ~root-actual_subsys_mask; /* Check that any added subsystems are currently free */ for (i = 0; i CGROUP_SUBSYS_COUNT; i++) { unsigned long bit = 1UL i; struct cgroup_subsys *ss = subsys[i]; - if (!(bit added_bits)) + if (!(bit added_mask)) continue; /* * Nobody should tell us to do a subsys that doesn't exist: @@ -1086,7 +1086,7 @@ for (i = 0; i CGROUP_SUBSYS_COUNT; i++) { struct cgroup_subsys *ss = subsys[i]; unsigned long bit = 1UL i; - if (bit added_bits) { + if (bit added_mask) { /* We're binding this subsystem to this hierarchy */ BUG_ON(ss == NULL); BUG_ON(cgrp-subsys[i]); @@ -1099,7 +1099,7 @@ if (ss-bind) ss-bind(cgrp); /* refcount was already taken, and we're keeping it */ - } else if (bit removed_bits) { + } else if (bit removed_mask) { /* We're removing this subsystem */ BUG_ON(ss == NULL); BUG_ON(cgrp-subsys[i] != dummytop-subsys[i]); @@ -1112,7 +1112,7 @@ list_move(ss-sibling, rootnode.subsys_list); /* subsystem is now free - drop reference on module */ module_put(ss-module); - } else if (bit final_bits) { + } else if (bit final_subsys_mask) { /* Subsystem state should already exist */ BUG_ON(ss == NULL); BUG_ON(!cgrp-subsys[i]); @@ -1129,7 +1129,7 @@ BUG_ON(cgrp-subsys[i]); } } - root-subsys_bits = root-actual_subsys_bits = final_bits; + root-subsys_mask = root-actual_subsys_mask = final_subsys_mask; synchronize_rcu(); return 0; @@ -1158,7 +1158,7 @@ } struct cgroup_sb_opts { - unsigned long subsys_bits; + unsigned long 
subsys_mask; unsigned long flags; char *release_agent;
[PATCH v6 3/4] cgroup: add xattr support
From: Li Zefan lize...@huawei.com This is one of the items in the plumber's wish list. For use cases: What would the use case be for this? Attaching meta information to services, in an easily discoverable way. For example, in systemd we create one cgroup for each service, and could then store data like the main pid of the specific service as an xattr on the cgroup itself. That way we'd have almost all service state in the cgroupfs, which would make it possible to terminate systemd and later restart it without losing any state information. But there's more: for example, some very peculiar services cannot be terminated on shutdown (e.g. fakeraid DM stuff) and it would be really nice if the services in question could just mark that on their cgroup, by setting an xattr. On the more desktopy side of things there are other possibilities: for example there are plans for defining what an application is along the lines of a cgroup (i.e. an app being a collection of processes). With xattrs one could then attach an icon or human readable program name to the cgroup. The key idea is that this would allow attaching runtime meta information to cgroups and everything they model (services, apps, vms), that doesn't need any complex userspace infrastructure, has good access control (i.e. because the file system enforces that anyway, and there's the "trusted." xattr namespace), notifications (inotify), and can easily be shared among applications. 
Lennart v6: - remove user xattr namespace, only allow trusted and security v5: - check for capabilities before setting/removing xattrs v4: - no changes v3: - instead of config option, use mount option to enable xattr support Cc: Li Zefan lize...@huawei.com Cc: Tejun Heo t...@kernel.org Cc: Hugh Dickins hu...@google.com Cc: Hillf Danton dhi...@gmail.com Cc: Lennart Poettering lpoet...@redhat.com Signed-off-by: Li Zefan lize...@huawei.com Signed-off-by: Aristeu Rozanski a...@redhat.com --- include/linux/cgroup.h | 13 -- kernel/cgroup.c| 100 + 2 files changed, 103 insertions(+), 10 deletions(-) Index: github/include/linux/cgroup.h === --- github.orig/include/linux/cgroup.h 2012-08-16 10:24:50.0 -0400 +++ github/include/linux/cgroup.h 2012-08-16 10:27:53.975223786 -0400 @@ -17,6 +17,7 @@ #include linux/rwsem.h #include linux/idr.h #include linux/workqueue.h +#include linux/xattr.h #ifdef CONFIG_CGROUPS @@ -216,6 +217,9 @@ /* List of events which userspace want to receive */ struct list_head event_list; spinlock_t event_list_lock; + + /* directory xattrs */ + struct simple_xattrs xattrs; }; /* @@ -309,6 +313,9 @@ /* CFTYPE_* flags */ unsigned int flags; + /* file xattrs */ + struct simple_xattrs xattrs; + int (*open)(struct inode *inode, struct file *file); ssize_t (*read)(struct cgroup *cgrp, struct cftype *cft, struct file *file, @@ -394,7 +401,7 @@ */ struct cftype_set { struct list_headnode; /* chained at subsys-cftsets */ - const struct cftype *cfts; + struct cftype *cfts; }; struct cgroup_scanner { @@ -406,8 +413,8 @@ void *data; }; -int cgroup_add_cftypes(struct cgroup_subsys *ss, const struct cftype *cfts); -int cgroup_rm_cftypes(struct cgroup_subsys *ss, const struct cftype *cfts); +int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts); +int cgroup_rm_cftypes(struct cgroup_subsys *ss, struct cftype *cfts); int cgroup_is_removed(const struct cgroup *cgrp); Index: github/kernel/cgroup.c === --- github.orig/kernel/cgroup.c 2012-08-16 10:27:45.0 
-0400 +++ github/kernel/cgroup.c 2012-08-16 11:10:37.470765933 -0400 @@ -276,7 +276,8 @@ /* bits in struct cgroupfs_root flags field */ enum { - ROOT_NOPREFIX, /* mounted subsystems have no named prefix */ + ROOT_NOPREFIX, /* mounted subsystems have no named prefix */ + ROOT_XATTR, /* supports extended attributes */ }; static int cgroup_is_releasable(const struct cgroup *cgrp) @@ -913,15 +914,19 @@ */ BUG_ON(!list_empty(cgrp-pidlists)); + simple_xattrs_free(cgrp-xattrs); + kfree_rcu(cgrp, rcu_head); } else { struct cfent *cfe = __d_cfe(dentry); struct cgroup *cgrp = dentry-d_parent-d_fsdata; + struct cftype *cft = cfe-type; WARN_ONCE(!list_empty(cfe-node) cgrp != cgrp-root-top_cgroup, cfe still linked for %s\n, cfe-type-name); kfree(cfe); + simple_xattrs_free(cft-xattrs); } iput(inode); } @@ -1140,6 +1145,8 @@
[PATCH v6 0/4] cgroup: add xattr support
This series is a refreshed version of a patchset submitted by Li Zefan back in March: https://lkml.org/lkml/2012/3/1/13 With Li's permission, I refreshed the patches to apply over the latest upstream and added the modifications suggested by others in the thread: - using a mount option instead of a config option to enable the xattr support - reinitializing the list in kmem_xattrs_free() - renaming the functions to simple_xattr_*() There are two users for this patchset: - SELinux: to be able to control access to cgroupfs inside containers - systemd: to store meta information such as the main PID in a service cgroup, and to set special options for specific services in the cgroup. While the xattrs will use kernel memory like tmpfs, they're restricted to 'security' (which controls the format of the value) and 'trusted' (which requires CAP_SYS_ADMIN). If kernel memory usage is still a concern, we're not far from having memcg account for kernel memory. v6: - only allow trusted and security - replace subsys_bits with something more meaningful v5: - check for permissions for user xattr namespace v4: - implemented requested changes by Tejun Heo in patch #2 Cc: Li Zefan lize...@huawei.com Cc: Tejun Heo t...@kernel.org Cc: Hugh Dickins hu...@google.com Cc: Hillf Danton dhi...@gmail.com Cc: Lennart Poettering lpoet...@redhat.com Signed-off-by: Li Zefan lize...@huawei.com Signed-off-by: Aristeu Rozanski a...@redhat.com -- Aristeu -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v6 2/4] cgroup: revise how we re-populate root directory
From: Li Zefan lize...@huawei.com When remounting cgroupfs with some subsystems added to it and some removed, cgroup will remove all the files in root directory and then re-popluate it. What I'm doing here is, only remove files which belong to subsystems that are to be unbinded, and only create files for newly-added subsystems. The purpose is to have all other files untouched. This is a preparation for cgroup xattr support. v6: - no changes v5: - no changes v4: - refactored cgroup_clear_directory() to not use cgroup_rm_file() - instead of going thru the list of files, get the file list using the subsystems - use 'subsys_mask' instead of {added,removed}_bits and made cgroup_populate_dir() to match the parameters with cgroup_clear_directory() v3: - refresh patches after recent refactoring Cc: Li Zefan lize...@huawei.com Cc: Tejun Heo t...@kernel.org Cc: Hugh Dickins hu...@google.com Cc: Hillf Danton dhi...@gmail.com Cc: Lennart Poettering lpoet...@redhat.com Signed-off-by: Li Zefan lize...@huawei.com Signed-off-by: Aristeu Rozanski a...@redhat.com --- kernel/cgroup.c | 61 1 file changed, 48 insertions(+), 13 deletions(-) Index: github/kernel/cgroup.c === --- github.orig/kernel/cgroup.c 2012-08-16 11:28:12.683272452 -0400 +++ github/kernel/cgroup.c 2012-08-16 11:28:42.592089681 -0400 @@ -824,7 +824,8 @@ static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode); static struct dentry *cgroup_lookup(struct inode *, struct dentry *, unsigned int); static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry); -static int cgroup_populate_dir(struct cgroup *cgrp); +static int cgroup_populate_dir(struct cgroup *cgrp, bool base_files, + unsigned long subsys_mask); static const struct inode_operations cgroup_dir_inode_operations; static const struct file_operations proc_cgroupstats_operations; @@ -963,12 +964,29 @@ return -ENOENT; } -static void cgroup_clear_directory(struct dentry *dir) +/** + * cgroup_clear_directory - selective removal of 
base and subsystem files + * @dir: directory containing the files + * @base_files: true if the base files should be removed + * @subsys_mask: mask of the subsystem ids whose files should be removed + */ +static void cgroup_clear_directory(struct dentry *dir, bool base_files, + unsigned long subsys_mask) { struct cgroup *cgrp = __d_cgrp(dir); + struct cgroup_subsys *ss; - while (!list_empty(cgrp-files)) - cgroup_rm_file(cgrp, NULL); + for_each_subsys(cgrp-root, ss) { + struct cftype_set *set; + if (!test_bit(ss-subsys_id, subsys_mask)) + continue; + list_for_each_entry(set, ss-cftsets, node) + cgroup_rm_file(cgrp, set-cfts); + } + if (base_files) { + while (!list_empty(cgrp-files)) + cgroup_rm_file(cgrp, NULL); + } } /* @@ -977,8 +995,9 @@ static void cgroup_d_remove_dir(struct dentry *dentry) { struct dentry *parent; + struct cgroupfs_root *root = dentry-d_sb-s_fs_info; - cgroup_clear_directory(dentry); + cgroup_clear_directory(dentry, true, root-subsys_bits); parent = dentry-d_parent; spin_lock(parent-d_lock); @@ -1339,6 +1358,7 @@ struct cgroupfs_root *root = sb-s_fs_info; struct cgroup *cgrp = root-top_cgroup; struct cgroup_sb_opts opts; + unsigned long added_bits, removed_bits; mutex_lock(cgrp-dentry-d_inode-i_mutex); mutex_lock(cgroup_mutex); @@ -1354,6 +1374,9 @@ pr_warning(cgroup: option changes via remount are deprecated (pid=%d comm=%s)\n, task_tgid_nr(current), current-comm); + added_bits = opts.subsys_bits ~root-subsys_bits; + removed_bits = root-subsys_bits ~opts.subsys_bits; + /* Don't allow flags or name to change at remount */ if (opts.flags != root-flags || (opts.name strcmp(opts.name, root-name))) { @@ -1369,8 +1392,9 @@ } /* clear out any existing files and repopulate subsystem files */ - cgroup_clear_directory(cgrp-dentry); - cgroup_populate_dir(cgrp); + cgroup_clear_directory(cgrp-dentry, false, removed_bits); + /* re-populate subsystem files */ + cgroup_populate_dir(cgrp, false, added_bits); if (opts.release_agent) 
strcpy(root-release_agent_path, opts.release_agent); @@ -1669,7 +1693,7 @@ BUG_ON(root-number_of_cgroups != 1); cred = override_creds(init_cred); - cgroup_populate_dir(root_cgrp); + cgroup_populate_dir(root_cgrp, true, root-subsys_bits); revert_creds(cred);
[PATCH v6 1/4] xattr: extract simple_xattr code from tmpfs
From: Li Zefan lize...@huawei.com Extract in-memory xattr APIs from tmpfs. Will be used by cgroup. $ size vmlinux.o textdata bss dec hex filename 4658782 880729 5195032 10734543 a3cbcf vmlinux.o $ size vmlinux.o textdata bss dec hex filename 4658957 880729 5195032 10734718 a3cc7e vmlinux.o v6: - no changes v5: - no changes v4: - move simple_xattrs_free() to fs/xattr.c v3: - in kmem_xattrs_free(), reinitialize the list - use simple_xattr_* prefix - introduce simple_xattr_add() to prevent direct list usage Cc: Li Zefan lize...@huawei.com Cc: Tejun Heo t...@kernel.org Cc: Hugh Dickins hu...@google.com Cc: Hillf Danton dhi...@gmail.com Cc: Lennart Poettering lpoet...@redhat.com Signed-off-by: Li Zefan lize...@huawei.com Signed-off-by: Aristeu Rozanski a...@redhat.com --- fs/xattr.c | 200 +++ include/linux/shmem_fs.h |3 include/linux/xattr.h| 25 + mm/shmem.c | 171 +++- 4 files changed, 240 insertions(+), 159 deletions(-) Index: github/fs/xattr.c === --- github.orig/fs/xattr.c 2012-08-16 11:28:12.719273435 -0400 +++ github/fs/xattr.c 2012-08-16 11:28:13.975307743 -0400 @@ -791,3 +791,203 @@ EXPORT_SYMBOL(generic_listxattr); EXPORT_SYMBOL(generic_setxattr); EXPORT_SYMBOL(generic_removexattr); + +/* + * initialize the simple_xattrs structure + */ +void simple_xattrs_init(struct simple_xattrs *xattrs) +{ + INIT_LIST_HEAD(xattrs-head); + spin_lock_init(xattrs-lock); +} + +/* + * Allocate new xattr and copy in the value; but leave the name to callers. + */ +struct simple_xattr *simple_xattr_alloc(const void *value, size_t size) +{ + struct simple_xattr *new_xattr; + size_t len; + + /* wrap around? 
*/ + len = sizeof(*new_xattr) + size; + if (len = sizeof(*new_xattr)) + return NULL; + + new_xattr = kmalloc(len, GFP_KERNEL); + if (!new_xattr) + return NULL; + + new_xattr-size = size; + memcpy(new_xattr-value, value, size); + return new_xattr; +} + +/* + * free all the xattrs + */ +void simple_xattrs_free(struct simple_xattrs *xattrs) +{ + struct simple_xattr *xattr, *node; + + spin_lock(xattrs-lock); + list_for_each_entry_safe(xattr, node, xattrs-head, list) { + kfree(xattr-name); + kfree(xattr); + } + INIT_LIST_HEAD(xattrs-head); + spin_unlock(xattrs-lock); +} + +/* + * xattr GET operation for in-memory/pseudo filesystems + */ +int simple_xattr_get(struct simple_xattrs *xattrs, const char *name, +void *buffer, size_t size) +{ + struct simple_xattr *xattr; + int ret = -ENODATA; + + spin_lock(xattrs-lock); + list_for_each_entry(xattr, xattrs-head, list) { + if (strcmp(name, xattr-name)) + continue; + + ret = xattr-size; + if (buffer) { + if (size xattr-size) + ret = -ERANGE; + else + memcpy(buffer, xattr-value, xattr-size); + } + break; + } + spin_unlock(xattrs-lock); + return ret; +} + +static int __simple_xattr_set(struct simple_xattrs *xattrs, const char *name, + const void *value, size_t size, int flags) +{ + struct simple_xattr *xattr; + struct simple_xattr *new_xattr = NULL; + size_t len; + int err = 0; + + /* value == NULL means remove */ + if (value) { + /* wrap around? 
*/ + len = sizeof(*new_xattr) + size; + if (len = sizeof(*new_xattr)) + return -ENOMEM; + + new_xattr = kmalloc(len, GFP_KERNEL); + if (!new_xattr) + return -ENOMEM; + + new_xattr-name = kstrdup(name, GFP_KERNEL); + if (!new_xattr-name) { + kfree(new_xattr); + return -ENOMEM; + } + + new_xattr-size = size; + memcpy(new_xattr-value, value, size); + } + + spin_lock(xattrs-lock); + list_for_each_entry(xattr, xattrs-head, list) { + if (!strcmp(name, xattr-name)) { + if (flags XATTR_CREATE) { + xattr = new_xattr; + err = -EEXIST; + } else if (new_xattr) { + list_replace(xattr-list, new_xattr-list); + } else { + list_del(xattr-list); + } + goto out; + } + } + if (flags XATTR_REPLACE) { +
[PATCH RESEND 0/4] device_cgroup: replace internally whitelist with exception list
The original model of device_cgroup is a whitelist where all the allowed devices are listed. The problem with this approach is that it is impossible to have the case of allowing everything but a few devices. The reason for that lies in the way the whitelist is handled internally: since there's only a whitelist, the "all devices" entry would have to be removed and replaced by the entire list of possible devices except the ones that are being denied. Since dev_t is 32 bits long, representing the allowed devices as a bitfield is not memory efficient. This patch replaces the "whitelist" by an "exceptions" list and the default policy is kept in the "deny_all" variable in the dev_cgroup structure. The current interface determines that whenever "a" is written to devices.allow or devices.deny, the entry masking all devices will be added or removed, respectively. This behavior is kept and it's what will determine the default policy: # cat devices.list a *:* rwm # echo a >devices.deny # cat devices.list # echo a >devices.allow # cat devices.list a *:* rwm The interface is also preserved. 
For example, if one wants to block access only to /dev/null: # ls -l /dev/null crw-rw-rw- 1 root root 1, 3 Jul 24 16:17 /dev/null # echo a >devices.allow # echo "c 1:3 rwm" >devices.deny # cat /dev/null cat: /dev/null: Operation not permitted # echo >/dev/null bash: /dev/null: Operation not permitted # mknod /tmp/null c 1 3 mknod: ‘/tmp/null’: Operation not permitted # echo "c 1:3 r" >devices.allow # cat /dev/null # echo >/dev/null bash: /dev/null: Operation not permitted # mknod /tmp/null c 1 3 mknod: ‘/tmp/null’: Operation not permitted # echo "c 1:3 rw" >devices.allow # echo >/dev/null # cat /dev/null # mknod /tmp/null c 1 3 mknod: ‘/tmp/null’: Operation not permitted # echo "c 1:3 rwm" >devices.allow # echo >/dev/null # cat /dev/null # mknod /tmp/null c 1 3 # device_cgroup.c | 371 1 file changed, 212 insertions(+), 159 deletions(-) -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH RESEND 3/4] device_cgroup: convert device_cgroup internally to policy + exceptions
The original model of device_cgroup is a whitelist where all the allowed devices are listed. The problem with this approach is that it is impossible to have the case of allowing everything but a few devices. The reason for that lies in the way the whitelist is handled internally: since there's only a whitelist, the "all devices" entry would have to be removed and replaced by the entire list of possible devices except the ones that are being denied. Since dev_t is 32 bits long, representing the allowed devices as a bitfield is not memory efficient. This patch replaces the "whitelist" by an "exceptions" list and the default policy is kept in the "deny_all" variable in the dev_cgroup structure. The current interface determines that whenever "a" is written to devices.allow or devices.deny, the entry masking all devices will be added or removed, respectively. This behavior is kept and it's what will determine the default policy: # cat devices.list a *:* rwm # echo a >devices.deny # cat devices.list # echo a >devices.allow # cat devices.list a *:* rwm The interface is also preserved. For example, if one wants to block access only to /dev/null: # ls -l /dev/null crw-rw-rw- 1 root root 1, 3 Jul 24 16:17 /dev/null # echo a >devices.allow # echo "c 1:3 rwm" >devices.deny # cat /dev/null cat: /dev/null: Operation not permitted # echo >/dev/null bash: /dev/null: Operation not permitted # mknod /tmp/null c 1 3 mknod: ‘/tmp/null’: Operation not permitted # echo "c 1:3 r" >devices.allow # cat /dev/null # echo >/dev/null bash: /dev/null: Operation not permitted # mknod /tmp/null c 1 3 mknod: ‘/tmp/null’: Operation not permitted # echo "c 1:3 rw" >devices.allow # echo >/dev/null # cat /dev/null # mknod /tmp/null c 1 3 mknod: ‘/tmp/null’: Operation not permitted # echo "c 1:3 rwm" >devices.allow # echo >/dev/null # cat /dev/null # mknod /tmp/null c 1 3 # Note that I didn't rename the functions/variables in this patch; that is done in the next one to make reviewing easier. 
Signed-off-by: Aristeu Rozanski --- security/device_cgroup.c | 228 +++ 1 file changed, 132 insertions(+), 96 deletions(-) Index: github/security/device_cgroup.c === --- github.orig/security/device_cgroup.c2012-07-24 17:58:42.984081909 -0400 +++ github/security/device_cgroup.c 2012-07-24 17:59:11.324823460 -0400 @@ -96,7 +96,6 @@ return -ENOMEM; } -/* Stupid prototype - don't bother combining existing entries */ /* * called under devcgroup_mutex */ @@ -136,16 +135,13 @@ struct dev_whitelist_item *walk, *tmp; list_for_each_entry_safe(walk, tmp, _cgroup->whitelist, list) { - if (walk->type == DEV_ALL) - goto remove; if (walk->type != wh->type) continue; - if (walk->major != ~0 && walk->major != wh->major) + if (walk->major != wh->major) continue; - if (walk->minor != ~0 && walk->minor != wh->minor) + if (walk->minor != wh->minor) continue; -remove: walk->access &= ~wh->access; if (!walk->access) { list_del_rcu(>list); @@ -185,19 +181,9 @@ INIT_LIST_HEAD(_cgroup->whitelist); parent_cgroup = cgroup->parent; - if (parent_cgroup == NULL) { - struct dev_whitelist_item *wh; - wh = kmalloc(sizeof(*wh), GFP_KERNEL); - if (!wh) { - kfree(dev_cgroup); - return ERR_PTR(-ENOMEM); - } - wh->minor = wh->major = ~0; - wh->type = DEV_ALL; - wh->access = ACC_MASK; + if (parent_cgroup == NULL) dev_cgroup->deny_all = false; - list_add(>list, _cgroup->whitelist); - } else { + else { parent_dev_cgroup = cgroup_to_devcgroup(parent_cgroup); mutex_lock(_mutex); ret = dev_whitelist_copy(_cgroup->whitelist, @@ -268,33 +254,48 @@ char maj[MAJMINLEN], min[MAJMINLEN], acc[ACCLEN]; rcu_read_lock(); - list_for_each_entry_rcu(wh, >whitelist, list) { - set_access(acc, wh->access); - set_majmin(maj, wh->major); - set_majmin(min, wh->minor); - seq_printf(m, "%c %s:%s %s\n", type_to_char(wh->type), + /* +* To preserve the compatibility: +* - Only show the "all devices" when the default policy is to allow +* - List the exceptions in case the default policy is to deny +*
[PATCH RESEND 4/4] device_cgroup: rename whitelist to exception list
This patch replaces the "whitelist" usage in the code and comments and replace them by exception list related information. Signed-off-by: Aristeu Rozanski --- security/device_cgroup.c | 198 +++ 1 file changed, 99 insertions(+), 99 deletions(-) Index: github/security/device_cgroup.c === --- github.orig/security/device_cgroup.c2012-07-24 17:50:14.0 -0400 +++ github/security/device_cgroup.c 2012-07-24 17:50:48.343663036 -0400 @@ -26,12 +26,12 @@ static DEFINE_MUTEX(devcgroup_mutex); /* - * whitelist locking rules: + * exception list locking rules: * hold devcgroup_mutex for update/read. * hold rcu_read_lock() for read. */ -struct dev_whitelist_item { +struct dev_exception_item { u32 major, minor; short type; short access; @@ -41,7 +41,7 @@ struct dev_cgroup { struct cgroup_subsys_state css; - struct list_head whitelist; + struct list_head exceptions; bool deny_all; }; @@ -75,12 +75,12 @@ /* * called under devcgroup_mutex */ -static int dev_whitelist_copy(struct list_head *dest, struct list_head *orig) +static int dev_exceptions_copy(struct list_head *dest, struct list_head *orig) { - struct dev_whitelist_item *wh, *tmp, *new; + struct dev_exception_item *ex, *tmp, *new; - list_for_each_entry(wh, orig, list) { - new = kmemdup(wh, sizeof(*wh), GFP_KERNEL); + list_for_each_entry(ex, orig, list) { + new = kmemdup(ex, sizeof(*ex), GFP_KERNEL); if (!new) goto free_and_exit; list_add_tail(>list, dest); @@ -89,9 +89,9 @@ return 0; free_and_exit: - list_for_each_entry_safe(wh, tmp, dest, list) { - list_del(>list); - kfree(wh); + list_for_each_entry_safe(ex, tmp, dest, list) { + list_del(>list); + kfree(ex); } return -ENOMEM; } @@ -99,50 +99,50 @@ /* * called under devcgroup_mutex */ -static int dev_whitelist_add(struct dev_cgroup *dev_cgroup, - struct dev_whitelist_item *wh) +static int dev_exception_add(struct dev_cgroup *dev_cgroup, +struct dev_exception_item *ex) { - struct dev_whitelist_item *whcopy, *walk; + struct dev_exception_item *excopy, *walk; - whcopy = kmemdup(wh, 
sizeof(*wh), GFP_KERNEL); - if (!whcopy) + excopy = kmemdup(ex, sizeof(*ex), GFP_KERNEL); + if (!excopy) return -ENOMEM; - list_for_each_entry(walk, _cgroup->whitelist, list) { - if (walk->type != wh->type) + list_for_each_entry(walk, _cgroup->exceptions, list) { + if (walk->type != ex->type) continue; - if (walk->major != wh->major) + if (walk->major != ex->major) continue; - if (walk->minor != wh->minor) + if (walk->minor != ex->minor) continue; - walk->access |= wh->access; - kfree(whcopy); - whcopy = NULL; + walk->access |= ex->access; + kfree(excopy); + excopy = NULL; } - if (whcopy != NULL) - list_add_tail_rcu(>list, _cgroup->whitelist); + if (excopy != NULL) + list_add_tail_rcu(>list, _cgroup->exceptions); return 0; } /* * called under devcgroup_mutex */ -static void dev_whitelist_rm(struct dev_cgroup *dev_cgroup, - struct dev_whitelist_item *wh) +static void dev_exception_rm(struct dev_cgroup *dev_cgroup, +struct dev_exception_item *ex) { - struct dev_whitelist_item *walk, *tmp; + struct dev_exception_item *walk, *tmp; - list_for_each_entry_safe(walk, tmp, _cgroup->whitelist, list) { - if (walk->type != wh->type) + list_for_each_entry_safe(walk, tmp, _cgroup->exceptions, list) { + if (walk->type != ex->type) continue; - if (walk->major != wh->major) + if (walk->major != ex->major) continue; - if (walk->minor != wh->minor) + if (walk->minor != ex->minor) continue; - walk->access &= ~wh->access; + walk->access &= ~ex->access; if (!walk->access) { list_del_rcu(>list); kfree_rcu(walk, rcu); @@ -151,18 +151,18 @@ } /** - * dev_whitelist_clean - frees all entries of the whitelist - * @dev_cgroup: dev_cgroup with the whitelist to be cleaned + * dev_exception_clean - frees all entries of the exception list + * @dev_cgroup: dev_cgroup with the exception list to be cleaned * * called
[PATCH RESEND 2/4] device_cgroup: introduce dev_whitelist_clean()
This function cleans all the items in a whitelist and will be used by the next patches. Signed-off-by: Aristeu Rozanski --- security/device_cgroup.c | 22 +- 1 file changed, 17 insertions(+), 5 deletions(-) Index: github/security/device_cgroup.c === --- github.orig/security/device_cgroup.c2012-07-24 17:16:00.085156162 -0400 +++ github/security/device_cgroup.c 2012-07-24 17:58:42.984081909 -0400 @@ -154,6 +154,22 @@ } } +/** + * dev_whitelist_clean - frees all entries of the whitelist + * @dev_cgroup: dev_cgroup with the whitelist to be cleaned + * + * called under devcgroup_mutex + */ +static void dev_whitelist_clean(struct dev_cgroup *dev_cgroup) +{ + struct dev_whitelist_item *wh, *tmp; + + list_for_each_entry_safe(wh, tmp, _cgroup->whitelist, list) { + list_del(>list); + kfree(wh); + } +} + /* * called from kernel/cgroup.c with cgroup_lock() held. */ @@ -200,13 +216,9 @@ static void devcgroup_destroy(struct cgroup *cgroup) { struct dev_cgroup *dev_cgroup; - struct dev_whitelist_item *wh, *tmp; dev_cgroup = cgroup_to_devcgroup(cgroup); - list_for_each_entry_safe(wh, tmp, _cgroup->whitelist, list) { - list_del(>list); - kfree(wh); - } + dev_whitelist_clean(dev_cgroup); kfree(dev_cgroup); } -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH RESEND 1/4] device_cgroup: add "deny_all" in dev_cgroup structure
deny_all will determine if the default policy is to deny all device access unless for the ones in the exception list. This variable will be used in the next patches to convert device_cgroup internally into a default policy + rules. Signed-off-by: Aristeu Rozanski --- security/device_cgroup.c |5 + 1 file changed, 5 insertions(+) Index: github/security/device_cgroup.c === --- github.orig/security/device_cgroup.c2012-07-24 17:15:58.277108951 -0400 +++ github/security/device_cgroup.c 2012-07-24 17:16:00.085156162 -0400 @@ -42,6 +42,7 @@ struct dev_cgroup { struct cgroup_subsys_state css; struct list_head whitelist; + bool deny_all; }; static inline struct dev_cgroup *css_to_devcgroup(struct cgroup_subsys_state *s) @@ -178,12 +179,14 @@ wh->minor = wh->major = ~0; wh->type = DEV_ALL; wh->access = ACC_MASK; + dev_cgroup->deny_all = false; list_add(>list, _cgroup->whitelist); } else { parent_dev_cgroup = cgroup_to_devcgroup(parent_cgroup); mutex_lock(_mutex); ret = dev_whitelist_copy(_cgroup->whitelist, _dev_cgroup->whitelist); + dev_cgroup->deny_all = parent_dev_cgroup->deny_all; mutex_unlock(_mutex); if (ret) { kfree(dev_cgroup); @@ -409,9 +412,11 @@ case DEVCG_ALLOW: if (!parent_has_perm(devcgroup, )) return -EPERM; + devcgroup->deny_all = false; return dev_whitelist_add(devcgroup, ); case DEVCG_DENY: dev_whitelist_rm(devcgroup, ); + devcgroup->deny_all = true; break; default: return -EINVAL; -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH RESEND 1/4] device_cgroup: add deny_all in dev_cgroup structure
deny_all will determine if the default policy is to deny all device access unless for the ones in the exception list. This variable will be used in the next patches to convert device_cgroup internally into a default policy + rules. Signed-off-by: Aristeu Rozanski a...@redhat.com --- security/device_cgroup.c |5 + 1 file changed, 5 insertions(+) Index: github/security/device_cgroup.c === --- github.orig/security/device_cgroup.c2012-07-24 17:15:58.277108951 -0400 +++ github/security/device_cgroup.c 2012-07-24 17:16:00.085156162 -0400 @@ -42,6 +42,7 @@ struct dev_cgroup { struct cgroup_subsys_state css; struct list_head whitelist; + bool deny_all; }; static inline struct dev_cgroup *css_to_devcgroup(struct cgroup_subsys_state *s) @@ -178,12 +179,14 @@ wh-minor = wh-major = ~0; wh-type = DEV_ALL; wh-access = ACC_MASK; + dev_cgroup-deny_all = false; list_add(wh-list, dev_cgroup-whitelist); } else { parent_dev_cgroup = cgroup_to_devcgroup(parent_cgroup); mutex_lock(devcgroup_mutex); ret = dev_whitelist_copy(dev_cgroup-whitelist, parent_dev_cgroup-whitelist); + dev_cgroup-deny_all = parent_dev_cgroup-deny_all; mutex_unlock(devcgroup_mutex); if (ret) { kfree(dev_cgroup); @@ -409,9 +412,11 @@ case DEVCG_ALLOW: if (!parent_has_perm(devcgroup, wh)) return -EPERM; + devcgroup-deny_all = false; return dev_whitelist_add(devcgroup, wh); case DEVCG_DENY: dev_whitelist_rm(devcgroup, wh); + devcgroup-deny_all = true; break; default: return -EINVAL; -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH RESEND 2/4] device_cgroup: introduce dev_whitelist_clean()
This function cleans all the items in a whitelist and will be used by the next patches. Signed-off-by: Aristeu Rozanski a...@redhat.com --- security/device_cgroup.c | 22 +- 1 file changed, 17 insertions(+), 5 deletions(-) Index: github/security/device_cgroup.c === --- github.orig/security/device_cgroup.c2012-07-24 17:16:00.085156162 -0400 +++ github/security/device_cgroup.c 2012-07-24 17:58:42.984081909 -0400 @@ -154,6 +154,22 @@ } } +/** + * dev_whitelist_clean - frees all entries of the whitelist + * @dev_cgroup: dev_cgroup with the whitelist to be cleaned + * + * called under devcgroup_mutex + */ +static void dev_whitelist_clean(struct dev_cgroup *dev_cgroup) +{ + struct dev_whitelist_item *wh, *tmp; + + list_for_each_entry_safe(wh, tmp, dev_cgroup-whitelist, list) { + list_del(wh-list); + kfree(wh); + } +} + /* * called from kernel/cgroup.c with cgroup_lock() held. */ @@ -200,13 +216,9 @@ static void devcgroup_destroy(struct cgroup *cgroup) { struct dev_cgroup *dev_cgroup; - struct dev_whitelist_item *wh, *tmp; dev_cgroup = cgroup_to_devcgroup(cgroup); - list_for_each_entry_safe(wh, tmp, dev_cgroup-whitelist, list) { - list_del(wh-list); - kfree(wh); - } + dev_whitelist_clean(dev_cgroup); kfree(dev_cgroup); } -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH RESEND 4/4] device_cgroup: rename whitelist to exception list
This patch replaces the whitelist usage in the code and comments and replace them by exception list related information. Signed-off-by: Aristeu Rozanski a...@redhat.com --- security/device_cgroup.c | 198 +++ 1 file changed, 99 insertions(+), 99 deletions(-) Index: github/security/device_cgroup.c === --- github.orig/security/device_cgroup.c2012-07-24 17:50:14.0 -0400 +++ github/security/device_cgroup.c 2012-07-24 17:50:48.343663036 -0400 @@ -26,12 +26,12 @@ static DEFINE_MUTEX(devcgroup_mutex); /* - * whitelist locking rules: + * exception list locking rules: * hold devcgroup_mutex for update/read. * hold rcu_read_lock() for read. */ -struct dev_whitelist_item { +struct dev_exception_item { u32 major, minor; short type; short access; @@ -41,7 +41,7 @@ struct dev_cgroup { struct cgroup_subsys_state css; - struct list_head whitelist; + struct list_head exceptions; bool deny_all; }; @@ -75,12 +75,12 @@ /* * called under devcgroup_mutex */ -static int dev_whitelist_copy(struct list_head *dest, struct list_head *orig) +static int dev_exceptions_copy(struct list_head *dest, struct list_head *orig) { - struct dev_whitelist_item *wh, *tmp, *new; + struct dev_exception_item *ex, *tmp, *new; - list_for_each_entry(wh, orig, list) { - new = kmemdup(wh, sizeof(*wh), GFP_KERNEL); + list_for_each_entry(ex, orig, list) { + new = kmemdup(ex, sizeof(*ex), GFP_KERNEL); if (!new) goto free_and_exit; list_add_tail(new-list, dest); @@ -89,9 +89,9 @@ return 0; free_and_exit: - list_for_each_entry_safe(wh, tmp, dest, list) { - list_del(wh-list); - kfree(wh); + list_for_each_entry_safe(ex, tmp, dest, list) { + list_del(ex-list); + kfree(ex); } return -ENOMEM; } @@ -99,50 +99,50 @@ /* * called under devcgroup_mutex */ -static int dev_whitelist_add(struct dev_cgroup *dev_cgroup, - struct dev_whitelist_item *wh) +static int dev_exception_add(struct dev_cgroup *dev_cgroup, +struct dev_exception_item *ex) { - struct dev_whitelist_item *whcopy, *walk; + struct dev_exception_item *excopy, *walk; - 
whcopy = kmemdup(wh, sizeof(*wh), GFP_KERNEL); - if (!whcopy) + excopy = kmemdup(ex, sizeof(*ex), GFP_KERNEL); + if (!excopy) return -ENOMEM; - list_for_each_entry(walk, dev_cgroup-whitelist, list) { - if (walk-type != wh-type) + list_for_each_entry(walk, dev_cgroup-exceptions, list) { + if (walk-type != ex-type) continue; - if (walk-major != wh-major) + if (walk-major != ex-major) continue; - if (walk-minor != wh-minor) + if (walk-minor != ex-minor) continue; - walk-access |= wh-access; - kfree(whcopy); - whcopy = NULL; + walk-access |= ex-access; + kfree(excopy); + excopy = NULL; } - if (whcopy != NULL) - list_add_tail_rcu(whcopy-list, dev_cgroup-whitelist); + if (excopy != NULL) + list_add_tail_rcu(excopy-list, dev_cgroup-exceptions); return 0; } /* * called under devcgroup_mutex */ -static void dev_whitelist_rm(struct dev_cgroup *dev_cgroup, - struct dev_whitelist_item *wh) +static void dev_exception_rm(struct dev_cgroup *dev_cgroup, +struct dev_exception_item *ex) { - struct dev_whitelist_item *walk, *tmp; + struct dev_exception_item *walk, *tmp; - list_for_each_entry_safe(walk, tmp, dev_cgroup-whitelist, list) { - if (walk-type != wh-type) + list_for_each_entry_safe(walk, tmp, dev_cgroup-exceptions, list) { + if (walk-type != ex-type) continue; - if (walk-major != wh-major) + if (walk-major != ex-major) continue; - if (walk-minor != wh-minor) + if (walk-minor != ex-minor) continue; - walk-access = ~wh-access; + walk-access = ~ex-access; if (!walk-access) { list_del_rcu(walk-list); kfree_rcu(walk, rcu); @@ -151,18 +151,18 @@ } /** - * dev_whitelist_clean - frees all entries of the whitelist - * @dev_cgroup: dev_cgroup with the whitelist to be cleaned + * dev_exception_clean - frees all entries of the exception list + * @dev_cgroup: dev_cgroup with the exception list to be cleaned
[PATCH RESEND 3/4] device_cgroup: convert device_cgroup internally to policy + exceptions
The original model of device_cgroup is a whitelist in which all the allowed devices are listed. The problem with this approach is that it is impossible to express the case of allowing everything but a few devices. The reason for that lies in the way the whitelist is handled internally: since there's only a whitelist, the "all devices" entry would have to be removed and replaced by the entire list of possible devices except the ones that are being denied. Since dev_t is 32 bits long, representing the allowed devices as a bitfield is not memory efficient. This patch replaces the "whitelist" by an "exceptions" list, and the default policy is kept as the "deny_all" variable in the dev_cgroup structure. The current interface determines that whenever "a" is written to devices.allow or devices.deny, the entry masking all devices will be added or removed, respectively. This behavior is kept and it's what will determine the default policy: # cat devices.list a *:* rwm # echo a > devices.deny # cat devices.list # echo a > devices.allow # cat devices.list a *:* rwm The interface is also preserved. For example, if one wants to block only access to /dev/null: # ls -l /dev/null crw-rw-rw- 1 root root 1, 3 Jul 24 16:17 /dev/null # echo a > devices.allow # echo "c 1:3 rwm" > devices.deny # cat /dev/null cat: /dev/null: Operation not permitted # echo > /dev/null bash: /dev/null: Operation not permitted # mknod /tmp/null c 1 3 mknod: '/tmp/null': Operation not permitted # echo "c 1:3 r" > devices.allow # cat /dev/null # echo > /dev/null bash: /dev/null: Operation not permitted # mknod /tmp/null c 1 3 mknod: '/tmp/null': Operation not permitted # echo "c 1:3 rw" > devices.allow # echo > /dev/null # cat /dev/null # mknod /tmp/null c 1 3 mknod: '/tmp/null': Operation not permitted # echo "c 1:3 rwm" > devices.allow # echo > /dev/null # cat /dev/null # mknod /tmp/null c 1 3 # Note that I didn't rename the functions/variables in this patch; that is done in the next one to make reviewing easier. 
Signed-off-by: Aristeu Rozanski a...@redhat.com --- security/device_cgroup.c | 228 +++ 1 file changed, 132 insertions(+), 96 deletions(-) Index: github/security/device_cgroup.c === --- github.orig/security/device_cgroup.c2012-07-24 17:58:42.984081909 -0400 +++ github/security/device_cgroup.c 2012-07-24 17:59:11.324823460 -0400 @@ -96,7 +96,6 @@ return -ENOMEM; } -/* Stupid prototype - don't bother combining existing entries */ /* * called under devcgroup_mutex */ @@ -136,16 +135,13 @@ struct dev_whitelist_item *walk, *tmp; list_for_each_entry_safe(walk, tmp, dev_cgroup-whitelist, list) { - if (walk-type == DEV_ALL) - goto remove; if (walk-type != wh-type) continue; - if (walk-major != ~0 walk-major != wh-major) + if (walk-major != wh-major) continue; - if (walk-minor != ~0 walk-minor != wh-minor) + if (walk-minor != wh-minor) continue; -remove: walk-access = ~wh-access; if (!walk-access) { list_del_rcu(walk-list); @@ -185,19 +181,9 @@ INIT_LIST_HEAD(dev_cgroup-whitelist); parent_cgroup = cgroup-parent; - if (parent_cgroup == NULL) { - struct dev_whitelist_item *wh; - wh = kmalloc(sizeof(*wh), GFP_KERNEL); - if (!wh) { - kfree(dev_cgroup); - return ERR_PTR(-ENOMEM); - } - wh-minor = wh-major = ~0; - wh-type = DEV_ALL; - wh-access = ACC_MASK; + if (parent_cgroup == NULL) dev_cgroup-deny_all = false; - list_add(wh-list, dev_cgroup-whitelist); - } else { + else { parent_dev_cgroup = cgroup_to_devcgroup(parent_cgroup); mutex_lock(devcgroup_mutex); ret = dev_whitelist_copy(dev_cgroup-whitelist, @@ -268,33 +254,48 @@ char maj[MAJMINLEN], min[MAJMINLEN], acc[ACCLEN]; rcu_read_lock(); - list_for_each_entry_rcu(wh, devcgroup-whitelist, list) { - set_access(acc, wh-access); - set_majmin(maj, wh-major); - set_majmin(min, wh-minor); - seq_printf(m, %c %s:%s %s\n, type_to_char(wh-type), + /* +* To preserve the compatibility: +* - Only show the all devices when the default policy is to allow +* - List the exceptions in case the default policy is to deny +* This way, the file
[PATCH RESEND 0/4] device_cgroup: replace internally whitelist with exception list
The original model of device_cgroup is a whitelist in which all the allowed devices are listed. The problem with this approach is that it is impossible to express the case of allowing everything but a few devices. The reason for that lies in the way the whitelist is handled internally: since there's only a whitelist, the "all devices" entry would have to be removed and replaced by the entire list of possible devices except the ones that are being denied. Since dev_t is 32 bits long, representing the allowed devices as a bitfield is not memory efficient. This patch replaces the "whitelist" by an "exceptions" list, and the default policy is kept as the "deny_all" variable in the dev_cgroup structure. The current interface determines that whenever "a" is written to devices.allow or devices.deny, the entry masking all devices will be added or removed, respectively. This behavior is kept and it's what will determine the default policy: # cat devices.list a *:* rwm # echo a > devices.deny # cat devices.list # echo a > devices.allow # cat devices.list a *:* rwm The interface is also preserved. 
For example, if one wants to block only access to /dev/null: # ls -l /dev/null crw-rw-rw- 1 root root 1, 3 Jul 24 16:17 /dev/null # echo a > devices.allow # echo "c 1:3 rwm" > devices.deny # cat /dev/null cat: /dev/null: Operation not permitted # echo > /dev/null bash: /dev/null: Operation not permitted # mknod /tmp/null c 1 3 mknod: '/tmp/null': Operation not permitted # echo "c 1:3 r" > devices.allow # cat /dev/null # echo > /dev/null bash: /dev/null: Operation not permitted # mknod /tmp/null c 1 3 mknod: '/tmp/null': Operation not permitted # echo "c 1:3 rw" > devices.allow # echo > /dev/null # cat /dev/null # mknod /tmp/null c 1 3 mknod: '/tmp/null': Operation not permitted # echo "c 1:3 rwm" > devices.allow # echo > /dev/null # cat /dev/null # mknod /tmp/null c 1 3 # device_cgroup.c | 371 1 file changed, 212 insertions(+), 159 deletions(-) -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[OOPS] 2.4.2-ac20
hi, i got this oops followed by a freeze aris Gnu C egcs-2.91.66 Gnu make 3.79 binutils 2.9.1.0.25 util-linux 2.10l modutils 2.4.1 e2fsprogs 1.18 pcmcia-cs 3.1.16 Linux C Library2.1.3 ldd: version 1.9.9 Procps 2.0.6 Net-tools 1.55 Kbd0.99 Sh-utils 2.0 Modules Loaded serial isa-pnp nsc-ircc irda apm vfat fat eepro100 esssolo1 ksymoops 2.3.7 on i686 2.4.2-ac20. Options used -V (default) -k /proc/ksyms (default) -l /proc/modules (default) -o /lib/modules/2.4.2-ac20/ (default) -m /boot/System.map.2.4.2.ac20 (specified) CPU:0 EIP:0010:[] Using defaults from ksymoops -t elf32-i386 -a i386 EFLAGS: 00010213 eax: ebx: c290270c ecx: c28f2000 edx: 0015 esi: c28f2000 edi: 0246 ebp: c28f236c esp: 0022be64 Process swapper (pid: 0, stackpage=c021b000) Stack: c01770b4 c28f2020 c016ad00 c28f2000 c28f2768 c28f2368 c28f2568 c021bfa4 c0181d00 c1121d00 20181834 c28f276c 0001 0008 3286 c10ccd84 c10ccd84 c021bedc 0082 c10ccd58 c10ccd80 Call Trace: [] [c016ad00>] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] [] Code: 0f 0b b9 80 a5 20 c0 ff 0d 80 a5 20 c0 0f 88 6e 76 0c 00 c7 >>EIP; c011645b<= Trace; c01770b4 Trace; c0119c60 <__run_task_queue+50/60> Trace; c011c0ee Trace; c0119b7f Trace; c0119ac0 Trace; c01199ca Trace; c010a175 Trace; c0107160 Trace; c0108e50 Trace; c0107160 Trace; c0107160 Trace; c0100018 Trace; c0107180 Trace; c01071e2 Trace; c0105000 Trace; c0100192 Code; c011645b <_EIP>: Code; c011645b<= 0: 0f 0b ud2a <= Code; c011645d 2: b9 80 a5 20 c0movl $0xc020a580,%ecx Code; c0116462 7: ff 0d 80 a5 20 c0 decl 0xc020a580 Code; c0116468 d: 0f 88 6e 76 0c 00 js c7681 <_EIP+0xc7681> c01ddadc Code; c011646e 13: c7 00 00 00 00 00 movl $0x0,(%eax)
[OOPS] 2.4.2-ac20
hi, i got this oops followed by a freeze aris Gnu C egcs-2.91.66 Gnu make 3.79 binutils 2.9.1.0.25 util-linux 2.10l modutils 2.4.1 e2fsprogs 1.18 pcmcia-cs 3.1.16 Linux C Library2.1.3 ldd: version 1.9.9 Procps 2.0.6 Net-tools 1.55 Kbd0.99 Sh-utils 2.0 Modules Loaded serial isa-pnp nsc-ircc irda apm vfat fat eepro100 esssolo1 ksymoops 2.3.7 on i686 2.4.2-ac20. Options used -V (default) -k /proc/ksyms (default) -l /proc/modules (default) -o /lib/modules/2.4.2-ac20/ (default) -m /boot/System.map.2.4.2.ac20 (specified) CPU:0 EIP:0010:[c011645b] Using defaults from ksymoops -t elf32-i386 -a i386 EFLAGS: 00010213 eax: ebx: c290270c ecx: c28f2000 edx: 0015 esi: c28f2000 edi: 0246 ebp: c28f236c esp: 0022be64 Process swapper (pid: 0, stackpage=c021b000) Stack: c01770b4 c28f2020 c016ad00 c28f2000 c28f2768 c28f2368 c28f2568 c021bfa4 c0181d00 c1121d00 20181834 c28f276c 0001 0008 3286 c10ccd84 c10ccd84 c021bedc 0082 c10ccd58 c10ccd80 Call Trace: [c01770b4] [c016ad00] [c0181d00] [c016dfa8] [c016933b] [c0119c60] [c011c0ee] [c0119b7f] [c0119ac0] [c01199ca] [c010a175] [c0107160] [c0108e50] [c0107160] [c0107160] [c0100018] [c0107180] [c01071e2] [c0105000] [c0100192] Code: 0f 0b b9 80 a5 20 c0 ff 0d 80 a5 20 c0 0f 88 6e 76 0c 00 c7 EIP; c011645b acquire_console_sem+f/30 = Trace; c01770b4 con_flush_chars+10/24 Trace; c0119c60 __run_task_queue+50/60 Trace; c011c0ee tqueue_bh+16/1c Trace; c0119b7f bh_action+1b/64 Trace; c0119ac0 tasklet_hi_action+38/5c Trace; c01199ca do_softirq+4a/6c Trace; c010a175 do_IRQ+a1/b4 Trace; c0107160 default_idle+0/28 Trace; c0108e50 ret_from_intr+0/20 Trace; c0107160 default_idle+0/28 Trace; c0107160 default_idle+0/28 Trace; c0100018 startup_32+18/13a Trace; c0107180 default_idle+20/28 Trace; c01071e2 cpu_idle+3a/50 Trace; c0105000 empty_bad_page+0/1000 Trace; c0100192 L6+0/2 Code; c011645b acquire_console_sem+f/30 _EIP: Code; c011645b acquire_console_sem+f/30 = 0: 0f 0b ud2a = Code; c011645d acquire_console_sem+11/30 2: b9 80 a5 20 c0movl $0xc020a580,%ecx 
Code; c0116462 acquire_console_sem+16/30 7: ff 0d 80 a5 20 c0 decl 0xc020a580 Code; c0116468 acquire_console_sem+1c/30 d: 0f 88 6e 76 0c 00 js c7681 _EIP+0xc7681 c01ddadc stext_lock+18c/10e4 Code; c011646e acquire_console_sem+22/30 13: c7 00 00 00 00 00 movl $0x0,(%eax)
[PATCH][2.2] eepro 0.12d
ok, as i don't have documentation this is the right thing to be done: restore the default path for old cards and keep the new one to these blue cards. i hope this finally fixes all problems that my changes (by guesses and lot of dosemu) introduced on a stable driver. if it doesn't work for you please email me. i won't rest until i restore the stability of the old boards and make the blue one supported. P.S.: (again) if you have any documentation about these boards please send to me. it will make my work easy and more decent than guesses by sniffing io. --- aristeu sergio rozanski filho | www.cathedrallabs.org/~aris [EMAIL PROTECTED]| [EMAIL PROTECTED] --- --- linux/drivers/net/eepro.c.old Wed Dec 27 21:44:29 2000 +++ linux/drivers/net/eepro.c Wed Dec 27 22:17:54 2000 @@ -23,6 +23,8 @@ This is a compatibility hardware problem. Versions: + 0.12d tottaly isolated old code to new code (blue cards). + (aris, 12/27/2000) 0.12c fixed other multiple cards bug and other cleanups (aris, 08/21/2000) 0.12b added reset when the tx interrupt is called and TX isn't done @@ -103,7 +105,7 @@ */ static const char *version = - "eepro.c: v0.12b 06/20/2000 [EMAIL PROTECTED]\n"; + "eepro.c: v0.12d 12/27/2000 [EMAIL PROTECTED]\n"; #include @@ -149,47 +151,28 @@ #include #include - #include -/* For linux 2.1.xx */ -#if defined (LINUX_VERSION_CODE) && LINUX_VERSION_CODE > 0x20155 - #include #include #include -#define compat_dev_kfree_skb( skb, mode ) dev_kfree_skb( (skb) ) /* I had reports of looong delays with SLOW_DOWN defined as udelay(2) */ #define SLOW_DOWN inb(0x80) -/* udelay(2) */ -#define compat_init_func(X) __initfunc(X) -#define compat_init_data __initdata - -#else -/* for 2.x */ - -#define compat_dev_kfree_skb( skb, mode ) dev_kfree_skb( (skb), (mode) ) -#define test_and_set_bit(a,b) set_bit((a),(b)) -#define SLOW_DOWN SLOW_DOWN_IO -#define compat_init_func(X) X -#define compat_init_data - -#endif - -/* First, a few definitions that the brave might change. 
*/ -/* A zero-terminated list of I/O addresses to be probed. */ -static unsigned int eepro_portlist[] compat_init_data = +/* First, a few definitions that the brave might change. + * A zero-terminated list of I/O addresses to be probed. + * note: 0x300 is default, the 595FX supports ALL IO Ports + * from 0x000 to 0x3F0, some of which are reserved in PCs + */ +static unsigned eepro_portlist[] __initdata = { 0x300, 0x210, 0x240, 0x280, 0x2C0, 0x200, 0x320, 0x340, 0x360, 0}; -/* note: 0x300 is default, the 595FX supports ALL IO Ports - from 0x000 to 0x3F0, some of which are reserved in PCs */ /* use 0 for production, 1 for verification, >2 for debug */ #ifndef NET_DEBUG #define NET_DEBUG 0 #endif -static unsigned int net_debug = NET_DEBUG; +static unsigned net_debug = NET_DEBUG; /* The number of low I/O ports used by the ethercard. */ #define EEPRO_IO_EXTENT16 @@ -204,16 +187,17 @@ struct eepro_local { struct enet_statistics stats; unsigned rx_start; - unsigned tx_start; /* start of the transmit chain */ - int tx_last; /* pointer to last packet in the transmit chain */ - unsigned tx_end; /* end of the transmit chain (plus 1) */ - int eepro; /* 1 for the EtherExpress Pro/10, - 2 for the EtherExpress Pro/10+, - 0 for other 82595-based lan cards. */ - int version;/* a flag to indicate if this is a TX or FX - version of the 82595 chip. */ + unsigned tx_start; /* start of the transmit chain */ + int tx_last;/* pointer to last packet in the transmit chain */ + unsigned tx_end;/* end of the transmit chain (plus 1) */ + int eepro; /* 1 for the EtherExpress Pro/10, +* 2 for the EtherExpress Pro/10+, +* 3 for the blue cards, +* 0 for other 82595-based lan cards. */ + int version;/* a flag to indicate if this is a TX or FX +* version of the 82595 chip. 
*/ int stepping; - spinlock_t lock; /* Serializing lock */ + spinlock_t lock;/* Serializing lock */ unsigned rcv_ram; unsigned rcv_start; unsigned xmt_bar; @@ -322,7 +306,7 @@ static void set_multicast_list(struct device *dev); static int read_eeprom(int ioaddr, int location, struct device *dev); -static void hardware_send_packet(struct device *dev, void *buf, short length); +static i
[PATCH][2.4] eepro 0.12c
hi linus, driver: eepro problem: the actual state of driver makes old supported board stop to function after some time of operation. please consider applying this patch. the cleanup and cosmetic changes will be in the next release of driver as you asked for. --- aristeu sergio rozanski filho | www.cathedrallabs.org/~aris [EMAIL PROTECTED]| [EMAIL PROTECTED] --- --- linux/drivers/net/eepro.c.old Tue Dec 5 20:29:38 2000 +++ linux/drivers/net/eepro.c Mon Jan 8 22:56:44 2001 @@ -23,6 +23,7 @@ This is a compatibility hardware problem. Versions: + 0.12c fixing some problems with old cards (aris, 01/08/2001) 0.12b misc fixes (aris, 06/26/2000) 0.12a port of version 0.12a of 2.2.x kernels to 2.3.x (aris ([EMAIL PROTECTED]), 05/19/2000) @@ -96,7 +97,7 @@ */ static const char *version = - "eepro.c: v0.12b 04/26/2000 [EMAIL PROTECTED]\n"; + "eepro.c: v0.12c 01/08/2000 [EMAIL PROTECTED]\n"; #include @@ -501,8 +502,10 @@ /* set diagnose flag */ #define eepro_diag(ioaddr) outb(DIAGNOSE_CMD, ioaddr) +#ifdef ANSWER_TX_AND_RX/* experimental way of handling interrupts */ /* ack for rx/tx int */ #define eepro_ack_rxtx(ioaddr) outb (RX_INT | TX_INT, ioaddr + STATUS_REG) +#endif /* ack for rx int */ #define eepro_ack_rx(ioaddr) outb (RX_INT, ioaddr + STATUS_REG) @@ -1067,6 +1070,8 @@ } eepro_sel_reset(ioaddr); + SLOW_DOWN; + SLOW_DOWN; lp->tx_start = lp->tx_end = XMT_LOWER_LIMIT << 8; lp->tx_last = 0; @@ -1162,9 +1167,11 @@ while (((status = inb(ioaddr + STATUS_REG)) & 0x06) && (boguscount--)) { switch (status & (RX_INT | TX_INT)) { +#ifdef ANSWER_TX_AND_RX case (RX_INT | TX_INT): eepro_ack_rxtx(ioaddr); break; +#endif case RX_INT: eepro_ack_rx(ioaddr); break; @@ -1178,6 +1185,9 @@ /* Get the received packets */ eepro_rx(dev); +#ifndef ANSWER_TX_AND_RX + continue; +#endif } if (status & TX_INT) { if (net_debug > 4) @@ -1367,7 +1377,11 @@ /* Re-enable RX and TX interrupts */ eepro_en_int(ioaddr); } - eepro_complete_selreset(ioaddr); + if (lp->eepro == LAN595FX_10ISA) { + 
eepro_complete_selreset(ioaddr); + } + else + eepro_en_rx(ioaddr); } /* The horrible routine to read a word from the serial EEPROM. */ @@ -1535,7 +1549,9 @@ printk(KERN_DEBUG "%s: exiting hardware_send_packet routine.\n", dev->name); return; } - netif_stop_queue(dev); + if (lp->eepro == LAN595FX_10ISA) + netif_stop_queue(dev); + if (net_debug > 5) printk(KERN_DEBUG "%s: exiting hardware_send_packet routine.\n", dev->name); } @@ -1654,9 +1670,13 @@ xmt_status = inw(ioaddr+IO_PORT); if ((xmt_status & TX_DONE_BIT) == 0) { - udelay(40); - boguscount--; - continue; + if (lp->eepro == LAN595FX_10ISA) { + udelay(40); + boguscount--; + continue; + } + else + break; } xmt_status = inw(ioaddr+IO_PORT); @@ -1723,7 +1743,7 @@ * interrupt again for tx. in other words: tx timeout what will take * a lot of time to happen, so we'll do a complete selreset. */ - if (!boguscount) + if (!boguscount && lp->eepro == LAN595FX_10ISA) eepro_complete_selreset(ioaddr); }
[PATCH][2.4] eepro 0.12c
hi linus, driver: eepro problem: the actual state of driver makes old supported board stop to function after some time of operation. please consider applying this patch. the cleanup and cosmetic changes will be in the next release of driver as you asked for. --- aristeu sergio rozanski filho | www.cathedrallabs.org/~aris [EMAIL PROTECTED]| [EMAIL PROTECTED] --- --- linux/drivers/net/eepro.c.old Tue Dec 5 20:29:38 2000 +++ linux/drivers/net/eepro.c Mon Jan 8 22:56:44 2001 @@ -23,6 +23,7 @@ This is a compatibility hardware problem. Versions: + 0.12c fixing some problems with old cards (aris, 01/08/2001) 0.12b misc fixes (aris, 06/26/2000) 0.12a port of version 0.12a of 2.2.x kernels to 2.3.x (aris ([EMAIL PROTECTED]), 05/19/2000) @@ -96,7 +97,7 @@ */ static const char *version = - "eepro.c: v0.12b 04/26/2000 [EMAIL PROTECTED]\n"; + "eepro.c: v0.12c 01/08/2000 [EMAIL PROTECTED]\n"; #include linux/module.h @@ -501,8 +502,10 @@ /* set diagnose flag */ #define eepro_diag(ioaddr) outb(DIAGNOSE_CMD, ioaddr) +#ifdef ANSWER_TX_AND_RX/* experimental way of handling interrupts */ /* ack for rx/tx int */ #define eepro_ack_rxtx(ioaddr) outb (RX_INT | TX_INT, ioaddr + STATUS_REG) +#endif /* ack for rx int */ #define eepro_ack_rx(ioaddr) outb (RX_INT, ioaddr + STATUS_REG) @@ -1067,6 +1070,8 @@ } eepro_sel_reset(ioaddr); + SLOW_DOWN; + SLOW_DOWN; lp-tx_start = lp-tx_end = XMT_LOWER_LIMIT 8; lp-tx_last = 0; @@ -1162,9 +1167,11 @@ while (((status = inb(ioaddr + STATUS_REG)) 0x06) (boguscount--)) { switch (status (RX_INT | TX_INT)) { +#ifdef ANSWER_TX_AND_RX case (RX_INT | TX_INT): eepro_ack_rxtx(ioaddr); break; +#endif case RX_INT: eepro_ack_rx(ioaddr); break; @@ -1178,6 +1185,9 @@ /* Get the received packets */ eepro_rx(dev); +#ifndef ANSWER_TX_AND_RX + continue; +#endif } if (status TX_INT) { if (net_debug 4) @@ -1367,7 +1377,11 @@ /* Re-enable RX and TX interrupts */ eepro_en_int(ioaddr); } - eepro_complete_selreset(ioaddr); + if (lp-eepro == LAN595FX_10ISA) { + 
eepro_complete_selreset(ioaddr); + } + else + eepro_en_rx(ioaddr); } /* The horrible routine to read a word from the serial EEPROM. */ @@ -1535,7 +1549,9 @@ printk(KERN_DEBUG "%s: exiting hardware_send_packet routine.\n", dev-name); return; } - netif_stop_queue(dev); + if (lp-eepro == LAN595FX_10ISA) + netif_stop_queue(dev); + if (net_debug 5) printk(KERN_DEBUG "%s: exiting hardware_send_packet routine.\n", dev-name); } @@ -1654,9 +1670,13 @@ xmt_status = inw(ioaddr+IO_PORT); if ((xmt_status TX_DONE_BIT) == 0) { - udelay(40); - boguscount--; - continue; + if (lp-eepro == LAN595FX_10ISA) { + udelay(40); + boguscount--; + continue; + } + else + break; } xmt_status = inw(ioaddr+IO_PORT); @@ -1723,7 +1743,7 @@ * interrupt again for tx. in other words: tx timeout what will take * a lot of time to happen, so we'll do a complete selreset. */ - if (!boguscount) + if (!boguscount lp-eepro == LAN595FX_10ISA) eepro_complete_selreset(ioaddr); }
[PATCH][2.2] eepro 0.12d
ok, as i don't have documentation this is the right thing to be done: restore the default path for old cards and keep the new one to these blue cards. i hope this finally fixes all problems that my changes (by guesses and lot of dosemu) introduced on a stable driver. if it doesn't work for you please email me. i won't rest until i restore the stability of the old boards and make the blue one supported. P.S.: (again) if you have any documentation about these boards please send to me. it will make my work easy and more decent than guesses by sniffing io. --- aristeu sergio rozanski filho | www.cathedrallabs.org/~aris [EMAIL PROTECTED]| [EMAIL PROTECTED] --- --- linux/drivers/net/eepro.c.old Wed Dec 27 21:44:29 2000 +++ linux/drivers/net/eepro.c Wed Dec 27 22:17:54 2000 @@ -23,6 +23,8 @@ This is a compatibility hardware problem. Versions: + 0.12d tottaly isolated old code to new code (blue cards). + (aris, 12/27/2000) 0.12c fixed other multiple cards bug and other cleanups (aris, 08/21/2000) 0.12b added reset when the tx interrupt is called and TX isn't done @@ -103,7 +105,7 @@ */ static const char *version = - "eepro.c: v0.12b 06/20/2000 [EMAIL PROTECTED]\n"; + "eepro.c: v0.12d 12/27/2000 [EMAIL PROTECTED]\n"; #include linux/module.h @@ -149,47 +151,28 @@ #include linux/etherdevice.h #include linux/skbuff.h - #include linux/version.h -/* For linux 2.1.xx */ -#if defined (LINUX_VERSION_CODE) LINUX_VERSION_CODE 0x20155 - #include asm/spinlock.h #include linux/init.h #include linux/delay.h -#define compat_dev_kfree_skb( skb, mode ) dev_kfree_skb( (skb) ) /* I had reports of looong delays with SLOW_DOWN defined as udelay(2) */ #define SLOW_DOWN inb(0x80) -/* udelay(2) */ -#define compat_init_func(X) __initfunc(X) -#define compat_init_data __initdata - -#else -/* for 2.x */ - -#define compat_dev_kfree_skb( skb, mode ) dev_kfree_skb( (skb), (mode) ) -#define test_and_set_bit(a,b) set_bit((a),(b)) -#define SLOW_DOWN SLOW_DOWN_IO -#define compat_init_func(X) X -#define 
compat_init_data - -#endif - -/* First, a few definitions that the brave might change. */ -/* A zero-terminated list of I/O addresses to be probed. */ -static unsigned int eepro_portlist[] compat_init_data = +/* First, a few definitions that the brave might change. + * A zero-terminated list of I/O addresses to be probed. + * note: 0x300 is default, the 595FX supports ALL IO Ports + * from 0x000 to 0x3F0, some of which are reserved in PCs + */ +static unsigned eepro_portlist[] __initdata = { 0x300, 0x210, 0x240, 0x280, 0x2C0, 0x200, 0x320, 0x340, 0x360, 0}; -/* note: 0x300 is default, the 595FX supports ALL IO Ports - from 0x000 to 0x3F0, some of which are reserved in PCs */ /* use 0 for production, 1 for verification, 2 for debug */ #ifndef NET_DEBUG #define NET_DEBUG 0 #endif -static unsigned int net_debug = NET_DEBUG; +static unsigned net_debug = NET_DEBUG; /* The number of low I/O ports used by the ethercard. */ #define EEPRO_IO_EXTENT16 @@ -204,16 +187,17 @@ struct eepro_local { struct enet_statistics stats; unsigned rx_start; - unsigned tx_start; /* start of the transmit chain */ - int tx_last; /* pointer to last packet in the transmit chain */ - unsigned tx_end; /* end of the transmit chain (plus 1) */ - int eepro; /* 1 for the EtherExpress Pro/10, - 2 for the EtherExpress Pro/10+, - 0 for other 82595-based lan cards. */ - int version;/* a flag to indicate if this is a TX or FX - version of the 82595 chip. */ + unsigned tx_start; /* start of the transmit chain */ + int tx_last;/* pointer to last packet in the transmit chain */ + unsigned tx_end;/* end of the transmit chain (plus 1) */ + int eepro; /* 1 for the EtherExpress Pro/10, +* 2 for the EtherExpress Pro/10+, +* 3 for the blue cards, +* 0 for other 82595-based lan cards. */ + int version;/* a flag to indicate if this is a TX or FX +* version of the 82595 chip. 
*/ int stepping; - spinlock_t lock; /* Serializing lock */ + spinlock_t lock;/* Serializing lock */ unsigned rcv_ram; unsigned rcv_start; unsigned xmt_bar; @@ -322,7 +306,7 @@ static void set_multicast_list(struct device *dev); static int read_eeprom(int ioaddr, int location, struct device
[PATCH] remove warnings from drivers/net/eepro.c (240-test12-pre7)(fwd)
hi, as my patches for eepro are getting late, here is a patch from Rasmus that i would apply with mine, please apply -- Aris --- Aristeu Sergio Rozanski Filho [EMAIL PROTECTED] --- -- --- linux-240-t12-pre7-clean/drivers/net/eepro.cFri Dec 8 00:44:58 2000 +++ linux/drivers/net/eepro.c Fri Dec 8 21:02:50 2000 @@ -1727,6 +1727,8 @@ eepro_complete_selreset(ioaddr); } +#ifdef MODULE + #define MAX_EEPRO 8 static struct net_device dev_eepro[MAX_EEPRO]; @@ -1737,7 +1739,7 @@ }; static int autodetect; -static int n_eepro = 0; +static int n_eepro; /* For linux 2.1.xx */ MODULE_AUTHOR("Pascal Dupuis <[EMAIL PROTECTED]> for the 2.1 stuff (locking,...)"); @@ -1746,8 +1748,6 @@ MODULE_PARM(irq, "1-" __MODULE_STRING(MAX_EEPRO) "i"); MODULE_PARM(mem, "1-" __MODULE_STRING(MAX_EEPRO) "i"); MODULE_PARM(autodetect, "1-" __MODULE_STRING(1) "i"); - -#ifdef MODULE int init_module(void) - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] Please read the FAQ at http://www.tux.org/lkml/
[PATCH] remove warnings from drivers/net/eepro.c (240-test12-pre7)(fwd)
hi, as my patches for eepro are getting late, here is a patch from Rasmus that i would apply with mine, please apply -- Aris --- Aristeu Sergio Rozanski Filho [EMAIL PROTECTED] --- -- --- linux-240-t12-pre7-clean/drivers/net/eepro.cFri Dec 8 00:44:58 2000 +++ linux/drivers/net/eepro.c Fri Dec 8 21:02:50 2000 @@ -1727,6 +1727,8 @@ eepro_complete_selreset(ioaddr); } +#ifdef MODULE + #define MAX_EEPRO 8 static struct net_device dev_eepro[MAX_EEPRO]; @@ -1737,7 +1739,7 @@ }; static int autodetect; -static int n_eepro = 0; +static int n_eepro; /* For linux 2.1.xx */ MODULE_AUTHOR("Pascal Dupuis [EMAIL PROTECTED] for the 2.1 stuff (locking,...)"); @@ -1746,8 +1748,6 @@ MODULE_PARM(irq, "1-" __MODULE_STRING(MAX_EEPRO) "i"); MODULE_PARM(mem, "1-" __MODULE_STRING(MAX_EEPRO) "i"); MODULE_PARM(autodetect, "1-" __MODULE_STRING(1) "i"); - -#ifdef MODULE int init_module(void) - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] eepro 0.12c
hi, done, thanks Jes! On 26 Sep 2000, Jes Sorensen wrote: > >>>>> "aris" == aris <[EMAIL PROTECTED]> writes: > > aris> hi, misc fixes on eepro driver, please apply > > aris> @@ -212,6 +214,12 @@ > aris> version of the 82595 chip. */ > aris> int stepping; > aris> spinlock_t lock; /* Serializing lock */ > aris> + unsigned rcv_ram; > aris> + unsigned rcv_start; > aris> + unsigned xmt_bar; > aris> + unsigned xmt_lower_limit_reg; > aris> + unsigned xmt_upper_limit_reg; > aris> + unsigned eeprom_reg; > aris> }; > > Please don't use unsigned without specifying the size, use either > unsigned int or unsigned long. > > Cheers, > Jes > - > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in > the body of a message to [EMAIL PROTECTED] > Please read the FAQ at http://www.tux.org/lkml/ > -- Aris --- Aristeu Sergio Rozanski Filho [EMAIL PROTECTED] --- --- linux/drivers/net/eepro.c.old Tue Sep 12 15:41:50 2000 +++ linux/drivers/net/eepro.c Tue Sep 26 12:46:27 2000 @@ -23,6 +23,8 @@ This is a compatibility hardware problem. Versions: + 0.12c fixed other multiple cards bug and other cleanups + (aris, 08/21/2000) 0.12b added reset when the tx interrupt is called and TX isn't done and other minor fixes. this may fix a problem found after initialization that delays tx until a transmit timeout is @@ -201,10 +203,10 @@ /* Information that need to be kept for each board. */ struct eepro_local { struct enet_statistics stats; - unsigned rx_start; - unsigned tx_start; /* start of the transmit chain */ + unsigned int rx_start; + unsigned int tx_start; /* start of the transmit chain */ int tx_last; /* pointer to last packet in the transmit chain */ - unsigned tx_end; /* end of the transmit chain (plus 1) */ + unsigned int tx_end; /* end of the transmit chain (plus 1) */ int eepro; /* 1 for the EtherExpress Pro/10, 2 for the EtherExpress Pro/10+, 0 for other 82595-based lan cards. */ @@ -212,6 +214,12 @@ version of the 82595 chip. 
*/ int stepping; spinlock_t lock; /* Serializing lock */ + unsigned int rcv_ram; + unsigned int rcv_start; + unsigned int xmt_bar; + unsigned int xmt_lower_limit_reg; + unsigned int xmt_upper_limit_reg; + unsigned int eeprom_reg; }; /* The station (ethernet) address prefix, used for IDing the board. */ @@ -356,24 +364,20 @@ #defineRCV_HEADER 8 #define RCV_DEFAULT_RAM0x6000 -#define RCV_RAMrcv_ram - -static unsigned rcv_ram = RCV_DEFAULT_RAM; +#define RCV_RAMlp->rcv_ram #define XMT_HEADER 8 #define XMT_RAM(RAM_SIZE - RCV_RAM) -#define XMT_START ((rcv_start + RCV_RAM) % RAM_SIZE) +#define XMT_START ((lp->rcv_start + RCV_RAM) % RAM_SIZE) -#define RCV_LOWER_LIMIT(rcv_start >> 8) -#define RCV_UPPER_LIMIT(((rcv_start + RCV_RAM) - 2) >> 8) +#define RCV_LOWER_LIMIT(lp->rcv_start >> 8) +#define RCV_UPPER_LIMIT(((lp->rcv_start + RCV_RAM) - 2) >> 8) #define XMT_LOWER_LIMIT(XMT_START >> 8) #define XMT_UPPER_LIMIT(((XMT_START + XMT_RAM) - 2) >> 8) #define RCV_START_PRO 0x00 #define RCV_START_10 XMT_RAM - /* by default the old driver */ -static unsigned rcv_start = RCV_START_PRO; #defineRCV_DONE0x0008 #defineRX_OK 0x2000 @@ -422,7 +426,6 @@ #defineXMT_BAR_PRO 0x0a #defineXMT_BAR_10 0x0b -static unsigned xmt_bar = XMT_BAR_PRO; #defineHOST_ADDRESS_REG0x0c #defineIO_PORT 0x0e @@ -440,8 +443,6 @@ #defineXMT_UPPER_LIMIT_REG_PRO 0x0b #defineXMT_LOWER_LIMIT_REG_10 0x0b #defineXMT_UPPER_LIMIT_REG_10 0x0a -static unsigned xmt_lower_limit_reg = XMT_LOWER_LIMIT_REG_PRO; -static unsigned xmt_upper_limit_reg = XMT_UPPER_LIMIT_REG_PRO; /* Bank 2 registers */ #defineXMT_Chain_Int 0x20/* Interrupt at the end of the transmit chain */ @@ -466,7 +467,6 @@ #define EEPROM_REG_PRO 0x0a #define EEPROM_REG_10 0x0b -static unsign
[PATCH] eepro 0.12c
hi, misc fixes on eepro driver, please apply -- Aris --- Aristeu Sergio Rozanski Filho [EMAIL PROTECTED] --- --- linux/drivers/net/eepro.c.old Tue Sep 12 15:41:50 2000 +++ linux/drivers/net/eepro.c Wed Sep 13 15:28:04 2000 @@ -23,6 +23,8 @@ This is a compatibility hardware problem. Versions: + 0.12c fixed other multiple cards bug and other cleanups + (aris, 08/21/2000) 0.12b added reset when the tx interrupt is called and TX isn't done and other minor fixes. this may fix a problem found after initialization that delays tx until a transmit timeout is @@ -212,6 +214,12 @@ version of the 82595 chip. */ int stepping; spinlock_t lock; /* Serializing lock */ + unsigned rcv_ram; + unsigned rcv_start; + unsigned xmt_bar; + unsigned xmt_lower_limit_reg; + unsigned xmt_upper_limit_reg; + unsigned eeprom_reg; }; /* The station (ethernet) address prefix, used for IDing the board. */ @@ -356,24 +364,20 @@ #defineRCV_HEADER 8 #define RCV_DEFAULT_RAM0x6000 -#define RCV_RAMrcv_ram - -static unsigned rcv_ram = RCV_DEFAULT_RAM; +#define RCV_RAMlp->rcv_ram #define XMT_HEADER 8 #define XMT_RAM(RAM_SIZE - RCV_RAM) -#define XMT_START ((rcv_start + RCV_RAM) % RAM_SIZE) +#define XMT_START ((lp->rcv_start + RCV_RAM) % RAM_SIZE) -#define RCV_LOWER_LIMIT(rcv_start >> 8) -#define RCV_UPPER_LIMIT(((rcv_start + RCV_RAM) - 2) >> 8) +#define RCV_LOWER_LIMIT(lp->rcv_start >> 8) +#define RCV_UPPER_LIMIT(((lp->rcv_start + RCV_RAM) - 2) >> 8) #define XMT_LOWER_LIMIT(XMT_START >> 8) #define XMT_UPPER_LIMIT(((XMT_START + XMT_RAM) - 2) >> 8) #define RCV_START_PRO 0x00 #define RCV_START_10 XMT_RAM - /* by default the old driver */ -static unsigned rcv_start = RCV_START_PRO; #defineRCV_DONE0x0008 #defineRX_OK 0x2000 @@ -422,7 +426,6 @@ #defineXMT_BAR_PRO 0x0a #defineXMT_BAR_10 0x0b -static unsigned xmt_bar = XMT_BAR_PRO; #defineHOST_ADDRESS_REG0x0c #defineIO_PORT 0x0e @@ -440,8 +443,6 @@ #defineXMT_UPPER_LIMIT_REG_PRO 0x0b #defineXMT_LOWER_LIMIT_REG_10 0x0b #defineXMT_UPPER_LIMIT_REG_10 0x0a -static 
unsigned xmt_lower_limit_reg = XMT_LOWER_LIMIT_REG_PRO; -static unsigned xmt_upper_limit_reg = XMT_UPPER_LIMIT_REG_PRO; /* Bank 2 registers */ #define XMT_Chain_Int 0x20 /* Interrupt at the end of the transmit chain */ @@ -466,7 +467,6 @@ #define EEPROM_REG_PRO 0x0a #define EEPROM_REG_10 0x0b -static unsigned eeprom_reg = EEPROM_REG_PRO; #define EESK 0x01 #define EECS 0x02 @@ -528,7 +528,8 @@ #define eepro_ack_tx(ioaddr) outb (TX_INT, ioaddr + STATUS_REG) /* a complete sel reset */ -#define eepro_complete_selreset(ioaddr) { eepro_dis_int(ioaddr);\ +#define eepro_complete_selreset(ioaddr) { \ + /* eepro_dis_int(ioaddr); */ \ lp->stats.tx_errors++;\ eepro_sel_reset(ioaddr);\ lp->tx_end = \ @@ -537,7 +538,7 @@ lp->tx_last = 0;\ dev->tbusy=0;\ dev->trans_start = jiffies;\ - eepro_en_int(ioaddr);\ + /*eepro_en_int(ioaddr); */ \ eepro_en_rx(ioaddr);\ } @@ -670,7 +671,15 @@ lp = (struct eepro_local *)dev->priv; - /* Now, get the ethernet hardware address from + /* default values */ + lp->rcv_start = RCV_START_PRO; + lp->xmt_bar = XMT_BAR_PRO; + lp->xmt_lower_limit_reg = XMT_LOWER_LIMIT_REG_PRO; + lp->xmt_upper_limit_reg = XMT_UPPER_LIMIT_REG_PRO; + lp->eeprom_reg = EEPROM_REG_PRO; + lp->rcv_ram = RCV_DEFAULT_RAM; + + /* Now, get the ethernet hardware address from
Re: [PATCH] eepro 0.12c
hi, done, thanks Jes! On 26 Sep 2000, Jes Sorensen wrote: > >>>>> "aris" == aris [EMAIL PROTECTED] writes: > > aris> hi, misc fixes on eepro driver, please apply > > aris> @@ -212,6 +214,12 @@ > aris> version of the 82595 chip. */ > aris> int stepping; > aris> spinlock_t lock; /* Serializing lock */ > aris> + unsigned rcv_ram; > aris> + unsigned rcv_start; > aris> + unsigned xmt_bar; > aris> + unsigned xmt_lower_limit_reg; > aris> + unsigned xmt_upper_limit_reg; > aris> + unsigned eeprom_reg; > aris> }; > > Please don't use unsigned without specifying the size, use either > unsigned int or unsigned long. > > Cheers, > Jes > - > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in > the body of a message to [EMAIL PROTECTED] > Please read the FAQ at http://www.tux.org/lkml/ > -- Aris --- Aristeu Sergio Rozanski Filho [EMAIL PROTECTED] --- --- linux/drivers/net/eepro.c.old Tue Sep 12 15:41:50 2000 +++ linux/drivers/net/eepro.c Tue Sep 26 12:46:27 2000 @@ -23,6 +23,8 @@ This is a compatibility hardware problem. Versions: + 0.12c fixed other multiple cards bug and other cleanups + (aris, 08/21/2000) 0.12b added reset when the tx interrupt is called and TX isn't done and other minor fixes. this may fix a problem found after initialization that delays tx until a transmit timeout is @@ -201,10 +203,10 @@ /* Information that need to be kept for each board. */ struct eepro_local { struct enet_statistics stats; - unsigned rx_start; - unsigned tx_start; /* start of the transmit chain */ + unsigned int rx_start; + unsigned int tx_start; /* start of the transmit chain */ int tx_last; /* pointer to last packet in the transmit chain */ - unsigned tx_end; /* end of the transmit chain (plus 1) */ + unsigned int tx_end; /* end of the transmit chain (plus 1) */ int eepro; /* 1 for the EtherExpress Pro/10, 2 for the EtherExpress Pro/10+, 0 for other 82595-based lan cards. */ @@ -212,6 +214,12 @@ version of the 82595 chip. 
*/ int stepping; spinlock_t lock; /* Serializing lock */ + unsigned int rcv_ram; + unsigned int rcv_start; + unsigned int xmt_bar; + unsigned int xmt_lower_limit_reg; + unsigned int xmt_upper_limit_reg; + unsigned int eeprom_reg; }; /* The station (ethernet) address prefix, used for IDing the board. */ @@ -356,24 +364,20 @@ #define RCV_HEADER 8 #define RCV_DEFAULT_RAM 0x6000 -#define RCV_RAM rcv_ram - -static unsigned rcv_ram = RCV_DEFAULT_RAM; +#define RCV_RAM lp->rcv_ram #define XMT_HEADER 8 #define XMT_RAM (RAM_SIZE - RCV_RAM) -#define XMT_START ((rcv_start + RCV_RAM) % RAM_SIZE) +#define XMT_START ((lp->rcv_start + RCV_RAM) % RAM_SIZE) -#define RCV_LOWER_LIMIT (rcv_start >> 8) -#define RCV_UPPER_LIMIT (((rcv_start + RCV_RAM) - 2) >> 8) +#define RCV_LOWER_LIMIT (lp->rcv_start >> 8) +#define RCV_UPPER_LIMIT (((lp->rcv_start + RCV_RAM) - 2) >> 8) #define XMT_LOWER_LIMIT (XMT_START >> 8) #define XMT_UPPER_LIMIT (((XMT_START + XMT_RAM) - 2) >> 8) #define RCV_START_PRO 0x00 #define RCV_START_10 XMT_RAM - /* by default the old driver */ -static unsigned rcv_start = RCV_START_PRO; #define RCV_DONE 0x0008 #define RX_OK 0x2000 @@ -422,7 +426,6 @@ #define XMT_BAR_PRO 0x0a #define XMT_BAR_10 0x0b -static unsigned xmt_bar = XMT_BAR_PRO; #define HOST_ADDRESS_REG 0x0c #define IO_PORT 0x0e @@ -440,8 +443,6 @@ #define XMT_UPPER_LIMIT_REG_PRO 0x0b #define XMT_LOWER_LIMIT_REG_10 0x0b #define XMT_UPPER_LIMIT_REG_10 0x0a -static unsigned xmt_lower_limit_reg = XMT_LOWER_LIMIT_REG_PRO; -static unsigned xmt_upper_limit_reg = XMT_UPPER_LIMIT_REG_PRO; /* Bank 2 registers */ #define XMT_Chain_Int 0x20 /* Interrupt at the end of the transmit chain */ @@ -466,7 +467,6 @@ #define EEPROM_REG_PRO 0x0a #define EEPROM_REG_10 0x0b -static unsigned eeprom_reg = EEPROM_REG_PRO; #define EESK 0x01 #define EECS 0x02 @@ -528,7 +528,8 @@ #define eepro_ack_tx(ioaddr) outb (TX_INT, ioaddr + STATUS_REG) /* a complete sel reset */ -#define eepro_complete_selreset(ioaddr) { eepro_dis_int
[TEST] patch for eepro driver over 2.2.17
hi, some people reported problems in eepro boards with 2.2.17 driver. please apply this patch over 2.2.17 version. warning: this is a _test_ patch! i've tested with etherexpress 10 (the only board supported by this driver that i have here - donations are very welcome ;)) and it works very well under heavy load. for interested people: test it and tell me what you think. thanks, -- Aris --- Aristeu Sergio Rozanski Filho [EMAIL PROTECTED] --- --- linux/drivers/net/eepro.c.old Tue Sep 12 15:41:50 2000 +++ linux/drivers/net/eepro.c Wed Sep 13 12:00:49 2000 @@ -212,6 +214,12 @@ version of the 82595 chip. */ int stepping; spinlock_t lock; /* Serializing lock */ + unsigned rcv_ram; + unsigned rcv_start; + unsigned xmt_bar; + unsigned xmt_lower_limit_reg; + unsigned xmt_upper_limit_reg; + unsigned eeprom_reg; }; /* The station (ethernet) address prefix, used for IDing the board. */ @@ -356,24 +364,20 @@ #define RCV_HEADER 8 #define RCV_DEFAULT_RAM 0x6000 -#define RCV_RAM rcv_ram - -static unsigned rcv_ram = RCV_DEFAULT_RAM; +#define RCV_RAM lp->rcv_ram #define XMT_HEADER 8 #define XMT_RAM (RAM_SIZE - RCV_RAM) -#define XMT_START ((rcv_start + RCV_RAM) % RAM_SIZE) +#define XMT_START ((lp->rcv_start + RCV_RAM) % RAM_SIZE) -#define RCV_LOWER_LIMIT (rcv_start >> 8) -#define RCV_UPPER_LIMIT (((rcv_start + RCV_RAM) - 2) >> 8) +#define RCV_LOWER_LIMIT (lp->rcv_start >> 8) +#define RCV_UPPER_LIMIT (((lp->rcv_start + RCV_RAM) - 2) >> 8) #define XMT_LOWER_LIMIT (XMT_START >> 8) #define XMT_UPPER_LIMIT (((XMT_START + XMT_RAM) - 2) >> 8) #define RCV_START_PRO 0x00 #define RCV_START_10 XMT_RAM - /* by default the old driver */ -static unsigned rcv_start = RCV_START_PRO; #define RCV_DONE 0x0008 #define RX_OK 0x2000 @@ -422,7 +426,6 @@ #define XMT_BAR_PRO 0x0a #define XMT_BAR_10 0x0b -static unsigned xmt_bar = XMT_BAR_PRO; #define HOST_ADDRESS_REG 0x0c #define IO_PORT 0x0e @@ -440,8 +443,6 @@ #define XMT_UPPER_LIMIT_REG_PRO 0x0b #define XMT_LOWER_LIMIT_REG_10 0x0b #define XMT_UPPER_LIMIT_REG_10 0x0a -static 
unsigned xmt_lower_limit_reg = XMT_LOWER_LIMIT_REG_PRO; -static unsigned xmt_upper_limit_reg = XMT_UPPER_LIMIT_REG_PRO; /* Bank 2 registers */ #define XMT_Chain_Int 0x20 /* Interrupt at the end of the transmit chain */ @@ -466,7 +467,6 @@ #define EEPROM_REG_PRO 0x0a #define EEPROM_REG_10 0x0b -static unsigned eeprom_reg = EEPROM_REG_PRO; #define EESK 0x01 #define EECS 0x02 @@ -528,7 +528,8 @@ #define eepro_ack_tx(ioaddr) outb (TX_INT, ioaddr + STATUS_REG) /* a complete sel reset */ -#define eepro_complete_selreset(ioaddr) { eepro_dis_int(ioaddr);\ +#define eepro_complete_selreset(ioaddr) { \ + /* eepro_dis_int(ioaddr); */ \ lp->stats.tx_errors++;\ eepro_sel_reset(ioaddr);\ lp->tx_end = \ @@ -537,7 +538,7 @@ lp->tx_last = 0;\ dev->tbusy=0;\ dev->trans_start = jiffies;\ - eepro_en_int(ioaddr);\ + /*eepro_en_int(ioaddr); */ \ eepro_en_rx(ioaddr);\ } @@ -670,7 +671,15 @@ lp = (struct eepro_local *)dev->priv; - /* Now, get the ethernet hardware address from + /* default values */ + lp->rcv_start = RCV_START_PRO; + lp->xmt_bar = XMT_BAR_PRO; + lp->xmt_lower_limit_reg = XMT_LOWER_LIMIT_REG_PRO; + lp->xmt_upper_limit_reg = XMT_UPPER_LIMIT_REG_PRO; + lp->eeprom_reg = EEPROM_REG_PRO; + lp->rcv_ram = RCV_DEFAULT_RAM; + + /* Now, get the ethernet hardware address from the EEPROM */ station_addr[0] = read_eeprom(ioaddr, 2,
Re: EEPRO Problems in 2.2.17 (sorry!)
hi, i'm working on this On Wed, 6 Sep 2000 [EMAIL PROTECTED] wrote: > > Well, > I thought the problems with the eepro driver from 2.2.16 were fixed in > 2.2.17. Apparently the problems really weren't fixed - it did seem to get > more stable though. > > I was copying some large files over a NFS mount and when it got to about 6 megs, > the NFS mount hung with symptoms similar to the 2.2.16 problems I had. > Which was: no errors in the messages log, a hung NFS mount and reloading > the driver module clears the problem up. > > So I started doing some tests with ping to see how long it would stay up. > During those tests, I noticed some weird DUP packets: > > 64 bytes from 1.2.3.4: icmp_seq=28 ttl=255 time=0.4 ms > 64 bytes from 1.2.3.4: icmp_seq=18 ttl=255 time=10894.5 ms (DUP!) > 64 bytes from 1.2.3.4: icmp_seq=29 ttl=255 time=0.4 ms > > They were all between 10-20 behind the icmp_seq and had at least a 6000ms > time. > > In the middle of a 40meg nfs copy, the driver hung at least 4 times - but > at fairly random intervals. Sometimes it will run for a long time - > sometimes it locks up twice in a row fairly fast. > > With the 2.2.14 and previous drivers I did not once notice an error with > this card. The only msg/error I see in the messages log is this: > eth1: set Rx mode to 1 address. > > But I've got that message for years now with the eepro driver. > > If anyone familiar with this driver wishes to work with me in an attempt > to reproduce this problem, I'd be happy to help. > > Thanks, > Brian Hayward > > - > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in > the body of a message to [EMAIL PROTECTED] > Please read the FAQ at http://www.tux.org/lkml/ > -- Aris --- Aristeu Sergio Rozanski Filho [EMAIL PROTECTED] --- - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] Please read the FAQ at http://www.tux.org/lkml/
Re: EEPRO Problems in 2.2.17 (sorry!)
hi, i'm working on this On Wed, 6 Sep 2000 [EMAIL PROTECTED] wrote: > > Well, > I thought the problems with the eepro driver from 2.2.16 were fixed in > 2.2.17. Apparently the problems really weren't fixed - it did seem to get > more stable though. > > I was copying some large files over a NFS mount and when it got to about 6 megs, > the NFS mount hung with symptoms similar to the 2.2.16 problems I had. > Which was: no errors in the messages log, a hung NFS mount and reloading > the driver module clears the problem up. > > So I started doing some tests with ping to see how long it would stay up. > During those tests, I noticed some weird DUP packets: > > 64 bytes from 1.2.3.4: icmp_seq=28 ttl=255 time=0.4 ms > 64 bytes from 1.2.3.4: icmp_seq=18 ttl=255 time=10894.5 ms (DUP!) > 64 bytes from 1.2.3.4: icmp_seq=29 ttl=255 time=0.4 ms > > They were all between 10-20 behind the icmp_seq and had at least a 6000ms > time. > > In the middle of a 40meg nfs copy, the driver hung at least 4 times - but > at fairly random intervals. Sometimes it will run for a long time - > sometimes it locks up twice in a row fairly fast. > > With the 2.2.14 and previous drivers I did not once notice an error with > this card. The only msg/error I see in the messages log is this: > eth1: set Rx mode to 1 address. > > But I've got that message for years now with the eepro driver. > > If anyone familiar with this driver wishes to work with me in an attempt > to reproduce this problem, I'd be happy to help. > > Thanks, > Brian Hayward > > - > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in > the body of a message to [EMAIL PROTECTED] > Please read the FAQ at http://www.tux.org/lkml/ > -- Aris --- Aristeu Sergio Rozanski Filho [EMAIL PROTECTED] --- - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] Please read the FAQ at http://www.tux.org/lkml/