On 2018-10-17 12:15, Jan Kara wrote:
> Audit tree code currently associates new fsnotify mark with each new
> chunk. As chunk attached to an inode is replaced when new tag is added /
> removed, we also need to remove old fsnotify mark and add a new one on
> such occasion.  This is cumbersome and makes locking rules somewhat
> difficult to follow.
> 
> Fix these problems by allocating fsnotify mark independently of chunk
> and keeping it all the time while there is some chunk attached to an
> inode. Also add documentation about the locking rules so that things are
> easier to follow.

Reviewed-by: Richard Guy Briggs <[email protected]>

> Signed-off-by: Jan Kara <[email protected]>
> ---
>  kernel/audit_tree.c | 160 
> +++++++++++++++++++++++++++-------------------------
>  1 file changed, 83 insertions(+), 77 deletions(-)
> 
> diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
> index 5deb4e1ed648..451d1b744e82 100644
> --- a/kernel/audit_tree.c
> +++ b/kernel/audit_tree.c
> @@ -27,7 +27,6 @@ struct audit_chunk {
>       unsigned long key;
>       struct fsnotify_mark *mark;
>       struct list_head trees;         /* with root here */
> -     int dead;
>       int count;
>       atomic_long_t refs;
>       struct rcu_head head;
> @@ -48,8 +47,15 @@ static LIST_HEAD(prune_list);
>  static struct task_struct *prune_thread;
>  
>  /*
> - * One struct chunk is attached to each inode of interest.
> - * We replace struct chunk on tagging/untagging.
> + * One struct chunk is attached to each inode of interest through
> + * audit_tree_mark (fsnotify mark). We replace struct chunk on tagging /
> + * untagging, the mark is stable as long as there is chunk attached. The
> + * association between mark and chunk is protected by hash_lock and
> + * audit_tree_group->mark_mutex. Thus as long as we hold
> + * audit_tree_group->mark_mutex and check that the mark is alive by
> + * FSNOTIFY_MARK_FLAG_ATTACHED flag check, we are sure the mark points to
> + * the current chunk.
> + *
>   * Rules have pointer to struct audit_tree.
>   * Rules have struct list_head rlist forming a list of rules over
>   * the same tree.
> @@ -68,8 +74,12 @@ static struct task_struct *prune_thread;
>   * tree is refcounted; one reference for "some rules on rules_list refer to
>   * it", one for each chunk with pointer to it.
>   *
> - * chunk is refcounted by embedded fsnotify_mark + .refs (non-zero refcount
> - * of watch contributes 1 to .refs).
> + * chunk is refcounted by embedded .refs. Mark associated with the chunk 
> holds
> + * one chunk reference. This reference is dropped either when a mark is going
> + * to be freed (corresponding inode goes away) or when chunk attached to the
> + * mark gets replaced. This reference must be dropped using
> + * audit_mark_put_chunk() to make sure the reference is dropped only after 
> RCU
> + * grace period as it protects RCU readers of the hash table.
>   *
>   * node.index allows to get from node.list to containing chunk.
>   * MSB of that sucker is stolen to mark taggings that we might have to
> @@ -160,8 +170,6 @@ static struct audit_chunk *mark_chunk(struct 
> fsnotify_mark *mark)
>  
>  static void audit_tree_destroy_watch(struct fsnotify_mark *entry)
>  {
> -     struct audit_chunk *chunk = mark_chunk(entry);
> -     audit_mark_put_chunk(chunk);
>       kmem_cache_free(audit_tree_mark_cachep, audit_mark(entry));
>  }
>  
> @@ -188,13 +196,6 @@ static struct audit_chunk *alloc_chunk(int count)
>       if (!chunk)
>               return NULL;
>  
> -     chunk->mark = alloc_mark();
> -     if (!chunk->mark) {
> -             kfree(chunk);
> -             return NULL;
> -     }
> -     audit_mark(chunk->mark)->chunk = chunk;
> -
>       INIT_LIST_HEAD(&chunk->hash);
>       INIT_LIST_HEAD(&chunk->trees);
>       chunk->count = count;
> @@ -277,6 +278,20 @@ static struct audit_chunk *find_chunk(struct node *p)
>       return container_of(p, struct audit_chunk, owners[0]);
>  }
>  
> +static void replace_mark_chunk(struct fsnotify_mark *entry,
> +                            struct audit_chunk *chunk)
> +{
> +     struct audit_chunk *old;
> +
> +     assert_spin_locked(&hash_lock);
> +     old = mark_chunk(entry);
> +     audit_mark(entry)->chunk = chunk;
> +     if (chunk)
> +             chunk->mark = entry;
> +     if (old)
> +             old->mark = NULL;
> +}
> +
>  static void replace_chunk(struct audit_chunk *new, struct audit_chunk *old)
>  {
>       struct audit_tree *owner;
> @@ -299,6 +314,7 @@ static void replace_chunk(struct audit_chunk *new, struct 
> audit_chunk *old)
>               get_tree(owner);
>               list_replace_init(&old->owners[j].list, &new->owners[i].list);
>       }
> +     replace_mark_chunk(old->mark, new);
>       /*
>        * Make sure chunk is fully initialized before making it visible in the
>        * hash. Pairs with a data dependency barrier in READ_ONCE() in
> @@ -339,23 +355,25 @@ static void untag_chunk(struct audit_chunk *chunk, 
> struct fsnotify_mark *entry)
>  
>       mutex_lock(&audit_tree_group->mark_mutex);
>       /*
> -      * mark_mutex protects mark from getting detached and thus also from
> -      * mark->connector->obj getting NULL.
> +      * mark_mutex stabilizes chunk attached to the mark so we can check
> +      * whether it didn't change while we've dropped hash_lock.
>        */
> -     if (chunk->dead || !(entry->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) {
> +     if (!(entry->flags & FSNOTIFY_MARK_FLAG_ATTACHED) ||
> +         mark_chunk(entry) != chunk) {
>               mutex_unlock(&audit_tree_group->mark_mutex);
>               return;
>       }
>  
>       size = chunk_count_trees(chunk);
>       if (!size) {
> -             chunk->dead = 1;
>               spin_lock(&hash_lock);
>               list_del_init(&chunk->trees);
>               list_del_rcu(&chunk->hash);
> +             replace_mark_chunk(entry, NULL);
>               spin_unlock(&hash_lock);
>               fsnotify_detach_mark(entry);
>               mutex_unlock(&audit_tree_group->mark_mutex);
> +             audit_mark_put_chunk(chunk);
>               fsnotify_free_mark(entry);
>               return;
>       }
> @@ -364,13 +382,6 @@ static void untag_chunk(struct audit_chunk *chunk, 
> struct fsnotify_mark *entry)
>       if (!new)
>               goto out_mutex;
>  
> -     if (fsnotify_add_mark_locked(new->mark, entry->connector->obj,
> -                                  FSNOTIFY_OBJ_TYPE_INODE, 1)) {
> -             fsnotify_put_mark(new->mark);
> -             goto out_mutex;
> -     }
> -
> -     chunk->dead = 1;
>       spin_lock(&hash_lock);
>       /*
>        * This has to go last when updating chunk as once replace_chunk() is
> @@ -378,10 +389,8 @@ static void untag_chunk(struct audit_chunk *chunk, 
> struct fsnotify_mark *entry)
>        */
>       replace_chunk(new, chunk);
>       spin_unlock(&hash_lock);
> -     fsnotify_detach_mark(entry);
>       mutex_unlock(&audit_tree_group->mark_mutex);
> -     fsnotify_free_mark(entry);
> -     fsnotify_put_mark(new->mark);   /* drop initial reference */
> +     audit_mark_put_chunk(chunk);
>       return;
>  
>  out_mutex:
> @@ -399,23 +408,31 @@ static int create_chunk(struct inode *inode, struct 
> audit_tree *tree)
>               return -ENOMEM;
>       }
>  
> -     entry = chunk->mark;
> +     entry = alloc_mark();
> +     if (!entry) {
> +             mutex_unlock(&audit_tree_group->mark_mutex);
> +             kfree(chunk);
> +             return -ENOMEM;
> +     }
> +
>       if (fsnotify_add_inode_mark_locked(entry, inode, 0)) {
>               mutex_unlock(&audit_tree_group->mark_mutex);
>               fsnotify_put_mark(entry);
> +             kfree(chunk);
>               return -ENOSPC;
>       }
>  
>       spin_lock(&hash_lock);
>       if (tree->goner) {
>               spin_unlock(&hash_lock);
> -             chunk->dead = 1;
>               fsnotify_detach_mark(entry);
>               mutex_unlock(&audit_tree_group->mark_mutex);
>               fsnotify_free_mark(entry);
>               fsnotify_put_mark(entry);
> +             kfree(chunk);
>               return 0;
>       }
> +     replace_mark_chunk(entry, chunk);
>       chunk->owners[0].index = (1U << 31);
>       chunk->owners[0].owner = tree;
>       get_tree(tree);
> @@ -432,33 +449,41 @@ static int create_chunk(struct inode *inode, struct 
> audit_tree *tree)
>       insert_hash(chunk);
>       spin_unlock(&hash_lock);
>       mutex_unlock(&audit_tree_group->mark_mutex);
> -     fsnotify_put_mark(entry);       /* drop initial reference */
> +     /*
> +      * Drop our initial reference. When mark we point to is getting freed,
> +      * we get notification through ->freeing_mark callback and cleanup
> +      * chunk pointing to this mark.
> +      */
> +     fsnotify_put_mark(entry);
>       return 0;
>  }
>  
>  /* the first tagged inode becomes root of tree */
>  static int tag_chunk(struct inode *inode, struct audit_tree *tree)
>  {
> -     struct fsnotify_mark *old_entry, *chunk_entry;
> +     struct fsnotify_mark *entry;
>       struct audit_chunk *chunk, *old;
>       struct node *p;
>       int n;
>  
>       mutex_lock(&audit_tree_group->mark_mutex);
> -     old_entry = fsnotify_find_mark(&inode->i_fsnotify_marks,
> -                                    audit_tree_group);
> -     if (!old_entry)
> +     entry = fsnotify_find_mark(&inode->i_fsnotify_marks, audit_tree_group);
> +     if (!entry)
>               return create_chunk(inode, tree);
>  
> -     old = mark_chunk(old_entry);
> -
> +     /*
> +      * Found mark is guaranteed to be attached and mark_mutex protects mark
> +      * from getting detached and thus it makes sure there is chunk attached
> +      * to the mark.
> +      */
>       /* are we already there? */
>       spin_lock(&hash_lock);
> +     old = mark_chunk(entry);
>       for (n = 0; n < old->count; n++) {
>               if (old->owners[n].owner == tree) {
>                       spin_unlock(&hash_lock);
>                       mutex_unlock(&audit_tree_group->mark_mutex);
> -                     fsnotify_put_mark(old_entry);
> +                     fsnotify_put_mark(entry);
>                       return 0;
>               }
>       }
> @@ -467,41 +492,16 @@ static int tag_chunk(struct inode *inode, struct 
> audit_tree *tree)
>       chunk = alloc_chunk(old->count + 1);
>       if (!chunk) {
>               mutex_unlock(&audit_tree_group->mark_mutex);
> -             fsnotify_put_mark(old_entry);
> +             fsnotify_put_mark(entry);
>               return -ENOMEM;
>       }
>  
> -     chunk_entry = chunk->mark;
> -
> -     /*
> -      * mark_mutex protects mark from getting detached and thus also from
> -      * mark->connector->obj getting NULL.
> -      */
> -     if (!(old_entry->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) {
> -             /* old_entry is being shot, lets just lie */
> -             mutex_unlock(&audit_tree_group->mark_mutex);
> -             fsnotify_put_mark(old_entry);
> -             fsnotify_put_mark(chunk->mark);
> -             return -ENOENT;
> -     }
> -
> -     if (fsnotify_add_mark_locked(chunk_entry, old_entry->connector->obj,
> -                                  FSNOTIFY_OBJ_TYPE_INODE, 1)) {
> -             mutex_unlock(&audit_tree_group->mark_mutex);
> -             fsnotify_put_mark(chunk_entry);
> -             fsnotify_put_mark(old_entry);
> -             return -ENOSPC;
> -     }
> -
>       spin_lock(&hash_lock);
>       if (tree->goner) {
>               spin_unlock(&hash_lock);
> -             chunk->dead = 1;
> -             fsnotify_detach_mark(chunk_entry);
>               mutex_unlock(&audit_tree_group->mark_mutex);
> -             fsnotify_free_mark(chunk_entry);
> -             fsnotify_put_mark(chunk_entry);
> -             fsnotify_put_mark(old_entry);
> +             fsnotify_put_mark(entry);
> +             kfree(chunk);
>               return 0;
>       }
>       p = &chunk->owners[chunk->count - 1];
> @@ -509,7 +509,6 @@ static int tag_chunk(struct inode *inode, struct 
> audit_tree *tree)
>       p->owner = tree;
>       get_tree(tree);
>       list_add(&p->list, &tree->chunks);
> -     old->dead = 1;
>       if (!tree->root) {
>               tree->root = chunk;
>               list_add(&tree->same_root, &chunk->trees);
> @@ -520,11 +519,10 @@ static int tag_chunk(struct inode *inode, struct 
> audit_tree *tree)
>        */
>       replace_chunk(chunk, old);
>       spin_unlock(&hash_lock);
> -     fsnotify_detach_mark(old_entry);
>       mutex_unlock(&audit_tree_group->mark_mutex);
> -     fsnotify_free_mark(old_entry);
> -     fsnotify_put_mark(chunk_entry); /* drop initial reference */
> -     fsnotify_put_mark(old_entry); /* pair to fsnotify_find mark_entry */
> +     fsnotify_put_mark(entry); /* pair to fsnotify_find mark_entry */
> +     audit_mark_put_chunk(old);
> +
>       return 0;
>  }
>  
> @@ -587,6 +585,9 @@ static void prune_tree_chunks(struct audit_tree *victim, 
> bool tagged)
>               chunk = find_chunk(p);
>               mark = chunk->mark;
>               remove_chunk_node(chunk, p);
> +             /* Racing with audit_tree_freeing_mark()? */
> +             if (!mark)
> +                     continue;
>               fsnotify_get_mark(mark);
>               spin_unlock(&hash_lock);
>  
> @@ -1009,10 +1010,6 @@ static void evict_chunk(struct audit_chunk *chunk)
>       int need_prune = 0;
>       int n;
>  
> -     if (chunk->dead)
> -             return;
> -
> -     chunk->dead = 1;
>       mutex_lock(&audit_filter_mutex);
>       spin_lock(&hash_lock);
>       while (!list_empty(&chunk->trees)) {
> @@ -1051,9 +1048,18 @@ static int audit_tree_handle_event(struct 
> fsnotify_group *group,
>  
>  static void audit_tree_freeing_mark(struct fsnotify_mark *entry, struct 
> fsnotify_group *group)
>  {
> -     struct audit_chunk *chunk = mark_chunk(entry);
> +     struct audit_chunk *chunk;
>  
> -     evict_chunk(chunk);
> +     mutex_lock(&entry->group->mark_mutex);
> +     spin_lock(&hash_lock);
> +     chunk = mark_chunk(entry);
> +     replace_mark_chunk(entry, NULL);
> +     spin_unlock(&hash_lock);
> +     mutex_unlock(&entry->group->mark_mutex);
> +     if (chunk) {
> +             evict_chunk(chunk);
> +             audit_mark_put_chunk(chunk);
> +     }
>  
>       /*
>        * We are guaranteed to have at least one reference to the mark from
> -- 
> 2.16.4
> 

- RGB

--
Richard Guy Briggs <[email protected]>
Sr. S/W Engineer, Kernel Security, Base Operating Systems
Remote, Ottawa, Red Hat Canada
IRC: rgb, SunRaycer
Voice: +1.647.777.2635, Internal: (81) 32635

--
Linux-audit mailing list
[email protected]
https://www.redhat.com/mailman/listinfo/linux-audit

Reply via email to