[RFC PATCH v2 7/7] lib/dlock-list: Use the per-subnode APIs for managing lists
This patch modifies the dlock-list to use the per-subnode APIs to manage the distributed lists. As a result, the number of lists that need to be iterated in dlock_list_iterate() will be reduced at least by half making the iteration a bit faster. Signed-off-by: Waiman Long --- include/linux/dlock-list.h | 81 +-- lib/dlock-list.c | 19 +- 2 files changed, 50 insertions(+), 50 deletions(-) diff --git a/include/linux/dlock-list.h b/include/linux/dlock-list.h index a8e1fd2..01667fc 100644 --- a/include/linux/dlock-list.h +++ b/include/linux/dlock-list.h @@ -20,12 +20,12 @@ #include #include -#include +#include /* * include/linux/dlock-list.h * - * A distributed (per-cpu) set of lists each of which is protected by its + * A distributed (per-subnode) set of lists each of which is protected by its * own spinlock, but acts like a single consolidated list to the callers. * * The dlock_list_head structure contains the spinlock, the other @@ -45,19 +45,19 @@ struct dlock_list_head { } /* - * Per-cpu list iteration state + * Per-subnode list iteration state */ struct dlock_list_state { - int cpu; + int snid; /* Subnode ID */ spinlock_t *lock; - struct list_head*head; /* List head of current per-cpu list */ + struct list_head*head; /* List head of current per-subnode list */ struct dlock_list_node *curr; struct dlock_list_node *next; }; #define DLOCK_LIST_STATE_INIT()\ { \ - .cpu = -1, \ + .snid = -1,\ .lock = NULL, \ .head = NULL, \ .curr = NULL, \ @@ -69,7 +69,7 @@ struct dlock_list_state { static inline void init_dlock_list_state(struct dlock_list_state *state) { - state->cpu = -1; + state->snid = -1; state->lock = NULL; state->head = NULL; state->curr = NULL; @@ -83,12 +83,12 @@ static inline void init_dlock_list_state(struct dlock_list_state *state) #endif /* - * Next per-cpu list entry + * Next per-subnode list entry */ #define dlock_list_next_entry(pos, member) list_next_entry(pos, member.list) /* - * Per-cpu node data structure + * Per-subnode node data structure */ 
struct dlock_list_node { struct list_head list; @@ -109,50 +109,50 @@ static inline void init_dlock_list_node(struct dlock_list_node *node) } static inline void -free_dlock_list_head(struct dlock_list_head __percpu **pdlock_head) +free_dlock_list_head(struct dlock_list_head __persubnode **pdlock_head) { - free_percpu(*pdlock_head); + free_persubnode(*pdlock_head); *pdlock_head = NULL; } /* - * Check if all the per-cpu lists are empty + * Check if all the per-subnode lists are empty */ -static inline bool dlock_list_empty(struct dlock_list_head __percpu *dlock_head) +static inline bool dlock_list_empty(struct dlock_list_head __persubnode *dlock_head) { - int cpu; + int snid; - for_each_possible_cpu(cpu) - if (!list_empty(_cpu_ptr(dlock_head, cpu)->list)) + for_each_subnode(snid) + if (!list_empty(_subnode_ptr(dlock_head, snid)->list)) return false; return true; } /* - * Helper function to find the first entry of the next per-cpu list - * It works somewhat like for_each_possible_cpu(cpu). + * Helper function to find the first entry of the next per-subnode list + * It works somewhat like for_each_subnode(snid). 
* * Return: true if the entry is found, false if all the lists exhausted */ static __always_inline bool -__dlock_list_next_cpu(struct dlock_list_head __percpu *head, +__dlock_list_next_subnode(struct dlock_list_head __persubnode *head, struct dlock_list_state *state) { if (state->lock) spin_unlock(state->lock); -next_cpu: +next_subnode: /* -* for_each_possible_cpu(cpu) +* for_each_subnode(snid) */ - state->cpu = cpumask_next(state->cpu, cpu_possible_mask); - if (state->cpu >= nr_cpu_ids) - return false; /* All the per-cpu lists iterated */ + state->snid = cpumask_next(state->snid, subnode_mask); + if (state->snid >= nr_subnode_ids) + return false; /* All the per-subnode lists iterated */ - state->head = _cpu_ptr(head, state->cpu)->list; + state->head = _subnode_ptr(head, state->snid)->list; if (list_empty(state->head)) - goto next_cpu; + goto next_subnode; - state->lock = _cpu_ptr(head, state->cpu)->lock; + state->lock = _subnode_ptr(head, state->snid)->lock; spin_lock(state->lock);
[PATCH v2 3/7] fsnotify: Simplify inode iteration on umount
From: Jan Kara fsnotify_unmount_inodes() played complex tricks to pin next inode in the sb->s_inodes list when iterating over all inodes. If we switch to keeping current inode pinned somewhat longer, we can make the code much simpler and standard. Signed-off-by: Jan Kara Signed-off-by: Waiman Long --- fs/notify/inode_mark.c | 45 + 1 files changed, 9 insertions(+), 36 deletions(-) diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c index 741077d..a364524 100644 --- a/fs/notify/inode_mark.c +++ b/fs/notify/inode_mark.c @@ -150,12 +150,10 @@ int fsnotify_add_inode_mark(struct fsnotify_mark *mark, */ void fsnotify_unmount_inodes(struct super_block *sb) { - struct inode *inode, *next_i, *need_iput = NULL; + struct inode *inode, *iput_inode = NULL; spin_lock(>s_inode_list_lock); - list_for_each_entry_safe(inode, next_i, >s_inodes, i_sb_list) { - struct inode *need_iput_tmp; - + list_for_each_entry(inode, >s_inodes, i_sb_list) { /* * We cannot __iget() an inode in state I_FREEING, * I_WILL_FREE, or I_NEW which is fine because by that point @@ -178,49 +176,24 @@ void fsnotify_unmount_inodes(struct super_block *sb) continue; } - need_iput_tmp = need_iput; - need_iput = NULL; - - /* In case fsnotify_inode_delete() drops a reference. */ - if (inode != need_iput_tmp) - __iget(inode); - else - need_iput_tmp = NULL; + __iget(inode); spin_unlock(>i_lock); - - /* In case the dropping of a reference would nuke next_i. */ - while (_i->i_sb_list != >s_inodes) { - spin_lock(_i->i_lock); - if (!(next_i->i_state & (I_FREEING | I_WILL_FREE)) && - atomic_read(_i->i_count)) { - __iget(next_i); - need_iput = next_i; - spin_unlock(_i->i_lock); - break; - } - spin_unlock(_i->i_lock); - next_i = list_next_entry(next_i, i_sb_list); - } - - /* -* We can safely drop s_inode_list_lock here because either -* we actually hold references on both inode and next_i or -* end of list. Also no new inodes will be added since the -* umount has begun. 
-*/ spin_unlock(>s_inode_list_lock); - if (need_iput_tmp) - iput(need_iput_tmp); + if (iput_inode) + iput(iput_inode); /* for each watch, send FS_UNMOUNT and then remove it */ fsnotify(inode, FS_UNMOUNT, inode, FSNOTIFY_EVENT_INODE, NULL, 0); fsnotify_inode_delete(inode); - iput(inode); + iput_inode = inode; spin_lock(>s_inode_list_lock); } spin_unlock(>s_inode_list_lock); + + if (iput_inode) + iput(iput_inode); } -- 1.7.1
[PATCH v2 5/7] vfs: Use dlock list for superblock's inode list
When many threads are trying to add or delete inodes to or from a superblock's s_inodes list, spinlock contention on the list can become a performance bottleneck. This patch changes the s_inodes field to become a dlock list, which is a distributed set of lists with per-list spinlocks. As a result, the following superblock inode list (sb->s_inodes) iteration functions in vfs are also being modified: 1. iterate_bdevs() 2. drop_pagecache_sb() 3. wait_sb_inodes() 4. evict_inodes() 5. invalidate_inodes() 6. fsnotify_unmount_inodes() 7. add_dquot_ref() 8. remove_dquot_ref() The change was tested with an exit microbenchmark that creates a large number of threads, attaches many inodes to them and then exits. The runtimes of that microbenchmark with 1000 threads before and after the patch on a 4-socket Intel E7-4820 v3 system (40 cores, 80 threads) were as follows (Kernel: Elapsed Time / System Time): Vanilla 4.5-rc4: 65.29s / 82m14s; Patched 4.5-rc4: 22.81s / 23m03s. Before the patch, spinlock contention at the inode_sb_list_add() function at the startup phase and the inode_sb_list_del() function at the exit phase were about 79% and 93% of total CPU time respectively (as measured by perf). After the patch, the dlock_list_add() function consumed only about 0.04% of CPU time at startup phase. The dlock_list_del() function consumed about 0.4% of CPU time at exit phase. There was still some spinlock contention, but it happened elsewhere. 
Signed-off-by: Waiman LongReviewed-by: Jan Kara --- fs/block_dev.c | 13 +++-- fs/drop_caches.c | 10 +- fs/fs-writeback.c | 13 +++-- fs/inode.c | 36 +++- fs/notify/inode_mark.c | 10 +- fs/quota/dquot.c | 16 fs/super.c |7 --- include/linux/fs.h |8 8 files changed, 55 insertions(+), 58 deletions(-) diff --git a/fs/block_dev.c b/fs/block_dev.c index 71ccab1..21e9064 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1896,11 +1896,13 @@ EXPORT_SYMBOL(__invalidate_device); void iterate_bdevs(void (*func)(struct block_device *, void *), void *arg) { struct inode *inode, *old_inode = NULL; + DEFINE_DLOCK_LIST_STATE(state); - spin_lock(_superblock->s_inode_list_lock); - list_for_each_entry(inode, _superblock->s_inodes, i_sb_list) { - struct address_space *mapping = inode->i_mapping; + while (dlock_list_iterate(blockdev_superblock->s_inodes, )) { + struct address_space *mapping; + inode = list_entry(state.curr, struct inode, i_sb_list); + mapping = inode->i_mapping; spin_lock(>i_lock); if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW) || mapping->nrpages == 0) { @@ -1909,7 +1911,7 @@ void iterate_bdevs(void (*func)(struct block_device *, void *), void *arg) } __iget(inode); spin_unlock(>i_lock); - spin_unlock(_superblock->s_inode_list_lock); + spin_unlock(state.lock); /* * We hold a reference to 'inode' so it couldn't have been * removed from s_inodes list while we dropped the @@ -1923,8 +1925,7 @@ void iterate_bdevs(void (*func)(struct block_device *, void *), void *arg) func(I_BDEV(inode), arg); - spin_lock(_superblock->s_inode_list_lock); + spin_lock(state.lock); } - spin_unlock(_superblock->s_inode_list_lock); iput(old_inode); } diff --git a/fs/drop_caches.c b/fs/drop_caches.c index d72d52b..26b6c68 100644 --- a/fs/drop_caches.c +++ b/fs/drop_caches.c @@ -16,9 +16,10 @@ int sysctl_drop_caches; static void drop_pagecache_sb(struct super_block *sb, void *unused) { struct inode *inode, *toput_inode = NULL; + DEFINE_DLOCK_LIST_STATE(state); - 
spin_lock(>s_inode_list_lock); - list_for_each_entry(inode, >s_inodes, i_sb_list) { + while (dlock_list_iterate(sb->s_inodes, )) { + inode = list_entry(state.curr, struct inode, i_sb_list); spin_lock(>i_lock); if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) || (inode->i_mapping->nrpages == 0)) { @@ -27,15 +28,14 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused) } __iget(inode); spin_unlock(>i_lock); - spin_unlock(>s_inode_list_lock); + spin_unlock(state.lock); invalidate_mapping_pages(inode->i_mapping, 0, -1); iput(toput_inode); toput_inode = inode; - spin_lock(>s_inode_list_lock); + spin_lock(state.lock); } -
[PATCH v2 5/7] vfs: Use dlock list for superblock's inode list
When many threads are trying to add or delete inodes to or from a superblock's s_inodes list, spinlock contention on the list can become a performance bottleneck. This patch changes the s_inodes field to become a dlock list, which is a distributed set of lists with per-list spinlocks. As a result, the following superblock inode list (sb->s_inodes) iteration functions in vfs are also being modified: 1. iterate_bdevs() 2. drop_pagecache_sb() 3. wait_sb_inodes() 4. evict_inodes() 5. invalidate_inodes() 6. fsnotify_unmount_inodes() 7. add_dquot_ref() 8. remove_dquot_ref() The change was tested with an exit microbenchmark that creates a large number of threads, attaches many inodes to them and then exits. The runtimes of that microbenchmark with 1000 threads before and after the patch on a 4-socket Intel E7-4820 v3 system (40 cores, 80 threads) were as follows (Kernel: Elapsed Time / System Time): Vanilla 4.5-rc4: 65.29s / 82m14s; Patched 4.5-rc4: 22.81s / 23m03s. Before the patch, spinlock contention at the inode_sb_list_add() function at the startup phase and the inode_sb_list_del() function at the exit phase were about 79% and 93% of total CPU time respectively (as measured by perf). After the patch, the dlock_list_add() function consumed only about 0.04% of CPU time at startup phase. The dlock_list_del() function consumed about 0.4% of CPU time at exit phase. There was still some spinlock contention, but it happened elsewhere. 
Signed-off-by: Waiman Long Reviewed-by: Jan Kara --- fs/block_dev.c | 13 +++-- fs/drop_caches.c | 10 +- fs/fs-writeback.c | 13 +++-- fs/inode.c | 36 +++- fs/notify/inode_mark.c | 10 +- fs/quota/dquot.c | 16 fs/super.c |7 --- include/linux/fs.h |8 8 files changed, 55 insertions(+), 58 deletions(-) diff --git a/fs/block_dev.c b/fs/block_dev.c index 71ccab1..21e9064 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1896,11 +1896,13 @@ EXPORT_SYMBOL(__invalidate_device); void iterate_bdevs(void (*func)(struct block_device *, void *), void *arg) { struct inode *inode, *old_inode = NULL; + DEFINE_DLOCK_LIST_STATE(state); - spin_lock(_superblock->s_inode_list_lock); - list_for_each_entry(inode, _superblock->s_inodes, i_sb_list) { - struct address_space *mapping = inode->i_mapping; + while (dlock_list_iterate(blockdev_superblock->s_inodes, )) { + struct address_space *mapping; + inode = list_entry(state.curr, struct inode, i_sb_list); + mapping = inode->i_mapping; spin_lock(>i_lock); if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW) || mapping->nrpages == 0) { @@ -1909,7 +1911,7 @@ void iterate_bdevs(void (*func)(struct block_device *, void *), void *arg) } __iget(inode); spin_unlock(>i_lock); - spin_unlock(_superblock->s_inode_list_lock); + spin_unlock(state.lock); /* * We hold a reference to 'inode' so it couldn't have been * removed from s_inodes list while we dropped the @@ -1923,8 +1925,7 @@ void iterate_bdevs(void (*func)(struct block_device *, void *), void *arg) func(I_BDEV(inode), arg); - spin_lock(_superblock->s_inode_list_lock); + spin_lock(state.lock); } - spin_unlock(_superblock->s_inode_list_lock); iput(old_inode); } diff --git a/fs/drop_caches.c b/fs/drop_caches.c index d72d52b..26b6c68 100644 --- a/fs/drop_caches.c +++ b/fs/drop_caches.c @@ -16,9 +16,10 @@ int sysctl_drop_caches; static void drop_pagecache_sb(struct super_block *sb, void *unused) { struct inode *inode, *toput_inode = NULL; + DEFINE_DLOCK_LIST_STATE(state); - 
spin_lock(>s_inode_list_lock); - list_for_each_entry(inode, >s_inodes, i_sb_list) { + while (dlock_list_iterate(sb->s_inodes, )) { + inode = list_entry(state.curr, struct inode, i_sb_list); spin_lock(>i_lock); if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) || (inode->i_mapping->nrpages == 0)) { @@ -27,15 +28,14 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused) } __iget(inode); spin_unlock(>i_lock); - spin_unlock(>s_inode_list_lock); + spin_unlock(state.lock); invalidate_mapping_pages(inode->i_mapping, 0, -1); iput(toput_inode); toput_inode = inode; - spin_lock(>s_inode_list_lock); + spin_lock(state.lock); } - spin_unlock(>s_inode_list_lock);
[PATCH v2 2/7] lib/dlock-list: Add __percpu modifier for parameters
From: Boqun Feng Add the __percpu modifier properly to help: 1. Differentiate pointers to actual structures from those to percpu structures, which could improve readability. 2. Prevent sparse from complaining about "different address spaces" Signed-off-by: Boqun Feng Signed-off-by: Waiman Long --- include/linux/dlock-list.h | 18 ++ lib/dlock-list.c |5 +++-- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/include/linux/dlock-list.h b/include/linux/dlock-list.h index 43355f8..a8e1fd2 100644 --- a/include/linux/dlock-list.h +++ b/include/linux/dlock-list.h @@ -108,7 +108,8 @@ static inline void init_dlock_list_node(struct dlock_list_node *node) node->lockptr = NULL; } -static inline void free_dlock_list_head(struct dlock_list_head **pdlock_head) +static inline void +free_dlock_list_head(struct dlock_list_head __percpu **pdlock_head) { free_percpu(*pdlock_head); *pdlock_head = NULL; @@ -117,7 +118,7 @@ static inline void free_dlock_list_head(struct dlock_list_head **pdlock_head) /* * Check if all the per-cpu lists are empty */ -static inline bool dlock_list_empty(struct dlock_list_head *dlock_head) +static inline bool dlock_list_empty(struct dlock_list_head __percpu *dlock_head) { int cpu; @@ -134,7 +135,7 @@ static inline bool dlock_list_empty(struct dlock_list_head *dlock_head) * Return: true if the entry is found, false if all the lists exhausted */ static __always_inline bool -__dlock_list_next_cpu(struct dlock_list_head *head, +__dlock_list_next_cpu(struct dlock_list_head __percpu *head, struct dlock_list_state *state) { if (state->lock) @@ -172,7 +173,7 @@ next_cpu: * * Return: true if the next entry is found, false if all the entries iterated */ -static inline bool dlock_list_iterate(struct dlock_list_head *head, +static inline bool dlock_list_iterate(struct dlock_list_head __percpu *head, struct dlock_list_state *state) { /* @@ -200,8 +201,9 @@ static inline bool dlock_list_iterate(struct dlock_list_head *head, * * Return: true if the next entry is found, 
false if all the entries iterated */ -static inline bool dlock_list_iterate_safe(struct dlock_list_head *head, - struct dlock_list_state *state) +static inline bool +dlock_list_iterate_safe(struct dlock_list_head __percpu *head, + struct dlock_list_state *state) { /* * Find next entry @@ -226,8 +228,8 @@ static inline bool dlock_list_iterate_safe(struct dlock_list_head *head, } extern void dlock_list_add(struct dlock_list_node *node, - struct dlock_list_head *head); + struct dlock_list_head __percpu *head); extern void dlock_list_del(struct dlock_list_node *node); -extern int init_dlock_list_head(struct dlock_list_head **pdlock_head); +extern int init_dlock_list_head(struct dlock_list_head __percpu **pdlock_head); #endif /* __LINUX_DLOCK_LIST_H */ diff --git a/lib/dlock-list.c b/lib/dlock-list.c index 84d4623..e1a1930 100644 --- a/lib/dlock-list.c +++ b/lib/dlock-list.c @@ -27,7 +27,7 @@ static struct lock_class_key dlock_list_key; /* * Initialize the per-cpu list head */ -int init_dlock_list_head(struct dlock_list_head **pdlock_head) +int init_dlock_list_head(struct dlock_list_head __percpu **pdlock_head) { struct dlock_list_head *dlock_head; int cpu; @@ -53,7 +53,8 @@ int init_dlock_list_head(struct dlock_list_head **pdlock_head) * function is called. However, deletion may be done by a different CPU. * So we still need to use a lock to protect the content of the list. */ -void dlock_list_add(struct dlock_list_node *node, struct dlock_list_head *head) +void dlock_list_add(struct dlock_list_node *node, + struct dlock_list_head __percpu *head) { struct dlock_list_head *myhead; -- 1.7.1
[PATCH v2 1/7] lib/dlock-list: Distributed and lock-protected lists
Linked lists are used everywhere in the Linux kernel. However, if many threads are trying to add or delete entries into the same linked list, it can create a performance bottleneck. This patch introduces new list APIs that provide a set of distributed lists (one per CPU), each of which is protected by its own spinlock. To the callers, however, the set of lists acts like a single consolidated list. This allows list entry insertion and deletion operations to happen in parallel instead of being serialized with a global list and lock. List entry insertion is strictly per cpu. List deletion, however, can happen in a cpu other than the one that did the insertion. So we still need a lock to protect the list. Because of that, there may still be a small amount of contention when deletion is being done. A new header file include/linux/dlock-list.h will be added with the associated dlock_list_head and dlock_list_node structures. The following functions are provided to manage the per-cpu list: 1. int init_dlock_list_head(struct dlock_list_head **pdlock_head) 2. void dlock_list_add(struct dlock_list_node *node, struct dlock_list_head *head) 3. void dlock_list_del(struct dlock_list_node *node) Iteration of all the list entries within a group of per-cpu lists is done by calling either the dlock_list_iterate() or dlock_list_iterate_safe() functions in a while loop. They correspond to the list_for_each_entry() and list_for_each_entry_safe() macros respectively. The iteration states are kept in a dlock_list_state structure that is passed to the iteration functions. 
Signed-off-by: Waiman LongReviewed-by: Jan Kara --- include/linux/dlock-list.h | 233 lib/Makefile |2 +- lib/dlock-list.c | 100 +++ 3 files changed, 334 insertions(+), 1 deletions(-) create mode 100644 include/linux/dlock-list.h create mode 100644 lib/dlock-list.c diff --git a/include/linux/dlock-list.h b/include/linux/dlock-list.h new file mode 100644 index 000..43355f8 --- /dev/null +++ b/include/linux/dlock-list.h @@ -0,0 +1,233 @@ +/* + * Distributed/locked list + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * (C) Copyright 2016 Hewlett-Packard Enterprise Development LP + * + * Authors: Waiman Long + */ +#ifndef __LINUX_DLOCK_LIST_H +#define __LINUX_DLOCK_LIST_H + +#include +#include +#include + +/* + * include/linux/dlock-list.h + * + * A distributed (per-cpu) set of lists each of which is protected by its + * own spinlock, but acts like a single consolidated list to the callers. + * + * The dlock_list_head structure contains the spinlock, the other + * dlock_list_node structures only contains a pointer to the spinlock in + * dlock_list_head. 
+ */ +struct dlock_list_head { + struct list_head list; + spinlock_t lock; +}; + +#define DLOCK_LIST_HEAD_INIT(name) \ + { \ + .list.prev = ,\ + .list.next = ,\ + .list.lock = __SPIN_LOCK_UNLOCKED(name),\ + } + +/* + * Per-cpu list iteration state + */ +struct dlock_list_state { + int cpu; + spinlock_t *lock; + struct list_head*head; /* List head of current per-cpu list */ + struct dlock_list_node *curr; + struct dlock_list_node *next; +}; + +#define DLOCK_LIST_STATE_INIT()\ + { \ + .cpu = -1, \ + .lock = NULL, \ + .head = NULL, \ + .curr = NULL, \ + .next = NULL, \ + } + +#define DEFINE_DLOCK_LIST_STATE(s) \ + struct dlock_list_state s = DLOCK_LIST_STATE_INIT() + +static inline void init_dlock_list_state(struct dlock_list_state *state) +{ + state->cpu = -1; + state->lock = NULL; + state->head = NULL; + state->curr = NULL; + state->next = NULL; +} + +#ifdef CONFIG_DEBUG_SPINLOCK +#define DLOCK_LIST_WARN_ON(x) WARN_ON(x) +#else +#define DLOCK_LIST_WARN_ON(x) +#endif + +/* + * Next per-cpu list entry + */ +#define dlock_list_next_entry(pos, member) list_next_entry(pos, member.list) + +/* + * Per-cpu node data
[RFC PATCH v2 6/7] lib/persubnode: Introducing a simple per-subnode APIs
The percpu APIs are extensively used in the Linux kernel to reduce cacheline contention and improve performance. For some use cases, the percpu APIs may be too fine-grained for distributed resources whereas a per-node based allocation may be too coarse as we can have dozens of CPUs in a NUMA node in some high-end systems. This patch introduces simple per-subnode APIs where each of the distributed resources will be shared by only a handful of CPUs within a NUMA node. The per-subnode APIs are built on top of the percpu APIs and hence require the same amount of memory as if the percpu APIs are used. However, they help to reduce the total number of separate resources that need to be managed. As a result, they can speed up code that needs to iterate all the resources compared with using the percpu APIs. Cacheline contention, however, will increase slightly as each resource is shared by more than one CPU. As long as the number of CPUs in each subnode is small, the performance impact won't be significant. In this patch, at most 2 sibling groups can be put into a subnode. For an x86-64 CPU, at most 4 CPUs will be in a subnode when HT is enabled and 2 when it is not. Signed-off-by: Waiman Long --- include/linux/persubnode.h | 80 + init/main.c|2 + lib/Makefile |2 + lib/persubnode.c | 119 4 files changed, 203 insertions(+), 0 deletions(-) create mode 100644 include/linux/persubnode.h create mode 100644 lib/persubnode.c diff --git a/include/linux/persubnode.h b/include/linux/persubnode.h new file mode 100644 index 000..b777daa --- /dev/null +++ b/include/linux/persubnode.h @@ -0,0 +1,80 @@ +/* + * Per-subnode definitions + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * (C) Copyright 2016 Hewlett-Packard Enterprise Development LP + * + * Authors: Waiman Long + */ +#ifndef __LINUX_PERSUBNODE_H +#define __LINUX_PERSUBNODE_H + +#include +#include + +/* + * Per-subnode APIs + */ +#define __persubnode __percpu +#define nr_subnode_ids nr_cpu_ids +#define alloc_persubnode(type) alloc_percpu(type) +#define free_persubnode(var) free_percpu(var) +#define for_each_subnode(snode)for_each_cpu(snode, subnode_mask) +#define per_subnode_ptr(ptr, subnode) per_cpu_ptr(ptr, subnode) +#define per_subnode(var, subnode) per_cpu(var, subnode) + +#ifdef CONFIG_SMP + +extern struct cpumask __subnode_mask __read_mostly; +DECLARE_PER_CPU_READ_MOSTLY(int, cpu_subnode_id); + +#define subnode_mask (&__subnode_mask) + +static inline int this_cpu_to_subnode(void) +{ + return *this_cpu_ptr(_subnode_id); +} + +/* + * For safety, preemption should be disabled before using this_subnode_ptr(). 
+ */ +#define this_subnode_ptr(ptr) \ +({ \ + int _snid = this_cpu_to_subnode(); \ + per_cpu_ptr(ptr, _snid);\ +}) + +#define get_subnode_ptr(ptr) \ +({ \ + preempt_disable(); \ + this_subnode_ptr(ptr); \ +}) + +#define put_subnode_ptr(ptr) \ +do { \ + (void)(ptr);\ + preempt_enable(); \ +} while (0) + +extern void __init subnode_early_init(void); + +#else /* CONFIG_SMP */ + +#define subnode_mask cpu_possible_mask +#define this_subnode_ptr(ptr) this_cpu_ptr(ptr) +#define get_subnode_ptr(ptr) get_cpu_ptr(ptr) +#define put_subnode_ptr(ptr) put_cpu_ptr(ptr) + +static inline void subnode_early_init(void) { } + +#endif /* CONFIG_SMP */ +#endif /* __LINUX_PERSUBNODE_H */ diff --git a/init/main.c b/init/main.c index 4c17fda..28e4425 100644 --- a/init/main.c +++ b/init/main.c @@ -81,6 +81,7 @@ #include #include #include +#include #include #include @@ -524,6 +525,7 @@ asmlinkage __visible void __init start_kernel(void) NULL, set_init_arg); jump_label_init(); + subnode_early_init(); /* * These use large bootmem allocations and must precede diff --git a/lib/Makefile b/lib/Makefile index 92e8c38..440152c 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -232,3 +232,5 @@ obj-$(CONFIG_UCS2_STRING) += ucs2_string.o obj-$(CONFIG_UBSAN)
[PATCH v2 2/7] lib/dlock-list: Add __percpu modifier for parameters
From: Boqun Feng Add the __percpu modifier properly to help: 1. Differentiate pointers to actual structures from those to percpu structures, which could improve readability. 2. Prevent sparse from complaining about "different address spaces" Signed-off-by: Boqun Feng Signed-off-by: Waiman Long --- include/linux/dlock-list.h | 18 ++ lib/dlock-list.c |5 +++-- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/include/linux/dlock-list.h b/include/linux/dlock-list.h index 43355f8..a8e1fd2 100644 --- a/include/linux/dlock-list.h +++ b/include/linux/dlock-list.h @@ -108,7 +108,8 @@ static inline void init_dlock_list_node(struct dlock_list_node *node) node->lockptr = NULL; } -static inline void free_dlock_list_head(struct dlock_list_head **pdlock_head) +static inline void +free_dlock_list_head(struct dlock_list_head __percpu **pdlock_head) { free_percpu(*pdlock_head); *pdlock_head = NULL; @@ -117,7 +118,7 @@ static inline void free_dlock_list_head(struct dlock_list_head **pdlock_head) /* * Check if all the per-cpu lists are empty */ -static inline bool dlock_list_empty(struct dlock_list_head *dlock_head) +static inline bool dlock_list_empty(struct dlock_list_head __percpu *dlock_head) { int cpu; @@ -134,7 +135,7 @@ static inline bool dlock_list_empty(struct dlock_list_head *dlock_head) * Return: true if the entry is found, false if all the lists exhausted */ static __always_inline bool -__dlock_list_next_cpu(struct dlock_list_head *head, +__dlock_list_next_cpu(struct dlock_list_head __percpu *head, struct dlock_list_state *state) { if (state->lock) @@ -172,7 +173,7 @@ next_cpu: * * Return: true if the next entry is found, false if all the entries iterated */ -static inline bool dlock_list_iterate(struct dlock_list_head *head, +static inline bool dlock_list_iterate(struct dlock_list_head __percpu *head, struct dlock_list_state *state) { /* @@ -200,8 +201,9 @@ static inline bool dlock_list_iterate(struct dlock_list_head *head, * * Return: true if the next entry is found, 
false if all the entries iterated */ -static inline bool dlock_list_iterate_safe(struct dlock_list_head *head, - struct dlock_list_state *state) +static inline bool +dlock_list_iterate_safe(struct dlock_list_head __percpu *head, + struct dlock_list_state *state) { /* * Find next entry @@ -226,8 +228,8 @@ static inline bool dlock_list_iterate_safe(struct dlock_list_head *head, } extern void dlock_list_add(struct dlock_list_node *node, - struct dlock_list_head *head); + struct dlock_list_head __percpu *head); extern void dlock_list_del(struct dlock_list_node *node); -extern int init_dlock_list_head(struct dlock_list_head **pdlock_head); +extern int init_dlock_list_head(struct dlock_list_head __percpu **pdlock_head); #endif /* __LINUX_DLOCK_LIST_H */ diff --git a/lib/dlock-list.c b/lib/dlock-list.c index 84d4623..e1a1930 100644 --- a/lib/dlock-list.c +++ b/lib/dlock-list.c @@ -27,7 +27,7 @@ static struct lock_class_key dlock_list_key; /* * Initialize the per-cpu list head */ -int init_dlock_list_head(struct dlock_list_head **pdlock_head) +int init_dlock_list_head(struct dlock_list_head __percpu **pdlock_head) { struct dlock_list_head *dlock_head; int cpu; @@ -53,7 +53,8 @@ int init_dlock_list_head(struct dlock_list_head **pdlock_head) * function is called. However, deletion may be done by a different CPU. * So we still need to use a lock to protect the content of the list. */ -void dlock_list_add(struct dlock_list_node *node, struct dlock_list_head *head) +void dlock_list_add(struct dlock_list_node *node, + struct dlock_list_head __percpu *head) { struct dlock_list_head *myhead; -- 1.7.1
[PATCH v2 1/7] lib/dlock-list: Distributed and lock-protected lists
Linked lists are used everywhere in the Linux kernel. However, if many threads are trying to add or delete entries into the same linked list, it can create a performance bottleneck. This patch introduces new list APIs that provide a set of distributed lists (one per CPU), each of which is protected by its own spinlock. To the callers, however, the set of lists acts like a single consolidated list. This allows list entry insertion and deletion operations to happen in parallel instead of being serialized with a global list and lock. List entry insertion is strictly per cpu. List deletion, however, can happen in a cpu other than the one that did the insertion. So we still need a lock to protect the list. Because of that, there may still be a small amount of contention when deletion is being done. A new header file include/linux/dlock-list.h will be added with the associated dlock_list_head and dlock_list_node structures. The following functions are provided to manage the per-cpu list: 1. int init_dlock_list_head(struct dlock_list_head **pdlock_head) 2. void dlock_list_add(struct dlock_list_node *node, struct dlock_list_head *head) 3. void dlock_list_del(struct dlock_list_node *node) Iteration of all the list entries within a group of per-cpu lists is done by calling either the dlock_list_iterate() or dlock_list_iterate_safe() functions in a while loop. They correspond to the list_for_each_entry() and list_for_each_entry_safe() macros respectively. The iteration states are kept in a dlock_list_state structure that is passed to the iteration functions. 
Signed-off-by: Waiman Long Reviewed-by: Jan Kara --- include/linux/dlock-list.h | 233 lib/Makefile |2 +- lib/dlock-list.c | 100 +++ 3 files changed, 334 insertions(+), 1 deletions(-) create mode 100644 include/linux/dlock-list.h create mode 100644 lib/dlock-list.c diff --git a/include/linux/dlock-list.h b/include/linux/dlock-list.h new file mode 100644 index 000..43355f8 --- /dev/null +++ b/include/linux/dlock-list.h @@ -0,0 +1,233 @@ +/* + * Distributed/locked list + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * (C) Copyright 2016 Hewlett-Packard Enterprise Development LP + * + * Authors: Waiman Long + */ +#ifndef __LINUX_DLOCK_LIST_H +#define __LINUX_DLOCK_LIST_H + +#include +#include +#include + +/* + * include/linux/dlock-list.h + * + * A distributed (per-cpu) set of lists each of which is protected by its + * own spinlock, but acts like a single consolidated list to the callers. + * + * The dlock_list_head structure contains the spinlock, the other + * dlock_list_node structures only contains a pointer to the spinlock in + * dlock_list_head. 
+ */ +struct dlock_list_head { + struct list_head list; + spinlock_t lock; +}; + +#define DLOCK_LIST_HEAD_INIT(name) \ + { \ + .list.prev = ,\ + .list.next = ,\ + .list.lock = __SPIN_LOCK_UNLOCKED(name),\ + } + +/* + * Per-cpu list iteration state + */ +struct dlock_list_state { + int cpu; + spinlock_t *lock; + struct list_head*head; /* List head of current per-cpu list */ + struct dlock_list_node *curr; + struct dlock_list_node *next; +}; + +#define DLOCK_LIST_STATE_INIT()\ + { \ + .cpu = -1, \ + .lock = NULL, \ + .head = NULL, \ + .curr = NULL, \ + .next = NULL, \ + } + +#define DEFINE_DLOCK_LIST_STATE(s) \ + struct dlock_list_state s = DLOCK_LIST_STATE_INIT() + +static inline void init_dlock_list_state(struct dlock_list_state *state) +{ + state->cpu = -1; + state->lock = NULL; + state->head = NULL; + state->curr = NULL; + state->next = NULL; +} + +#ifdef CONFIG_DEBUG_SPINLOCK +#define DLOCK_LIST_WARN_ON(x) WARN_ON(x) +#else +#define DLOCK_LIST_WARN_ON(x) +#endif + +/* + * Next per-cpu list entry + */ +#define dlock_list_next_entry(pos, member) list_next_entry(pos, member.list) + +/* + * Per-cpu node data structure + */ +struct dlock_list_node { + struct
[RFC PATCH v2 6/7] lib/persubnode: Introducing a simple per-subnode APIs
The percpu APIs are extensively used in the Linux kernel to reduce cacheline contention and improve performance. For some use cases, the percpu APIs may be too fine-grained for distributed resources whereas a per-node based allocation may be too coarse as we can have dozens of CPUs in a NUMA node in some high-end systems. This patch introduces simple per-subnode APIs where each of the distributed resources will be shared by only a handful of CPUs within a NUMA node. The per-subnode APIs are built on top of the percpu APIs and hence require the same amount of memory as if the percpu APIs are used. However, it helps to reduce the total number of separate resources that need to be managed. As a result, it can speed up code that needs to iterate all the resources compared with using the percpu APIs. Cacheline contention, however, will increase slightly as each resource is shared by more than one CPU. As long as the number of CPUs in each subnode is small, the performance impact won't be significant. In this patch, at most 2 sibling groups can be put into a subnode. For an x86-64 CPU, at most 4 CPUs will be in a subnode when HT is enabled and 2 when it is not. Signed-off-by: Waiman Long --- include/linux/persubnode.h | 80 + init/main.c|2 + lib/Makefile |2 + lib/persubnode.c | 119 4 files changed, 203 insertions(+), 0 deletions(-) create mode 100644 include/linux/persubnode.h create mode 100644 lib/persubnode.c diff --git a/include/linux/persubnode.h b/include/linux/persubnode.h new file mode 100644 index 000..b777daa --- /dev/null +++ b/include/linux/persubnode.h @@ -0,0 +1,80 @@ +/* + * Per-subnode definitions + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * (C) Copyright 2016 Hewlett-Packard Enterprise Development LP + * + * Authors: Waiman Long + */ +#ifndef __LINUX_PERSUBNODE_H +#define __LINUX_PERSUBNODE_H + +#include +#include + +/* + * Per-subnode APIs + */ +#define __persubnode __percpu +#define nr_subnode_ids nr_cpu_ids +#define alloc_persubnode(type) alloc_percpu(type) +#define free_persubnode(var) free_percpu(var) +#define for_each_subnode(snode)for_each_cpu(snode, subnode_mask) +#define per_subnode_ptr(ptr, subnode) per_cpu_ptr(ptr, subnode) +#define per_subnode(var, subnode) per_cpu(var, subnode) + +#ifdef CONFIG_SMP + +extern struct cpumask __subnode_mask __read_mostly; +DECLARE_PER_CPU_READ_MOSTLY(int, cpu_subnode_id); + +#define subnode_mask (&__subnode_mask) + +static inline int this_cpu_to_subnode(void) +{ + return *this_cpu_ptr(&cpu_subnode_id); +} + +/* + * For safety, preemption should be disabled before using this_subnode_ptr(). 
+ */ +#define this_subnode_ptr(ptr) \ +({ \ + int _snid = this_cpu_to_subnode(); \ + per_cpu_ptr(ptr, _snid);\ +}) + +#define get_subnode_ptr(ptr) \ +({ \ + preempt_disable(); \ + this_subnode_ptr(ptr); \ +}) + +#define put_subnode_ptr(ptr) \ +do { \ + (void)(ptr);\ + preempt_enable(); \ +} while (0) + +extern void __init subnode_early_init(void); + +#else /* CONFIG_SMP */ + +#define subnode_mask cpu_possible_mask +#define this_subnode_ptr(ptr) this_cpu_ptr(ptr) +#define get_subnode_ptr(ptr) get_cpu_ptr(ptr) +#define put_subnode_ptr(ptr) put_cpu_ptr(ptr) + +static inline void subnode_early_init(void) { } + +#endif /* CONFIG_SMP */ +#endif /* __LINUX_PERSUBNODE_H */ diff --git a/init/main.c b/init/main.c index 4c17fda..28e4425 100644 --- a/init/main.c +++ b/init/main.c @@ -81,6 +81,7 @@ #include #include #include +#include #include #include @@ -524,6 +525,7 @@ asmlinkage __visible void __init start_kernel(void) NULL, set_init_arg); jump_label_init(); + subnode_early_init(); /* * These use large bootmem allocations and must precede diff --git a/lib/Makefile b/lib/Makefile index 92e8c38..440152c 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -232,3 +232,5 @@ obj-$(CONFIG_UCS2_STRING) += ucs2_string.o obj-$(CONFIG_UBSAN) += ubsan.o UBSAN_SANITIZE_ubsan.o := n +
Re: [PATCH -next] bpf: make inode code explicitly non-modular
On 07/11/2016 06:51 PM, Paul Gortmaker wrote: The Kconfig currently controlling compilation of this code is: init/Kconfig:config BPF_SYSCALL init/Kconfig: bool "Enable bpf() system call" ...meaning that it currently is not being built as a module by anyone. Lets remove the couple traces of modular infrastructure use, so that when reading the driver there is no doubt it is builtin-only. Note that MODULE_ALIAS is a no-op for non-modular code. We replace module.h with init.h since the file does use __init. Cc: Alexei Starovoitov Cc: net...@vger.kernel.org Signed-off-by: Paul Gortmaker (Patch is for net-next tree then.) Acked-by: Daniel Borkmann
Re: [PATCH -next] bpf: make inode code explicitly non-modular
On 07/11/2016 06:51 PM, Paul Gortmaker wrote: The Kconfig currently controlling compilation of this code is: init/Kconfig:config BPF_SYSCALL init/Kconfig: bool "Enable bpf() system call" ...meaning that it currently is not being built as a module by anyone. Lets remove the couple traces of modular infrastructure use, so that when reading the driver there is no doubt it is builtin-only. Note that MODULE_ALIAS is a no-op for non-modular code. We replace module.h with init.h since the file does use __init. Cc: Alexei Starovoitov Cc: net...@vger.kernel.org Signed-off-by: Paul Gortmaker (Patch is for net-next tree then.) Acked-by: Daniel Borkmann
[PATCH] staging: lustre: o2iblnd: iov fixes for kiblnd_send
With the move to iov_iter handling, two issues were merged for the ko2iblnd driver. The first fix addresses a simple typo of the wrong flag being used with iov_iter_kvec. The second fix adds the payload offset to the payload size. Signed-off-by: James Simmons --- drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c | 13 - 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c index 3d597dc..437e149 100644 --- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c +++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c @@ -1519,12 +1519,15 @@ kiblnd_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg) /* payload is either all vaddrs or all pages */ LASSERT(!(payload_kiov && payload_iov)); - if (payload_kiov) + if (payload_kiov) { iov_iter_bvec(, ITER_BVEC | WRITE, - payload_kiov, payload_niov, payload_nob); - else - iov_iter_kvec(, ITER_BVEC | WRITE, - payload_iov, payload_niov, payload_nob); + payload_kiov, payload_niov, + payload_nob + payload_offset); + } else { + iov_iter_kvec(, ITER_KVEC | WRITE, + payload_iov, payload_niov, + payload_nob + payload_offset); + } iov_iter_advance(, payload_offset); switch (type) { -- 2.7.4
[PATCH] staging: lustre: o2iblnd: iov fixes for kiblnd_send
With the move to iov_iter handling, two issues were merged for the ko2iblnd driver. The first fix addresses a simple typo of the wrong flag being used with iov_iter_kvec. The second fix adds the payload offset to the payload size. Signed-off-by: James Simmons --- drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c | 13 - 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c index 3d597dc..437e149 100644 --- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c +++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c @@ -1519,12 +1519,15 @@ kiblnd_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg) /* payload is either all vaddrs or all pages */ LASSERT(!(payload_kiov && payload_iov)); - if (payload_kiov) + if (payload_kiov) { iov_iter_bvec(, ITER_BVEC | WRITE, - payload_kiov, payload_niov, payload_nob); - else - iov_iter_kvec(, ITER_BVEC | WRITE, - payload_iov, payload_niov, payload_nob); + payload_kiov, payload_niov, + payload_nob + payload_offset); + } else { + iov_iter_kvec(, ITER_KVEC | WRITE, + payload_iov, payload_niov, + payload_nob + payload_offset); + } iov_iter_advance(, payload_offset); switch (type) { -- 2.7.4
Re: More parallel atomic_open/d_splice_alias fun with NFS and possibly more FSes.
> On Sun, Jul 10, 2016 at 07:14:18PM +0100, James Simmons wrote: > > > [ 111.210818] [] kiblnd_send+0x51d/0x9e0 [ko2iblnd] > > Mea culpa - in kiblnd_send() this > if (payload_kiov) > iov_iter_bvec(, ITER_BVEC | WRITE, > payload_kiov, payload_niov, payload_nob); > else > iov_iter_kvec(, ITER_BVEC | WRITE, > payload_iov, payload_niov, payload_nob); > should have s/BVEC/KVEC/ in the iov_iter_kvec() arguments. Cut'n'paste > braindamage... That is the fix. Also I believe payload_nob should be payload_nob + payload_offset instead. I will send a patch against Oleg's tree that addresses these issues.
Re: More parallel atomic_open/d_splice_alias fun with NFS and possibly more FSes.
> On Sun, Jul 10, 2016 at 07:14:18PM +0100, James Simmons wrote: > > > [ 111.210818] [] kiblnd_send+0x51d/0x9e0 [ko2iblnd] > > Mea culpa - in kiblnd_send() this > if (payload_kiov) > iov_iter_bvec(, ITER_BVEC | WRITE, > payload_kiov, payload_niov, payload_nob); > else > iov_iter_kvec(, ITER_BVEC | WRITE, > payload_iov, payload_niov, payload_nob); > should have s/BVEC/KVEC/ in the iov_iter_kvec() arguments. Cut'n'paste > braindamage... That is the fix. Also I believe payload_nob should be payload_nob + payload_offset instead. I will send a patch against Oleg's tree that addresses these issues.
Re: [PATCH 14/14] PCI: xgene: make it explicitly non-modular
On Sat, Jul 9, 2016 at 16:15 Paul Gortmaker wrote: > > [Re: [PATCH 14/14] PCI: xgene: make it explicitly non-modular] On 07/07/2016 > (Thu 15:42) Duc Dang wrote: > > > On Thu, Jul 7, 2016 at 3:35 PM, Tanmay Inamdar wrote: > > > > > > > > > On Sat, Jul 2, 2016 at 4:13 PM, Paul Gortmaker > > > wrote: > > >> > > >> The Kconfig currently controlling compilation of this code is: > > >> > > >> drivers/pci/host/Kconfig:config PCI_XGENE > > >> drivers/pci/host/Kconfig: bool "X-Gene PCIe controller" > > >> > > >> ...meaning that it currently is not being built as a module by anyone. > > >> > > >> Lets remove the few trace uses of modular code and macros, so that > > >> when reading the driver there is no doubt it is builtin-only. > > >> > > >> Since module_platform_driver() uses the same init level priority as > > >> builtin_platform_driver() the init ordering remains unchanged with > > >> this commit. > > >> > > >> We also delete the MODULE_LICENSE tag etc. since all that information > > >> is already contained at the top of the file in the comments. > > >> > > >> Cc: Tanmay Inamdar > > >> Cc: Bjorn Helgaas > > >> Cc: linux-...@vger.kernel.org > > >> Signed-off-by: Paul Gortmaker > > > > Thanks for taking care of this, Paul. > > > > I tested your patch and it worked fine on my X-Gene Mustang board. > > > > One minor comment below. > > > > >> --- > > >> drivers/pci/host/pci-xgene.c | 8 ++-- > > >> 1 file changed, 2 insertions(+), 6 deletions(-) > > >> > > >> diff --git a/drivers/pci/host/pci-xgene.c b/drivers/pci/host/pci-xgene.c > > >> index 7eb20cc76dd3..a81273c23341 100644 > > >> --- a/drivers/pci/host/pci-xgene.c > > >> +++ b/drivers/pci/host/pci-xgene.c > > >> @@ -21,7 +21,7 @@ > > >> #include > > >> #include > > >> #include > > >> -#include > > >> +#include > > > > The platform_device.h already has builtin_platform_driver macro > > defined. So this init.h is not need? 
> > If you look, you will find that platform_device.h does not include the > init.h even though it references __init; it can do this w/o error since > all the references themselves are in a macro. However once code wants > to be a consumer of those macros, they will need init.h present. Often > you can overlook directly calling it out for inclusion since it gets > sourced by another header, but it is best policy to list what gets used. Ah, got it. Thanks, Paul! > > Thanks for testing! > > Paul. > -- > > > > > >> #include > > >> #include > > >> #include > > >> @@ -579,8 +579,4 @@ static struct platform_driver xgene_pcie_driver = { > > >> }, > > >> .probe = xgene_pcie_probe_bridge, > > >> }; > > >> -module_platform_driver(xgene_pcie_driver); > > >> - > > >> -MODULE_AUTHOR("Tanmay Inamdar "); > > >> -MODULE_DESCRIPTION("APM X-Gene PCIe driver"); > > >> -MODULE_LICENSE("GPL v2"); > > >> +builtin_platform_driver(xgene_pcie_driver); > > > > > > > > > Copying Duc. > > >> > > >> -- > > >> 2.8.4 > > >> > > > > > Regards, > > Duc Dang.
Re: [PATCH 14/14] PCI: xgene: make it explicitly non-modular
On Sat, Jul 9, 2016 at 16:15 Paul Gortmaker wrote: > > [Re: [PATCH 14/14] PCI: xgene: make it explicitly non-modular] On 07/07/2016 > (Thu 15:42) Duc Dang wrote: > > > On Thu, Jul 7, 2016 at 3:35 PM, Tanmay Inamdar wrote: > > > > > > > > > On Sat, Jul 2, 2016 at 4:13 PM, Paul Gortmaker > > > wrote: > > >> > > >> The Kconfig currently controlling compilation of this code is: > > >> > > >> drivers/pci/host/Kconfig:config PCI_XGENE > > >> drivers/pci/host/Kconfig: bool "X-Gene PCIe controller" > > >> > > >> ...meaning that it currently is not being built as a module by anyone. > > >> > > >> Lets remove the few trace uses of modular code and macros, so that > > >> when reading the driver there is no doubt it is builtin-only. > > >> > > >> Since module_platform_driver() uses the same init level priority as > > >> builtin_platform_driver() the init ordering remains unchanged with > > >> this commit. > > >> > > >> We also delete the MODULE_LICENSE tag etc. since all that information > > >> is already contained at the top of the file in the comments. > > >> > > >> Cc: Tanmay Inamdar > > >> Cc: Bjorn Helgaas > > >> Cc: linux-...@vger.kernel.org > > >> Signed-off-by: Paul Gortmaker > > > > Thanks for taking care of this, Paul. > > > > I tested your patch and it worked fine on my X-Gene Mustang board. > > > > One minor comment below. > > > > >> --- > > >> drivers/pci/host/pci-xgene.c | 8 ++-- > > >> 1 file changed, 2 insertions(+), 6 deletions(-) > > >> > > >> diff --git a/drivers/pci/host/pci-xgene.c b/drivers/pci/host/pci-xgene.c > > >> index 7eb20cc76dd3..a81273c23341 100644 > > >> --- a/drivers/pci/host/pci-xgene.c > > >> +++ b/drivers/pci/host/pci-xgene.c > > >> @@ -21,7 +21,7 @@ > > >> #include > > >> #include > > >> #include > > >> -#include > > >> +#include > > > > The platform_device.h already has builtin_platform_driver macro > > defined. So this init.h is not need? 
> > If you look, you will find that platform_device.h does not include the > init.h even though it references __init; it can do this w/o error since > all the references themselves are in a macro. However once code wants > to be a consumer of those macros, they will need init.h present. Often > you can overlook directly calling it out for inclusion since it gets > sourced by another header, but it is best policy to list what gets used. Ah, got it. Thanks, Paul! > > Thanks for testing! > > Paul. > -- > > > > > >> #include > > >> #include > > >> #include > > >> @@ -579,8 +579,4 @@ static struct platform_driver xgene_pcie_driver = { > > >> }, > > >> .probe = xgene_pcie_probe_bridge, > > >> }; > > >> -module_platform_driver(xgene_pcie_driver); > > >> - > > >> -MODULE_AUTHOR("Tanmay Inamdar "); > > >> -MODULE_DESCRIPTION("APM X-Gene PCIe driver"); > > >> -MODULE_LICENSE("GPL v2"); > > >> +builtin_platform_driver(xgene_pcie_driver); > > > > > > > > > Copying Duc. > > >> > > >> -- > > >> 2.8.4 > > >> > > > > > Regards, > > Duc Dang.
Re: [PATCH] capabilities: audit capability use
Hello, On Mon, Jul 11, 2016 at 02:14:31PM +0300, Topi Miettinen wrote: > [ 28.443674] audit: type=1327 audit(1468234333.144:520): > proctitle=6D6B6E6F64002F6465762F7A5F343639006300310032 > [ 28.465888] audit: type=1330 audit(1468234333.144:520): > cap_used=0800 > [ 28.482080] audit: type=1331 audit(1468234333.144:520): cgroups=:/test; Please don't put additions of the two different audit types into one patch and I don't think the cgroup audit logging makes much sense. Without logging all migrations, it doesn't help auditing all that much. Also, printing all cgroup membership like that can be problematic for audit as it can be arbitrarily long. Thanks. -- tejun
Re: [PATCH] capabilities: audit capability use
Hello, On Mon, Jul 11, 2016 at 02:14:31PM +0300, Topi Miettinen wrote: > [ 28.443674] audit: type=1327 audit(1468234333.144:520): > proctitle=6D6B6E6F64002F6465762F7A5F343639006300310032 > [ 28.465888] audit: type=1330 audit(1468234333.144:520): > cap_used=0800 > [ 28.482080] audit: type=1331 audit(1468234333.144:520): cgroups=:/test; Please don't put additions of the two different audit types into one patch and I don't think the cgroup audit logging makes much sense. Without logging all migrations, it doesn't help auditing all that much. Also, printing all cgroup membership like that can be problematic for audit as it can be arbitrarily long. Thanks. -- tejun
Re: [PATCH] spi: spi-ti-qspi: clear wlen field while setting word length.
On Mon, Jul 11, 2016 at 3:53 PM, Vignesh R wrote: > > > On Monday 11 July 2016 02:49 PM, prahlad venkata wrote: >> On Mon, Jul 11, 2016 at 2:45 PM, Vignesh R wrote: > [...] > diff --git a/drivers/spi/spi-ti-qspi.c b/drivers/spi/spi-ti-qspi.c > index 29ea8d2..6c61f54 100644 > --- a/drivers/spi/spi-ti-qspi.c > +++ b/drivers/spi/spi-ti-qspi.c > @@ -276,9 +276,9 @@ static int qspi_write_msg(struct ti_qspi *qspi, > struct spi_transfer *t, > cmd |= QSPI_WLEN(QSPI_WLEN_MAX_BITS); > } else { > writeb(*txbuf, qspi->base + > QSPI_SPI_DATA_REG); > - cmd = qspi->cmd | QSPI_WR_SNGL; >> >> This is wrong. Deleting this line means QSPI_WR_SNGL is not set and no >> data is sent out on the wire. > QSPI_WR_SNGL is already set as soon as we enter the function. >> qspi->cmd always has WLEN field cleared and set to WLEN = 1 byte (see ti_qspi_start_transfer_one()). And hence variable 'cmd' will also have WLEN set to 1 byte. >>> Even though WLEN=1 is set in the ti_qspi_transfer_one, if we ask for a >>> transfer of large data, >>> say 300 bytes in length, for attaining faster data rate WLEN 128 is >>> selected for the first two >>> transactions and remaining 44 bytes will be transmitted with WLEN 1. >>> During that case, >>> WLEN will be changed inside qspi_write_msg function itself and the >>> field should be cleared >>> first while doing that. >> >> In qspi_write_msg(), qspi->cmd will always have WLEN set to >> QSPI_WLEN(t->bits_per_word) and qspi->cmd is never changed within this >> function. >> It is the value of local variable 'cmd' that is changed to appropriate >> WLEN (128bit or 8bit) as necessary. >>> 'cmd' is written back to qspi->cmd for every transaction. >>> >>> You mean qspi->cmd = cmd ? >>> I don't see this happening anywhere in the driver. Can you point me to >>> that line of code? >> line 296: >> ti_qspi_write(qspi, cmd, QSPI_SPI_CMD_REG); > > Sorry, I don't understand. QSPI_SPI_CMD_REG and qspi->cmd are different. 
> qspi->cmd does not represent the QSPI_SPI_CMD_REG register. qspi->cmd is > just local driver data for book-keeping. > > Please add some prints in driver to see how 'cmd' (and qspi->cmd) > variable changes in case of 128bit mode and 8bit mode. I don't have a hardware setup to verify this. Is there any way to verify this without hardware? > > Regards > Vignesh > > > -- > Regards > Vignesh -- Regards, Prahlad.
Re: [PATCH] spi: spi-ti-qspi: clear wlen field while setting word length.
On Mon, Jul 11, 2016 at 3:53 PM, Vignesh R wrote: > > > On Monday 11 July 2016 02:49 PM, prahlad venkata wrote: >> On Mon, Jul 11, 2016 at 2:45 PM, Vignesh R wrote: > [...] > diff --git a/drivers/spi/spi-ti-qspi.c b/drivers/spi/spi-ti-qspi.c > index 29ea8d2..6c61f54 100644 > --- a/drivers/spi/spi-ti-qspi.c > +++ b/drivers/spi/spi-ti-qspi.c > @@ -276,9 +276,9 @@ static int qspi_write_msg(struct ti_qspi *qspi, > struct spi_transfer *t, > cmd |= QSPI_WLEN(QSPI_WLEN_MAX_BITS); > } else { > writeb(*txbuf, qspi->base + > QSPI_SPI_DATA_REG); > - cmd = qspi->cmd | QSPI_WR_SNGL; >> >> This is wrong. Deleting this line means QSPI_WR_SNGL is not set and no >> data is sent out on the wire. > QSPI_WR_SNGL is already set as soon as we enter the function. >> qspi->cmd always has WLEN field cleared and set to WLEN = 1 byte (see ti_qspi_start_transfer_one()). And hence variable 'cmd' will also have WLEN set to 1 byte. >>> Even though WLEN=1 is set in the ti_qspi_transfer_one, if we ask for a >>> transfer of large data, >>> say 300 bytes in length, for attaining faster data rate WLEN 128 is >>> selected for the first two >>> transactions and remaining 44 bytes will be transmitted with WLEN 1. >>> During that case, >>> WLEN will be changed inside qspi_write_msg function itself and the >>> field should be cleared >>> first while doing that. >> >> In qspi_write_msg(), qspi->cmd will always have WLEN set to >> QSPI_WLEN(t->bits_per_word) and qspi->cmd is never changed within this >> function. >> It is the value of local variable 'cmd' that is changed to appropriate >> WLEN (128bit or 8bit) as necessary. >>> 'cmd' is written back to qspi->cmd for every transaction. >>> >>> You mean qspi->cmd = cmd ? >>> I don't see this happening anywhere in the driver. Can you point me to >>> that line of code? >> line 296: >> ti_qspi_write(qspi, cmd, QSPI_SPI_CMD_REG); > > Sorry, I don't understand. QSPI_SPI_CMD_REG and qspi->cmd are different. 
> qspi->cmd does not represent the QSPI_SPI_CMD_REG register. qspi->cmd is > just local driver data for book-keeping. > > Please add some prints in driver to see how 'cmd' (and qspi->cmd) > variable changes in case of 128bit mode and 8bit mode. I don't have a hardware setup to verify this. Is there any way to verify this without hardware? > > Regards > Vignesh > > > -- > Regards > Vignesh -- Regards, Prahlad.
Re: Resend: Another 4.4 to 4.5 floppy issue
On 07/11/2016 11:36 AM, Jiri Kosina wrote: On Tue, 5 Jul 2016, Mark Hounschell wrote: From: Jiri Kosina Commit 09954bad4 ("floppy: refactor open() flags handling"), as a side-effect, causes open(/dev/fdX, O_ACCMODE) to fail. It turns out that this is being used by setfdprm userspace for ioctl-only open(). Reintroduce back the original behavior wrt !(FMODE_READ|FMODE_WRITE) modes, while still keeping the original O_NDELAY bug fixed. Cc: sta...@vger.kernel.org # v4.5+ Reported-by: Wim Osterholt Tested-by: Wim Osterholt Signed-off-by: Jiri Kosina --- [ ... snip ... ] But this does not completely fix all the problems induced by the original changes from 4.4 to 4.5. The following is what we use to open the floppy. fd = open(device, O_RDWR | O_NDELAY); The FMODE_NDELAY check that was removed now prevents one from doing an open of the device with no media inserted. It also prevents one from doing an open of the device with media inserted that is not already formatted in a "standard" format. I do both of these things a lot. I deal with a few very non-standard formats and this change prevents me from doing what I've been doing for YEARS. Could we please get the original behavior back in the floppy driver. Hi Mark, thanks for the regression report. For my better understanding of your issue -- what behavior/semantics exactly does your userspace think it'll be getting from opening /dev/fd0 with O_NDELAY? Thanks, Hi Jiri. Well, all that was specified in my original post. I can no longer open the floppy drive with no floppy media inserted. Worse, I can also no longer open a floppy with media inserted that is not a "linux" recognized format. A floppy drive is a removable media device and should be treated as such. The original implementation of the O_NDELAY flag allowed it to be. Any removable media device should be capable of being opened with no, or even unrecognizable media installed. The kernel and its utilities should not "assume" too much when it comes to removable media. 
Consider a SCSI tape drive or even a removable media SCSI disk drive. How would you explain an open failure to someone trying to open a SCSI tape drive that had no tape or even a "non-tar" formatted tape media in it??? Or better yet, trying to open a removable media device that was write protected but didn't include O_RDONLY in the open? The original behavior of the floppy driver was correct. I have no idea what BUG these changes were supposed to fix but the "fix" obviously broke user land. Was this bug reported by some new ROBOT test or something? The kernel floppy driver has been stable for years now so I am really confused as to why these changes were induced. As for the "O_RDONLY | O_WRONLY" thing you decided to change back, which I'm happy to see, was wrong. Almost ALL removable media devices have W/R protection built into the media. For ever, I understood that it was MY responsibility to write protect my removable media. An open of a removable device should never even care about that stuff. It is the user's responsibility. We use extensively, the FDRAWCMD ioctl API. It is totally borked now for us without maintaining our own kernel patch that reverts the changes from 4.4 to 4.5. Regards Mark
Re: Resend: Another 4.4 to 4.5 floppy issue
On 07/11/2016 11:36 AM, Jiri Kosina wrote: On Tue, 5 Jul 2016, Mark Hounschell wrote: From: Jiri Kosina Commit 09954bad4 ("floppy: refactor open() flags handling"), as a side-effect, causes open(/dev/fdX, O_ACCMODE) to fail. It turns out that this is being used setfdprm userspace for ioctl-only open(). Reintroduce back the original behavior wrt !(FMODE_READ|FMODE_WRITE) modes, while still keeping the original O_NDELAY bug fixed. Cc: sta...@vger.kernel.org # v4.5+ Reported-by: Wim Osterholt Tested-by: Wim Osterholt Signed-off-by: Jiri Kosina --- [ ... snip ... ] But this does not completely fix all the problems induced by the original changes from 4.4 to 4.5. The following is what we use to open the floppy. fd = open(device, O_RDWR | O_NDELAY); The FMODE_NDELAY check that was removed now prevents one from doing an open of the device with no media inserted. It also prevents one from doing an open of the device with media inserted that is not already formatted in a "standard" format. I do both of these things a lot. I deal with a few very non-standard formats and this change prevents me from doing what I've been doing for YEARS. Could we please get the original behavior back in the floppy driver. Hi Mark, thanks for the regression report. For my better understanding of your issue -- what behavior/semantics exactly does your userspace think it'll be getting from opening /dev/fd0 with O_NDELAY? Thanks, Hi Jiri. Well, all that was specified in my original post. I can no longer open the floppy drive with no floppy media inserted. Worse, I can also no longer open a floppy with media inserted that is not a "linux" recognized format. A floppy drive is a removable media device and should be treated as such. The original implementation of the O_NDELAY flag allowed it to be. Any removable media device should be capable of being opened with no, or even unrecognizable media installed. The kernel and its utilities should not "assume" to much when it comes to removable media. 
Consider a SCSI tape drive or even a removable media SCSI disk drive. How would you explain an open failure to someone trying to open a SCSI tape drive that had no tape or even a "non-tar" formatted tape media in it??? Or better yet, trying to open a removable media device the was write protected but didn't include O_RDONLY in the open? The original behavior of the floppy driver was correct. I have no idea what BUG these changes were supposed to fix but the "fix" obviously broke user land. Was this bug reported by some new ROBOT test or something? The kernel floppy driver has been stable for years now so I am really confused as to why these changes were induced. As for the "O_RDONLY | O_WRONLY" thing you decided to change back, which I'm happy to see, was wrong. Almost ALL removable media devices have W/R protection built into the media. For ever, I understood that it was MY responsibility to write protect my removable media. An open of a removable device should never even care about that stuff. It is the users responsibility. We use extensively, the FDRAWCMD ioctl API. It is totally borked now for us without maintaining our own kernel patch that reverts the changes from 4.4 to 4.5. Regards Mark
Re: [PATCH 3/3] time: drop local_irq_save/restore from irqtime_account_irq
On 11/07/2016 18:53, r...@redhat.com wrote: > From: Rik van Riel > > Paolo pointed out that irqs are already blocked when irqtime_account_irq > is called. That means there is no reason to call local_irq_save/restore > again. > > Signed-off-by: Rik van Riel > Suggested-by: Paolo Bonzini > --- > kernel/sched/cputime.c | 4 > 1 file changed, 4 deletions(-) > > diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c > index ca7e33cb0967..7b6fa4d7ad4c 100644 > --- a/kernel/sched/cputime.c > +++ b/kernel/sched/cputime.c > @@ -49,15 +49,12 @@ DEFINE_PER_CPU(seqcount_t, irq_time_seq); > */ > void irqtime_account_irq(struct task_struct *curr) > { > - unsigned long flags; > s64 delta; > int cpu; > > if (!sched_clock_irqtime) > return; > > - local_irq_save(flags); > - > cpu = smp_processor_id(); > delta = sched_clock_cpu(cpu) - __this_cpu_read(irq_start_time); > __this_cpu_add(irq_start_time, delta); > @@ -75,7 +72,6 @@ void irqtime_account_irq(struct task_struct *curr) > __this_cpu_add(cpu_softirq_time, delta); > > irq_time_write_end(); > - local_irq_restore(flags); > } > EXPORT_SYMBOL_GPL(irqtime_account_irq); > > Reviewed-by: Paolo Bonzini
Re: [PATCH 3/3] time: drop local_irq_save/restore from irqtime_account_irq
On 11/07/2016 18:53, r...@redhat.com wrote: > From: Rik van Riel > > Paolo pointed out that irqs are already blocked when irqtime_account_irq > is called. That means there is no reason to call local_irq_save/restore > again. > > Signed-off-by: Rik van Riel > Suggested-by: Paolo Bonzini > --- > kernel/sched/cputime.c | 4 > 1 file changed, 4 deletions(-) > > diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c > index ca7e33cb0967..7b6fa4d7ad4c 100644 > --- a/kernel/sched/cputime.c > +++ b/kernel/sched/cputime.c > @@ -49,15 +49,12 @@ DEFINE_PER_CPU(seqcount_t, irq_time_seq); > */ > void irqtime_account_irq(struct task_struct *curr) > { > - unsigned long flags; > s64 delta; > int cpu; > > if (!sched_clock_irqtime) > return; > > - local_irq_save(flags); > - > cpu = smp_processor_id(); > delta = sched_clock_cpu(cpu) - __this_cpu_read(irq_start_time); > __this_cpu_add(irq_start_time, delta); > @@ -75,7 +72,6 @@ void irqtime_account_irq(struct task_struct *curr) > __this_cpu_add(cpu_softirq_time, delta); > > irq_time_write_end(); > - local_irq_restore(flags); > } > EXPORT_SYMBOL_GPL(irqtime_account_irq); > > Reviewed-by: Paolo Bonzini
Re: [PATCH] Input: /input/mouse/elan_i2c_core.c Fix some Asus touchapod which casue TP no funciton sometimes, the patch detect some specific touchpad and run a special initialize
On Mon, Jul 11, 2016 at 08:40:58PM +0800, 廖崇榮 wrote: > > + > > + error = data->ops->get_sm_version(client, &data->ic_type, > > + &data->sm_version); > > + if (error) > > + return false; > > That means we'd be fetching product ID and IC type twice when initializing > the device. Can we come up with a way to do it once? > [KT]:Because the elan_query_device_info() is behind the elan_initialize(). > That's why I fetching product ID and IC type in the elan_initialize() > I will discuss with FW team and then execute elan_query_device_info() > first to get product_id and ic_type. We might need to split fetching product ID and IC type from the rest of the device info. Thanks. -- Dmitry
Re: [PATCH] Input: /input/mouse/elan_i2c_core.c Fix some Asus touchapod which casue TP no funciton sometimes, the patch detect some specific touchpad and run a special initialize
On Mon, Jul 11, 2016 at 08:40:58PM +0800, 廖崇榮 wrote: > > + > > + error = data->ops->get_sm_version(client, &data->ic_type, > > + &data->sm_version); > > + if (error) > > + return false; > > That means we'd be fetching product ID and IC type twice when initializing > the device. Can we come up with a way to do it once? > [KT]:Because the elan_query_device_info() is behind the elan_initialize(). > That's why I fetching product ID and IC type in the elan_initialize() > I will discuss with FW team and then execute elan_query_device_info() > first to get product_id and ic_type. We might need to split fetching product ID and IC type from the rest of the device info. Thanks. -- Dmitry
Re: [PATCH 3/7] security,overlayfs: Provide security hook for copy up of xattrs for overlay file
On Mon, Jul 11, 2016 at 11:31:47AM -0400, Stephen Smalley wrote: [..] > > +static inline int security_inode_copy_up_xattr(const char *name) > > +{ > > + -EOPNOTSUPP; > > return? Yes, this one I fixed it in my patches now. kbuild also flagged this. Vivek
Re: [PATCH 3/7] security,overlayfs: Provide security hook for copy up of xattrs for overlay file
On Mon, Jul 11, 2016 at 11:31:47AM -0400, Stephen Smalley wrote: [..] > > +static inline int security_inode_copy_up_xattr(const char *name) > > +{ > > + -EOPNOTSUPP; > > return? Yes, this one I fixed it in my patches now. kbuild also flagged this. Vivek
Re: [PATCH RESEND] iwlwifi, Do not implement thermal zone unless ucode is loaded
On 07/11/2016 12:07 PM, Coelho, Luciano wrote: > On Mon, 2016-07-11 at 11:18 -0400, Prarit Bhargava wrote: >> Didn't get any feedback or review comments on this patch. Resending >> ... >> >> P. > > Sorry, this got flooded down my inbox. NP, Luciano -- My worry was that it hadn't been seen or didn't make it out to the list. I'm being a bit impatient too ;) P. > > >> ---8<--- >> >> The iwlwifi driver implements a thermal zone and hwmon device, but >> returns -EIO on temperature reads if the firmware isn't loaded. This >> results in the error >> >> iwlwifi-virtual-0 >> Adapter: Virtual device >> ERROR: Can't get value of subfeature temp1_input: I/O error >> temp1:N/A >> >> being output when using sensors from the lm-sensors package. Since >> the temperature cannot be read unless the ucode is loaded there is no >> reason to add the interface only to have it return an error 100% of >> the time. >> >> This patch moves the firmware check to >> iwl_mvm_thermal_zone_register() and >> stops the thermal zone from being created if the ucode hasn't been >> loaded. >> >> Signed-off-by: Prarit Bhargava >> Cc: Johannes Berg >> Cc: Emmanuel Grumbach >> Cc: Luca Coelho >> Cc: Intel Linux Wireless >> Cc: Kalle Valo >> Cc: Chaya Rachel Ivgi >> Cc: Sara Sharon >> Cc: linux-wirel...@vger.kernel.org >> Cc: net...@vger.kernel.org >> --- > > I have now sent it for review on our internal tree. > > -- > Luca. >
Re: [PATCH RESEND] iwlwifi, Do not implement thermal zone unless ucode is loaded
On 07/11/2016 12:07 PM, Coelho, Luciano wrote: > On Mon, 2016-07-11 at 11:18 -0400, Prarit Bhargava wrote: >> Didn't get any feedback or review comments on this patch. Resending >> ... >> >> P. > > Sorry, this got flooded down my inbox. NP, Luciano -- My worry was that it hadn't been seen or didn't make it out to the list. I'm being a bit impatient too ;) P. > > >> ---8<--- >> >> The iwlwifi driver implements a thermal zone and hwmon device, but >> returns -EIO on temperature reads if the firmware isn't loaded. This >> results in the error >> >> iwlwifi-virtual-0 >> Adapter: Virtual device >> ERROR: Can't get value of subfeature temp1_input: I/O error >> temp1:N/A >> >> being output when using sensors from the lm-sensors package. Since >> the temperature cannot be read unless the ucode is loaded there is no >> reason to add the interface only to have it return an error 100% of >> the time. >> >> This patch moves the firmware check to >> iwl_mvm_thermal_zone_register() and >> stops the thermal zone from being created if the ucode hasn't been >> loaded. >> >> Signed-off-by: Prarit Bhargava >> Cc: Johannes Berg >> Cc: Emmanuel Grumbach >> Cc: Luca Coelho >> Cc: Intel Linux Wireless >> Cc: Kalle Valo >> Cc: Chaya Rachel Ivgi >> Cc: Sara Sharon >> Cc: linux-wirel...@vger.kernel.org >> Cc: net...@vger.kernel.org >> --- > > I have now sent it for review on our internal tree. > > -- > Luca. >
Re: [kernel-hardening] Re: [PATCH v3 06/13] fork: Add generic vmalloced stack support
2016-06-21 21:32 GMT+03:00 Rik van Riel: > On Tue, 2016-06-21 at 10:13 -0700, Kees Cook wrote: >> On Tue, Jun 21, 2016 at 9:59 AM, Andy Lutomirski > > wrote: >> > >> > I'm tempted to explicitly disallow VM_NO_GUARD in the vmalloc >> > range. >> > It has no in-tree users for non-fixed addresses right now. >> What about the lack of pre-range guard page? That seems like a >> critical feature for this. :) > > If VM_NO_GUARD is disallowed, and every vmalloc area has > a guard area behind it, then every subsequent vmalloc area > will have a guard page ahead of it. > > I think disallowing VM_NO_GUARD will be all that is required. > VM_NO_GUARD is a flag of vm_struct. But some vmalloc areas don't have vm_struct (see vm_map_ram()) and don't have guard pages too. Once, vm_map_ram() had guard pages, but they were removed in 248ac0e1943a ("mm/vmalloc: remove guard page from between vmap blocks") due to exhaustion of vmalloc space on 32-bits. I guess we can resurrect guard page on 64bits without any problems. AFAICS per-cpu vmap blocks also don't have guard pages. pcpu vmaps have vm_struct *without* VM_NO_GUARD, but don't actually have the guard pages. It seems to be a harmless bug, because pcpu vmaps use their own alloc/free paths (pcpu_get_vm_areas()/pcpu_free_vm_areas()) and just don't care about vm->flags content. Fortunately, pcpu_get_vm_areas() allocates from top of vmalloc, so the gap between pcpu vmap and regular vmalloc() should be huge. > The only thing we may want to verify on the architectures that > we care about is that there is nothing mapped immediately before > the start of the vmalloc range, otherwise the first vmalloced > area will not have a guard page below it. > > I suspect all the 64 bit architectures are fine in that regard, > with enormous gaps between kernel memory ranges. > > -- > All Rights Reversed. >
Re: [kernel-hardening] Re: [PATCH v3 06/13] fork: Add generic vmalloced stack support
2016-06-21 21:32 GMT+03:00 Rik van Riel : > On Tue, 2016-06-21 at 10:13 -0700, Kees Cook wrote: >> On Tue, Jun 21, 2016 at 9:59 AM, Andy Lutomirski > > wrote: >> > >> > I'm tempted to explicitly disallow VM_NO_GUARD in the vmalloc >> > range. >> > It has no in-tree users for non-fixed addresses right now. >> What about the lack of pre-range guard page? That seems like a >> critical feature for this. :) > > If VM_NO_GUARD is disallowed, and every vmalloc area has > a guard area behind it, then every subsequent vmalloc area > will have a guard page ahead of it. > > I think disallowing VM_NO_GUARD will be all that is required. > VM_NO_GUARD is a flag of vm_struct. But some vmalloc areas don't have vm_struct (see vm_map_ram()) and don't have guard pages too. Once, vm_map_ram() had guard pages, but they were removed in 248ac0e1943a ("mm/vmalloc: remove guard page from between vmap blocks") due to exhaustion of vmalloc space on 32-bits. I guess we can resurrect guard page on 64bits without any problems. AFAICS per-cpu vmap blocks also don't have guard pages. pcpu vmaps have vm_struct *without* VM_NO_GUARD, but don't actually have the guard pages. It seems to be a harmless bug, because pcpu vmaps use their own alloc/free paths (pcpu_get_vm_areas()/pcpu_free_vm_areas()) and just don't care about vm->flags content. Fortunately, pcpu_get_vm_areas() allocates from top of vmalloc, so the gap between pcpu vmap and regular vmalloc() should be huge. > The only thing we may want to verify on the architectures that > we care about is that there is nothing mapped immediately before > the start of the vmalloc range, otherwise the first vmalloced > area will not have a guard page below it. > > I suspect all the 64 bit architectures are fine in that regard, > with enormous gaps between kernel memory ranges. > > -- > All Rights Reversed. >
Re: [PATCH 3/7] security,overlayfs: Provide security hook for copy up of xattrs for overlay file
On Mon, Jul 11, 2016 at 11:31:47AM -0400, Stephen Smalley wrote: > On 07/08/2016 12:19 PM, Vivek Goyal wrote: > > Provide a security hook which is called when xattrs of a file are being > > copied up. This hook is called once for each xattr and LSM can return 0 > > to access the xattr, 1 to reject xattr, -EOPNOTSUPP if none of the lsms > > claim to know xattr and a negative error code if something went terribly > > wrong. > > 0 if the security module wants the xattr to be copied up, 1 if the > security module wants the xattr to be discarded on the copy, -EOPNOTSUPP > if the security module does not handle/manage the xattr, or a -errno > upon an error. Ok, will change the description. > > > > > If 0 or -EOPNOTSUPP is returned, xattr will be copied up, if 1 is returned, > > xattr will not be copied up and if negative error code is returned, copy up > > will be aborted. > > Not sure I understand the benefit of the 0 vs -EOPNOTSUPP distinction. I am not sure either. Casey wanted to have four states so I introduced it. Thanks Vivek
Re: [PATCH 3/7] security,overlayfs: Provide security hook for copy up of xattrs for overlay file
On Mon, Jul 11, 2016 at 11:31:47AM -0400, Stephen Smalley wrote: > On 07/08/2016 12:19 PM, Vivek Goyal wrote: > > Provide a security hook which is called when xattrs of a file are being > > copied up. This hook is called once for each xattr and LSM can return 0 > > to access the xattr, 1 to reject xattr, -EOPNOTSUPP if none of the lsms > > claim to know xattr and a negative error code if something went terribly > > wrong. > > 0 if the security module wants the xattr to be copied up, 1 if the > security module wants the xattr to be discarded on the copy, -EOPNOTSUPP > if the security module does not handle/manage the xattr, or a -errno > upon an error. Ok, will change the description. > > > > > If 0 or -EOPNOTSUPP is returned, xattr will be copied up, if 1 is returned, > > xattr will not be copied up and if negative error code is returned, copy up > > will be aborted. > > Not sure I understand the benefit of the 0 vs -EOPNOTSUPP distinction. I am not sure either. Casey wanted to have four states so I introduced it. Thanks Vivek
Re: [PATCH 2/2] trace-cmd: Use tracecmd_peek_next_data() in fgraph_ent_handler
On Fri, 8 Jul 2016 14:56:12 +0900 Namhyung Kim wrote: > When a task was migrated to other cpu in the middle of a function, the > fgraph_exit record will be in a different cpu than the enter record. > But currently fgraph_ent_handler() only peeks at the same cpu so it > could read an incorrect record. > > For example, please see following raw records: > > bash-10478 [007]73.454273: funcgraph_entry: func=0x8123bf90 > depth=0 > bash-10478 [000]73.454650: funcgraph_exit:func=0x8123bf90 > calltime=0x111a37483c rettime=0x111a3d0285 overrun=0x0 depth=0 > bash-10478 [000]74.456383: funcgraph_entry: func=0x8123bf90 > depth=0 > bash-10478 [000]74.456655: funcgraph_exit:func=0x8123bf90 > calltime=0x1155f24337 rettime=0x1155f66559 overrun=0x0 depth=0 > bash-10478 [000]75.458517: funcgraph_entry: func=0x8123bf90 > depth=0 > bash-10478 [001]75.458849: funcgraph_exit:func=0x8123bf90 > calltime=0x1191ad9de0 rettime=0x1191b2a6aa overrun=0x0 depth=0 > bash-10478 [001]76.460482: funcgraph_entry: func=0x8123bf90 > depth=0 > bash-10478 [000]76.460679: funcgraph_exit:func=0x8123bf90 > calltime=0x11cd6662b4 rettime=0x11cd695e03 overrun=0x0 depth=0 > bash-10478 [000]77.462334: funcgraph_entry: func=0x8123bf90 > depth=0 > bash-10478 [004]77.462564: funcgraph_exit:func=0x8123bf90 > calltime=0x12091d71c4 rettime=0x120920e977 overrun=0x0 depth=0 > bash-10478 [004]78.464315: funcgraph_entry: func=0x8123bf90 > depth=0 > bash-10478 [001]78.464644: funcgraph_exit:func=0x8123bf90 > calltime=0x1244d674de rettime=0x1244db7329 overrun=0x0 depth=0 > bash-10478 [001]79.466018: funcgraph_entry: func=0x8123bf90 > depth=0 > bash-10478 [004]79.466326: funcgraph_exit:func=0x8123bf90 > calltime=0x12808b3940 rettime=0x12808fe819 overrun=0x0 depth=0 > bash-10478 [004]80.468005: funcgraph_entry: func=0x8123bf90 > depth=0 > bash-10478 [002]80.468291: funcgraph_exit:func=0x8123bf90 > calltime=0x12bc44551f rettime=0x12bc48ac9a overrun=0x0 depth=0 > bash-10478 [002]81.469718: funcgraph_entry: func=0x8123bf90 > 
depth=0 > bash-10478 [007]81.470088: funcgraph_exit:func=0x8123bf90 > calltime=0x12f7f945b8 rettime=0x12f7fee028 overrun=0x0 depth=0 > > The first entry was call to cma_alloc function, it was on cpu 7 but the > task was migrated to cpu 0 before returning from the function. > Currently trace-cmd shows like below: > > bash-10478 [007]73.454273: funcgraph_entry: ! 367.216 us | > cma_alloc(); > bash-10478 [000]73.454650: funcgraph_exit: ! 375.369 us | } > bash-10478 [000]74.456383: funcgraph_entry: ! 270.882 us | > cma_alloc(); > bash-10478 [000]75.458517: funcgraph_entry: ! 195.407 us | > cma_alloc(); > bash-10478 [001]75.458849: funcgraph_exit: ! 329.930 us | } > bash-10478 [001]76.460482: funcgraph_entry: ! 327.243 us | > cma_alloc(); > bash-10478 [000]77.462334: funcgraph_entry: ! 293.465 us | > cma_alloc(); > bash-10478 [004]77.462564: funcgraph_exit: ! 227.251 us | } > bash-10478 [004]78.464315: funcgraph_entry: ! 306.905 us | > cma_alloc(); > bash-10478 [001]79.466018: funcgraph_entry: ! 303.196 us | > cma_alloc(); > bash-10478 [004]80.468005: funcgraph_entry: | > cma_alloc() { > bash-10478 [002]80.468291: funcgraph_exit: ! 284.539 us | } > bash-10478 [002]81.469718: funcgraph_entry: ! 323.215 us | > cma_alloc(); > > This is because the first funcgraph_entry on cpu 7 matched to the last > funcgraph_exit on cpu 7. And second funcgraph_exit on cpu 0 was shown > alone. We need to match record from all cpu rather than the same cpu. > In this case, entry on cpu 7 should be paired with exit on cpu 0. > > With this patch, the output look like below: > > bash-10478 [007]73.454273: funcgraph_entry: ! 375.369 us | > cma_alloc(); > bash-10478 [000]74.456383: funcgraph_entry: ! 270.882 us | > cma_alloc(); > bash-10478 [000]75.458517: funcgraph_entry: ! 329.930 us | > cma_alloc(); > bash-10478 [001]76.460482: funcgraph_entry: ! 195.407 us | > cma_alloc(); > bash-10478 [000]77.462334: funcgraph_entry: ! 
227.251 us | > cma_alloc(); > bash-10478 [004]78.464315: funcgraph_entry: ! 327.243 us | > cma_alloc(); > bash-10478 [001]79.466018: funcgraph_entry: ! 306.905 us | > cma_alloc(); > bash-10478 [004]80.468005: funcgraph_entry: ! 284.539 us | > cma_alloc(); > bash-10478 [002]81.469718: funcgraph_entry: ! 367.216 us | > cma_alloc(); > > Maybe we can separate enter and exit if they happened on different > cpu. Anyway the
Re: [PATCH 2/2] trace-cmd: Use tracecmd_peek_next_data() in fgraph_ent_handler
On Fri, 8 Jul 2016 14:56:12 +0900 Namhyung Kim wrote: > When a task was migrated to other cpu in the middle of a function, the > fgraph_exit record will be in a different cpu than the enter record. > But currently fgraph_ent_handler() only peeks at the same cpu so it > could read an incorrect record. > > For example, please see following raw records: > > bash-10478 [007]73.454273: funcgraph_entry: func=0x8123bf90 > depth=0 > bash-10478 [000]73.454650: funcgraph_exit:func=0x8123bf90 > calltime=0x111a37483c rettime=0x111a3d0285 overrun=0x0 depth=0 > bash-10478 [000]74.456383: funcgraph_entry: func=0x8123bf90 > depth=0 > bash-10478 [000]74.456655: funcgraph_exit:func=0x8123bf90 > calltime=0x1155f24337 rettime=0x1155f66559 overrun=0x0 depth=0 > bash-10478 [000]75.458517: funcgraph_entry: func=0x8123bf90 > depth=0 > bash-10478 [001]75.458849: funcgraph_exit:func=0x8123bf90 > calltime=0x1191ad9de0 rettime=0x1191b2a6aa overrun=0x0 depth=0 > bash-10478 [001]76.460482: funcgraph_entry: func=0x8123bf90 > depth=0 > bash-10478 [000]76.460679: funcgraph_exit:func=0x8123bf90 > calltime=0x11cd6662b4 rettime=0x11cd695e03 overrun=0x0 depth=0 > bash-10478 [000]77.462334: funcgraph_entry: func=0x8123bf90 > depth=0 > bash-10478 [004]77.462564: funcgraph_exit:func=0x8123bf90 > calltime=0x12091d71c4 rettime=0x120920e977 overrun=0x0 depth=0 > bash-10478 [004]78.464315: funcgraph_entry: func=0x8123bf90 > depth=0 > bash-10478 [001]78.464644: funcgraph_exit:func=0x8123bf90 > calltime=0x1244d674de rettime=0x1244db7329 overrun=0x0 depth=0 > bash-10478 [001]79.466018: funcgraph_entry: func=0x8123bf90 > depth=0 > bash-10478 [004]79.466326: funcgraph_exit:func=0x8123bf90 > calltime=0x12808b3940 rettime=0x12808fe819 overrun=0x0 depth=0 > bash-10478 [004]80.468005: funcgraph_entry: func=0x8123bf90 > depth=0 > bash-10478 [002]80.468291: funcgraph_exit:func=0x8123bf90 > calltime=0x12bc44551f rettime=0x12bc48ac9a overrun=0x0 depth=0 > bash-10478 [002]81.469718: funcgraph_entry: func=0x8123bf90 > 
depth=0 > bash-10478 [007]81.470088: funcgraph_exit:func=0x8123bf90 > calltime=0x12f7f945b8 rettime=0x12f7fee028 overrun=0x0 depth=0 > > The first entry was call to cma_alloc function, it was on cpu 7 but the > task was migrated to cpu 0 before returning from the function. > Currently trace-cmd shows like below: > > bash-10478 [007]73.454273: funcgraph_entry: ! 367.216 us | > cma_alloc(); > bash-10478 [000]73.454650: funcgraph_exit: ! 375.369 us | } > bash-10478 [000]74.456383: funcgraph_entry: ! 270.882 us | > cma_alloc(); > bash-10478 [000]75.458517: funcgraph_entry: ! 195.407 us | > cma_alloc(); > bash-10478 [001]75.458849: funcgraph_exit: ! 329.930 us | } > bash-10478 [001]76.460482: funcgraph_entry: ! 327.243 us | > cma_alloc(); > bash-10478 [000]77.462334: funcgraph_entry: ! 293.465 us | > cma_alloc(); > bash-10478 [004]77.462564: funcgraph_exit: ! 227.251 us | } > bash-10478 [004]78.464315: funcgraph_entry: ! 306.905 us | > cma_alloc(); > bash-10478 [001]79.466018: funcgraph_entry: ! 303.196 us | > cma_alloc(); > bash-10478 [004]80.468005: funcgraph_entry: | > cma_alloc() { > bash-10478 [002]80.468291: funcgraph_exit: ! 284.539 us | } > bash-10478 [002]81.469718: funcgraph_entry: ! 323.215 us | > cma_alloc(); > > This is because the first funcgraph_entry on cpu 7 matched to the last > funcgraph_exit on cpu 7. And second funcgraph_exit on cpu 0 was shown > alone. We need to match record from all cpu rather than the same cpu. > In this case, entry on cpu 7 should be paired with exit on cpu 0. > > With this patch, the output look like below: > > bash-10478 [007]73.454273: funcgraph_entry: ! 375.369 us | > cma_alloc(); > bash-10478 [000]74.456383: funcgraph_entry: ! 270.882 us | > cma_alloc(); > bash-10478 [000]75.458517: funcgraph_entry: ! 329.930 us | > cma_alloc(); > bash-10478 [001]76.460482: funcgraph_entry: ! 195.407 us | > cma_alloc(); > bash-10478 [000]77.462334: funcgraph_entry: ! 
227.251 us | > cma_alloc(); > bash-10478 [004]78.464315: funcgraph_entry: ! 327.243 us | > cma_alloc(); > bash-10478 [001]79.466018: funcgraph_entry: ! 306.905 us | > cma_alloc(); > bash-10478 [004]80.468005: funcgraph_entry: ! 284.539 us | > cma_alloc(); > bash-10478 [002]81.469718: funcgraph_entry: ! 367.216 us | > cma_alloc(); > > Maybe we can separate enter and exit if they happened on different > cpu. Anyway the time duration has
[PATCH 3/3] time: drop local_irq_save/restore from irqtime_account_irq
From: Rik van Riel Paolo pointed out that irqs are already blocked when irqtime_account_irq is called. That means there is no reason to call local_irq_save/restore again. Signed-off-by: Rik van Riel Suggested-by: Paolo Bonzini --- kernel/sched/cputime.c | 4 1 file changed, 4 deletions(-) diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index ca7e33cb0967..7b6fa4d7ad4c 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -49,15 +49,12 @@ DEFINE_PER_CPU(seqcount_t, irq_time_seq); */ void irqtime_account_irq(struct task_struct *curr) { - unsigned long flags; s64 delta; int cpu; if (!sched_clock_irqtime) return; - local_irq_save(flags); - cpu = smp_processor_id(); delta = sched_clock_cpu(cpu) - __this_cpu_read(irq_start_time); __this_cpu_add(irq_start_time, delta); @@ -75,7 +72,6 @@ void irqtime_account_irq(struct task_struct *curr) __this_cpu_add(cpu_softirq_time, delta); irq_time_write_end(); - local_irq_restore(flags); } EXPORT_SYMBOL_GPL(irqtime_account_irq); -- 2.7.4
[PATCH 3/3] time: drop local_irq_save/restore from irqtime_account_irq
From: Rik van Riel Paolo pointed out that irqs are already blocked when irqtime_account_irq is called. That means there is no reason to call local_irq_save/restore again. Signed-off-by: Rik van Riel Suggested-by: Paolo Bonzini --- kernel/sched/cputime.c | 4 1 file changed, 4 deletions(-) diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index ca7e33cb0967..7b6fa4d7ad4c 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -49,15 +49,12 @@ DEFINE_PER_CPU(seqcount_t, irq_time_seq); */ void irqtime_account_irq(struct task_struct *curr) { - unsigned long flags; s64 delta; int cpu; if (!sched_clock_irqtime) return; - local_irq_save(flags); - cpu = smp_processor_id(); delta = sched_clock_cpu(cpu) - __this_cpu_read(irq_start_time); __this_cpu_add(irq_start_time, delta); @@ -75,7 +72,6 @@ void irqtime_account_irq(struct task_struct *curr) __this_cpu_add(cpu_softirq_time, delta); irq_time_write_end(); - local_irq_restore(flags); } EXPORT_SYMBOL_GPL(irqtime_account_irq); -- 2.7.4
[PATCH 2/3] nohz,cputime: replace VTIME_GEN irq time code with IRQ_TIME_ACCOUNTING code
From: Rik van Riel The CONFIG_VIRT_CPU_ACCOUNTING_GEN irq time tracking code does not appear to currently work right. On CPUs without nohz_full=, only tick based irq time sampling is done, which breaks down when dealing with a nohz_idle CPU. On firewalls and similar systems, no ticks may happen on a CPU for a while, and the irq time spent may never get accounted properly. This can cause issues with capacity planning and power saving, which use the CPU statistics as inputs in decision making. Replace the VTIME_GEN vtime irq time code, and replace it with the IRQ_TIME_ACCOUNTING code, when selected as a config option by the user. Signed-off-by: Rik van Riel --- include/linux/vtime.h | 32 ++-- init/Kconfig | 6 +++--- kernel/sched/cputime.c | 16 +++- 3 files changed, 20 insertions(+), 34 deletions(-) diff --git a/include/linux/vtime.h b/include/linux/vtime.h index fa2196990f84..d1977d84ebdf 100644 --- a/include/linux/vtime.h +++ b/include/linux/vtime.h @@ -14,6 +14,18 @@ struct task_struct; */ #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE static inline bool vtime_accounting_cpu_enabled(void) { return true; } + +#ifdef __ARCH_HAS_VTIME_ACCOUNT +extern void vtime_account_irq_enter(struct task_struct *tsk); +#else +extern void vtime_common_account_irq_enter(struct task_struct *tsk); +static inline void vtime_account_irq_enter(struct task_struct *tsk) +{ + if (vtime_accounting_cpu_enabled()) + vtime_common_account_irq_enter(tsk); +} +#endif /* __ARCH_HAS_VTIME_ACCOUNT */ + #endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN @@ -64,17 +76,6 @@ extern void vtime_account_system(struct task_struct *tsk); extern void vtime_account_idle(struct task_struct *tsk); extern void vtime_account_user(struct task_struct *tsk); -#ifdef __ARCH_HAS_VTIME_ACCOUNT -extern void vtime_account_irq_enter(struct task_struct *tsk); -#else -extern void vtime_common_account_irq_enter(struct task_struct *tsk); -static inline void vtime_account_irq_enter(struct task_struct 
*tsk) -{ - if (vtime_accounting_cpu_enabled()) - vtime_common_account_irq_enter(tsk); -} -#endif /* __ARCH_HAS_VTIME_ACCOUNT */ - #else /* !CONFIG_VIRT_CPU_ACCOUNTING */ static inline void vtime_task_switch(struct task_struct *prev) { } @@ -85,13 +86,8 @@ static inline void vtime_account_irq_enter(struct task_struct *tsk) { } #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN extern void arch_vtime_task_switch(struct task_struct *tsk); -extern void vtime_gen_account_irq_exit(struct task_struct *tsk); - -static inline void vtime_account_irq_exit(struct task_struct *tsk) -{ - if (vtime_accounting_cpu_enabled()) - vtime_gen_account_irq_exit(tsk); -} +static inline void vtime_account_irq_enter(struct task_struct *tsk) { } +static inline void vtime_account_irq_exit(struct task_struct *tsk) { } extern void vtime_user_enter(struct task_struct *tsk); diff --git a/init/Kconfig b/init/Kconfig index 0dfd09d54c65..4c7ee4f136cf 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -375,9 +375,11 @@ config VIRT_CPU_ACCOUNTING_GEN If unsure, say N. +endchoice + config IRQ_TIME_ACCOUNTING bool "Fine granularity task level IRQ time accounting" - depends on HAVE_IRQ_TIME_ACCOUNTING && !NO_HZ_FULL + depends on HAVE_IRQ_TIME_ACCOUNTING && !VIRT_CPU_ACCOUNTING_NATIVE help Select this option to enable fine granularity task irq time accounting. This is done by reading a timestamp on each @@ -386,8 +388,6 @@ config IRQ_TIME_ACCOUNTING If in doubt, say N here. 
-endchoice - config BSD_PROCESS_ACCT bool "BSD Process Accounting" depends on MULTIUSER diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index db82ae12cf01..ca7e33cb0967 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -711,14 +711,14 @@ static cputime_t vtime_delta(struct task_struct *tsk) static cputime_t get_vtime_delta(struct task_struct *tsk) { unsigned long now = READ_ONCE(jiffies); - cputime_t delta, steal; + cputime_t delta, other; delta = jiffies_to_cputime(now - tsk->vtime_snap); - steal = steal_account_process_time(delta); + other = account_other_time(delta); WARN_ON_ONCE(tsk->vtime_snap_whence == VTIME_INACTIVE); tsk->vtime_snap = now; - return delta - steal; + return delta - other; } static void __vtime_account_system(struct task_struct *tsk) @@ -738,16 +738,6 @@ void vtime_account_system(struct task_struct *tsk) write_seqcount_end(&tsk->vtime_seqcount); } -void vtime_gen_account_irq_exit(struct task_struct *tsk) -{ - write_seqcount_begin(&tsk->vtime_seqcount); - if (vtime_delta(tsk)) - __vtime_account_system(tsk); - if (context_tracking_in_user()) - tsk->vtime_snap_whence = VTIME_USER; -
[PATCH 2/3] nohz,cputime: replace VTIME_GEN irq time code with IRQ_TIME_ACCOUNTING code
From: Rik van Riel The CONFIG_VIRT_CPU_ACCOUNTING_GEN irq time tracking code does not appear to currently work right. On CPUs without nohz_full=, only tick based irq time sampling is done, which breaks down when dealing with a nohz_idle CPU. On firewalls and similar systems, no ticks may happen on a CPU for a while, and the irq time spent may never get accounted properly. This can cause issues with capacity planning and power saving, which use the CPU statistics as inputs in decision making. Replace the VTIME_GEN vtime irq time code, and replace it with the IRQ_TIME_ACCOUNTING code, when selected as a config option by the user. Signed-off-by: Rik van Riel --- include/linux/vtime.h | 32 ++-- init/Kconfig | 6 +++--- kernel/sched/cputime.c | 16 +++- 3 files changed, 20 insertions(+), 34 deletions(-) diff --git a/include/linux/vtime.h b/include/linux/vtime.h index fa2196990f84..d1977d84ebdf 100644 --- a/include/linux/vtime.h +++ b/include/linux/vtime.h @@ -14,6 +14,18 @@ struct task_struct; */ #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE static inline bool vtime_accounting_cpu_enabled(void) { return true; } + +#ifdef __ARCH_HAS_VTIME_ACCOUNT +extern void vtime_account_irq_enter(struct task_struct *tsk); +#else +extern void vtime_common_account_irq_enter(struct task_struct *tsk); +static inline void vtime_account_irq_enter(struct task_struct *tsk) +{ + if (vtime_accounting_cpu_enabled()) + vtime_common_account_irq_enter(tsk); +} +#endif /* __ARCH_HAS_VTIME_ACCOUNT */ + #endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN @@ -64,17 +76,6 @@ extern void vtime_account_system(struct task_struct *tsk); extern void vtime_account_idle(struct task_struct *tsk); extern void vtime_account_user(struct task_struct *tsk); -#ifdef __ARCH_HAS_VTIME_ACCOUNT -extern void vtime_account_irq_enter(struct task_struct *tsk); -#else -extern void vtime_common_account_irq_enter(struct task_struct *tsk); -static inline void vtime_account_irq_enter(struct task_struct 
*tsk) -{ - if (vtime_accounting_cpu_enabled()) - vtime_common_account_irq_enter(tsk); -} -#endif /* __ARCH_HAS_VTIME_ACCOUNT */ - #else /* !CONFIG_VIRT_CPU_ACCOUNTING */ static inline void vtime_task_switch(struct task_struct *prev) { } @@ -85,13 +86,8 @@ static inline void vtime_account_irq_enter(struct task_struct *tsk) { } #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN extern void arch_vtime_task_switch(struct task_struct *tsk); -extern void vtime_gen_account_irq_exit(struct task_struct *tsk); - -static inline void vtime_account_irq_exit(struct task_struct *tsk) -{ - if (vtime_accounting_cpu_enabled()) - vtime_gen_account_irq_exit(tsk); -} +static inline void vtime_account_irq_enter(struct task_struct *tsk) { } +static inline void vtime_account_irq_exit(struct task_struct *tsk) { } extern void vtime_user_enter(struct task_struct *tsk); diff --git a/init/Kconfig b/init/Kconfig index 0dfd09d54c65..4c7ee4f136cf 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -375,9 +375,11 @@ config VIRT_CPU_ACCOUNTING_GEN If unsure, say N. +endchoice + config IRQ_TIME_ACCOUNTING bool "Fine granularity task level IRQ time accounting" - depends on HAVE_IRQ_TIME_ACCOUNTING && !NO_HZ_FULL + depends on HAVE_IRQ_TIME_ACCOUNTING && !VIRT_CPU_ACCOUNTING_NATIVE help Select this option to enable fine granularity task irq time accounting. This is done by reading a timestamp on each @@ -386,8 +388,6 @@ config IRQ_TIME_ACCOUNTING If in doubt, say N here. 
-endchoice - config BSD_PROCESS_ACCT bool "BSD Process Accounting" depends on MULTIUSER diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index db82ae12cf01..ca7e33cb0967 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -711,14 +711,14 @@ static cputime_t vtime_delta(struct task_struct *tsk) static cputime_t get_vtime_delta(struct task_struct *tsk) { unsigned long now = READ_ONCE(jiffies); - cputime_t delta, steal; + cputime_t delta, other; delta = jiffies_to_cputime(now - tsk->vtime_snap); - steal = steal_account_process_time(delta); + other = account_other_time(delta); WARN_ON_ONCE(tsk->vtime_snap_whence == VTIME_INACTIVE); tsk->vtime_snap = now; - return delta - steal; + return delta - other; } static void __vtime_account_system(struct task_struct *tsk) @@ -738,16 +738,6 @@ void vtime_account_system(struct task_struct *tsk) write_seqcount_end(&tsk->vtime_seqcount); } -void vtime_gen_account_irq_exit(struct task_struct *tsk) -{ - write_seqcount_begin(&tsk->vtime_seqcount); - if (vtime_delta(tsk)) - __vtime_account_system(tsk); - if (context_tracking_in_user()) - tsk->vtime_snap_whence = VTIME_USER; -
[PATCH v4 0/3] sched,time: fix irq time accounting with nohz_idle
Currently irq time accounting only works in these cases: 1) purely tick based accounting 2) nohz_full accounting, but only on housekeeping & nohz_full CPUs 3) architectures with native vtime accounting On nohz_idle CPUs, which are probably the majority nowadays, irq time accounting is currently broken. This leads to systems reporting a dramatically lower amount of irq & softirq time than is actually spent handling them, with all the time spent while the system is in the idle task being accounted as idle. This patch set seems to bring the amount of irq time reported by top (and /proc/stat) roughly in line with that measured when I do a "perf record -g -a" run to see what is using all that time. The amount of irq time used, especially softirq, is shockingly high, to the point of me thinking this patch set may be wrong, but the numbers seem to match what perf is giving me... These patches apply on top of Wanpeng Li's steal time patches. CONFIG_IRQ_TIME_ACCOUNTING is now a config option that is available as a separate choice from tick based / nohz_idle / nohz_full mode, as suggested by Frederic Weisbecker. Next up: look at the things that are using CPU time on an otherwise idle system, and see if I can make those a little faster :) v2: address Peterz's concerns, some more cleanups v3: rewrite the code along Frederic's suggestions, now cputime_t is used everywhere v4: greatly simplify the local_irq_save/restore optimisation, thanks to Paolo pointing out irqs are already blocked by the callers
[PATCH v4 0/3] sched,time: fix irq time accounting with nohz_idle
Currently irq time accounting only works in these cases: 1) purely tick based accounting 2) nohz_full accounting, but only on housekeeping & nohz_full CPUs 3) architectures with native vtime accounting On nohz_idle CPUs, which are probably the majority nowadays, irq time accounting is currently broken. This leads to systems reporting a dramatically lower amount of irq & softirq time than is actually spent handling them, with all the time spent while the system is in the idle task being accounted as idle. This patch set seems to bring the amount of irq time reported by top (and /proc/stat) roughly in line with that measured when I do a "perf record -g -a" run to see what is using all that time. The amount of irq time used, especially softirq, is shockingly high, to the point of me thinking this patch set may be wrong, but the numbers seem to match what perf is giving me... These patches apply on top of Wanpeng Li's steal time patches. CONFIG_IRQ_TIME_ACCOUNTING is now a config option that is available as a separate choice from tick based / nohz_idle / nohz_full mode, as suggested by Frederic Weisbecker. Next up: look at the things that are using CPU time on an otherwise idle system, and see if I can make those a little faster :) v2: address Peterz's concerns, some more cleanups v3: rewrite the code along Frederic's suggestions, now cputime_t is used everywhere v4: greatly simplify the local_irq_save/restore optimisation, thanks to Paolo pointing out irqs are already blocked by the callers
Re: [PATCH 1/7] security, overlayfs: provide copy up security hook for unioned files
On Mon, Jul 11, 2016 at 11:24:26AM -0400, Stephen Smalley wrote: > On 07/08/2016 12:19 PM, Vivek Goyal wrote: > > Provide a security hook to label new file correctly when a file is copied > > up from lower layer to upper layer of a overlay/union mount. > > > > This hook can prepare a new set of creds which are suitable for new file > > creation during copy up. Caller will use new creds to create file and then > > revert back to old creds and release new creds. > > > > Signed-off-by: Vivek Goyal > > --- > > fs/overlayfs/copy_up.c| 18 ++ > > include/linux/lsm_hooks.h | 11 +++ > > include/linux/security.h | 6 ++ > > security/security.c | 8 > > 4 files changed, 43 insertions(+) > > > > diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c > > index 80aa6f1..8ebea18 100644 > > --- a/fs/overlayfs/copy_up.c > > +++ b/fs/overlayfs/copy_up.c > > @@ -246,6 +246,8 @@ static int ovl_copy_up_locked(struct dentry *workdir, > > struct dentry *upperdir, > > struct dentry *upper = NULL; > > umode_t mode = stat->mode; > > int err; > > + const struct cred *old_creds = NULL; > > + struct cred *new_creds = NULL; > > > > newdentry = ovl_lookup_temp(workdir, dentry); > > err = PTR_ERR(newdentry); > > @@ -258,10 +260,26 @@ static int ovl_copy_up_locked(struct dentry *workdir, > > struct dentry *upperdir, > > if (IS_ERR(upper)) > > goto out1; > > > > + err = security_inode_copy_up(dentry, &new_creds); > > + if (err < 0) { > > + if (new_creds) > > + put_cred(new_creds); > > Why do we need a put_cred() here? Being paranoid for the case of stacked modules. Say first module allocated creds but second module returned error, in that case creds will have to be freed. I can get rid of it for now and if in future two LSMs implement this hook, one can change it, if need be. Thanks Vivek
Re: [PATCH 1/7] security, overlayfs: provide copy up security hook for unioned files
On Mon, Jul 11, 2016 at 11:24:26AM -0400, Stephen Smalley wrote: > On 07/08/2016 12:19 PM, Vivek Goyal wrote: > > Provide a security hook to label new file correctly when a file is copied > > up from lower layer to upper layer of a overlay/union mount. > > > > This hook can prepare a new set of creds which are suitable for new file > > creation during copy up. Caller will use new creds to create file and then > > revert back to old creds and release new creds. > > > > Signed-off-by: Vivek Goyal > > --- > > fs/overlayfs/copy_up.c| 18 ++ > > include/linux/lsm_hooks.h | 11 +++ > > include/linux/security.h | 6 ++ > > security/security.c | 8 > > 4 files changed, 43 insertions(+) > > > > diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c > > index 80aa6f1..8ebea18 100644 > > --- a/fs/overlayfs/copy_up.c > > +++ b/fs/overlayfs/copy_up.c > > @@ -246,6 +246,8 @@ static int ovl_copy_up_locked(struct dentry *workdir, > > struct dentry *upperdir, > > struct dentry *upper = NULL; > > umode_t mode = stat->mode; > > int err; > > + const struct cred *old_creds = NULL; > > + struct cred *new_creds = NULL; > > > > newdentry = ovl_lookup_temp(workdir, dentry); > > err = PTR_ERR(newdentry); > > @@ -258,10 +260,26 @@ static int ovl_copy_up_locked(struct dentry *workdir, > > struct dentry *upperdir, > > if (IS_ERR(upper)) > > goto out1; > > > > + err = security_inode_copy_up(dentry, &new_creds); > > + if (err < 0) { > > + if (new_creds) > > + put_cred(new_creds); > > Why do we need a put_cred() here? Being paranoid for the case of stacked modules. Say first module allocated creds but second module returned error, in that case creds will have to be freed. I can get rid of it for now and if in future two LSMs implement this hook, one can change it, if need be. Thanks Vivek
[PATCH 1/3] sched,time: count actually elapsed irq & softirq time
From: Rik van Riel Currently, if there was any irq or softirq time during 'ticks' jiffies, the entire period will be accounted as irq or softirq time. This is inaccurate if only a subset of the time was actually spent handling irqs, and could conceivably mis-count all of the ticks during a period as irq time, when there was some irq and some softirq time. This can actually happen when irqtime_account_process_tick is called from account_idle_ticks, which can pass a larger number of ticks down all at once. Fix this by changing irqtime_account_hi_update, irqtime_account_si_update, and steal_account_process_tick to work with cputime_t time units, and return the amount of time spent in each mode. Rename steal_account_process_tick to steal_account_process_time, to reflect that time is now accounted in cputime_t, instead of ticks. Additionally, have irqtime_account_process_tick take into account how much time was spent in each of steal, irq, and softirq time. The latter could help improve the accuracy of cputime accounting when returning from idle on a NO_HZ_IDLE CPU. Properly accounting how much time was spent in hardirq and softirq time will also allow the NO_HZ_FULL code to re-use these same functions for hardirq and softirq accounting. 
Signed-off-by: Rik van Riel --- include/asm-generic/cputime_nsecs.h | 2 + kernel/sched/cputime.c | 124 ++-- 2 files changed, 79 insertions(+), 47 deletions(-) diff --git a/include/asm-generic/cputime_nsecs.h b/include/asm-generic/cputime_nsecs.h index 0f1c6f315cdc..918ebb01486c 100644 --- a/include/asm-generic/cputime_nsecs.h +++ b/include/asm-generic/cputime_nsecs.h @@ -50,6 +50,8 @@ typedef u64 __nocast cputime64_t; (__force u64)(__ct) #define nsecs_to_cputime(__nsecs) \ (__force cputime_t)(__nsecs) +#define nsecs_to_cputime64(__nsecs)\ + (__force cputime_t)(__nsecs) /* diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index 3d60e5d76fdb..db82ae12cf01 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -79,40 +79,50 @@ void irqtime_account_irq(struct task_struct *curr) } EXPORT_SYMBOL_GPL(irqtime_account_irq); -static int irqtime_account_hi_update(void) +static cputime_t irqtime_account_hi_update(cputime_t maxtime) { u64 *cpustat = kcpustat_this_cpu->cpustat; unsigned long flags; - u64 latest_ns; - int ret = 0; + cputime_t irq_cputime; local_irq_save(flags); - latest_ns = this_cpu_read(cpu_hardirq_time); - if (nsecs_to_cputime64(latest_ns) > cpustat[CPUTIME_IRQ]) - ret = 1; + irq_cputime = nsecs_to_cputime64(this_cpu_read(cpu_hardirq_time)) - + cpustat[CPUTIME_IRQ]; + irq_cputime = min(irq_cputime, maxtime); + cpustat[CPUTIME_IRQ] += irq_cputime; local_irq_restore(flags); - return ret; + return irq_cputime; } -static int irqtime_account_si_update(void) +static cputime_t irqtime_account_si_update(cputime_t maxtime) { u64 *cpustat = kcpustat_this_cpu->cpustat; unsigned long flags; - u64 latest_ns; - int ret = 0; + cputime_t softirq_cputime; local_irq_save(flags); - latest_ns = this_cpu_read(cpu_softirq_time); - if (nsecs_to_cputime64(latest_ns) > cpustat[CPUTIME_SOFTIRQ]) - ret = 1; + softirq_cputime = nsecs_to_cputime64(this_cpu_read(cpu_softirq_time)) - + cpustat[CPUTIME_SOFTIRQ]; + softirq_cputime = min(softirq_cputime, maxtime); + 
cpustat[CPUTIME_SOFTIRQ] += softirq_cputime; local_irq_restore(flags); - return ret; + return softirq_cputime; } #else /* CONFIG_IRQ_TIME_ACCOUNTING */ #define sched_clock_irqtime(0) +static cputime_t irqtime_account_hi_update(cputime_t dummy) +{ + return 0; +} + +static cputime_t irqtime_account_si_update(cputime_t dummy) +{ + return 0; +} + #endif /* !CONFIG_IRQ_TIME_ACCOUNTING */ static inline void task_group_account_field(struct task_struct *p, int index, @@ -257,32 +267,45 @@ void account_idle_time(cputime_t cputime) cpustat[CPUTIME_IDLE] += (__force u64) cputime; } -static __always_inline unsigned long steal_account_process_tick(unsigned long max_jiffies) +static __always_inline cputime_t steal_account_process_time(cputime_t maxtime) { #ifdef CONFIG_PARAVIRT if (static_key_false(_steal_enabled)) { + cputime_t steal_cputime; u64 steal; - unsigned long steal_jiffies; steal = paravirt_steal_clock(smp_processor_id()); steal -= this_rq()->prev_steal_time; - /* -* steal is in nsecs but our caller is expecting steal -* time in jiffies. Lets cast the result to jiffies -* granularity and account the rest on the next rounds. -
[PATCH 1/3] sched,time: count actually elapsed irq & softirq time
From: Rik van Riel Currently, if there was any irq or softirq time during 'ticks' jiffies, the entire period will be accounted as irq or softirq time. This is inaccurate if only a subset of the time was actually spent handling irqs, and could conceivably mis-count all of the ticks during a period as irq time, when there was some irq and some softirq time. This can actually happen when irqtime_account_process_tick is called from account_idle_ticks, which can pass a larger number of ticks down all at once. Fix this by changing irqtime_account_hi_update, irqtime_account_si_update, and steal_account_process_tick to work with cputime_t time units, and return the amount of time spent in each mode. Rename steal_account_process_tick to steal_account_process_time, to reflect that time is now accounted in cputime_t, instead of ticks. Additionally, have irqtime_account_process_tick take into account how much time was spent in each of steal, irq, and softirq time. The latter could help improve the accuracy of cputime accounting when returning from idle on a NO_HZ_IDLE CPU. Properly accounting how much time was spent in hardirq and softirq time will also allow the NO_HZ_FULL code to re-use these same functions for hardirq and softirq accounting. 
Signed-off-by: Rik van Riel --- include/asm-generic/cputime_nsecs.h | 2 + kernel/sched/cputime.c | 124 ++-- 2 files changed, 79 insertions(+), 47 deletions(-) diff --git a/include/asm-generic/cputime_nsecs.h b/include/asm-generic/cputime_nsecs.h index 0f1c6f315cdc..918ebb01486c 100644 --- a/include/asm-generic/cputime_nsecs.h +++ b/include/asm-generic/cputime_nsecs.h @@ -50,6 +50,8 @@ typedef u64 __nocast cputime64_t; (__force u64)(__ct) #define nsecs_to_cputime(__nsecs) \ (__force cputime_t)(__nsecs) +#define nsecs_to_cputime64(__nsecs)\ + (__force cputime_t)(__nsecs) /* diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index 3d60e5d76fdb..db82ae12cf01 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -79,40 +79,50 @@ void irqtime_account_irq(struct task_struct *curr) } EXPORT_SYMBOL_GPL(irqtime_account_irq); -static int irqtime_account_hi_update(void) +static cputime_t irqtime_account_hi_update(cputime_t maxtime) { u64 *cpustat = kcpustat_this_cpu->cpustat; unsigned long flags; - u64 latest_ns; - int ret = 0; + cputime_t irq_cputime; local_irq_save(flags); - latest_ns = this_cpu_read(cpu_hardirq_time); - if (nsecs_to_cputime64(latest_ns) > cpustat[CPUTIME_IRQ]) - ret = 1; + irq_cputime = nsecs_to_cputime64(this_cpu_read(cpu_hardirq_time)) - + cpustat[CPUTIME_IRQ]; + irq_cputime = min(irq_cputime, maxtime); + cpustat[CPUTIME_IRQ] += irq_cputime; local_irq_restore(flags); - return ret; + return irq_cputime; } -static int irqtime_account_si_update(void) +static cputime_t irqtime_account_si_update(cputime_t maxtime) { u64 *cpustat = kcpustat_this_cpu->cpustat; unsigned long flags; - u64 latest_ns; - int ret = 0; + cputime_t softirq_cputime; local_irq_save(flags); - latest_ns = this_cpu_read(cpu_softirq_time); - if (nsecs_to_cputime64(latest_ns) > cpustat[CPUTIME_SOFTIRQ]) - ret = 1; + softirq_cputime = nsecs_to_cputime64(this_cpu_read(cpu_softirq_time)) - + cpustat[CPUTIME_SOFTIRQ]; + softirq_cputime = min(softirq_cputime, maxtime); + 
cpustat[CPUTIME_SOFTIRQ] += softirq_cputime; local_irq_restore(flags); - return ret; + return softirq_cputime; } #else /* CONFIG_IRQ_TIME_ACCOUNTING */ #define sched_clock_irqtime(0) +static cputime_t irqtime_account_hi_update(cputime_t dummy) +{ + return 0; +} + +static cputime_t irqtime_account_si_update(cputime_t dummy) +{ + return 0; +} + #endif /* !CONFIG_IRQ_TIME_ACCOUNTING */ static inline void task_group_account_field(struct task_struct *p, int index, @@ -257,32 +267,45 @@ void account_idle_time(cputime_t cputime) cpustat[CPUTIME_IDLE] += (__force u64) cputime; } -static __always_inline unsigned long steal_account_process_tick(unsigned long max_jiffies) +static __always_inline cputime_t steal_account_process_time(cputime_t maxtime) { #ifdef CONFIG_PARAVIRT if (static_key_false(_steal_enabled)) { + cputime_t steal_cputime; u64 steal; - unsigned long steal_jiffies; steal = paravirt_steal_clock(smp_processor_id()); steal -= this_rq()->prev_steal_time; - /* -* steal is in nsecs but our caller is expecting steal -* time in jiffies. Lets cast the result to jiffies -* granularity and account the rest on the next rounds. -*/ -
Re: [PATCH v3 12/14] regulator: pwm: Retrieve correct voltage
Hi, On Mon, Jul 11, 2016 at 12:02 AM, Thierry Reding wrote: > On Sat, Jul 09, 2016 at 11:47:18AM +0200, Mark Brown wrote: >> On Fri, Jul 08, 2016 at 05:43:02PM +0200, Thierry Reding wrote: >> >> > Mark, do you want me to provide a stable branch with the PWM regulator >> > patches and resolve that conflict in your tree? Or would you rather take >> > the whole set based on a stable branch from the PWM tree? Or maybe yet >> > another possibility would be to base the PWM tree on a stable branch >> > from the regulator tree containing the above commit. >> >> Probably easiest to use this signed tag and resolve it in your tree: >> >> The following changes since commit 1a695a905c18548062509178b98bc91e67510864: >> >> Linux 4.7-rc1 (2016-05-29 09:29:24 -0700) >> >> are available in the git repository at: >> >> git://git.kernel.org/pub/scm/linux/kernel/git/broonie/regulator.git >> tags/pwm-modernization >> >> for you to fetch changes up to c2588393e6315ab68207323d37d2a73713d6bc81: >> >> regulator: pwm: Fix regulator ramp delay for continuous mode (2016-07-07 >> 11:45:06 +0200) >> >> >> regulator: Provide a branch for modernization of the PWM code >> >> There's a new, improved PWM API which allows a lot of improvements in >> the PWM regulator driver. Since the bulk of the changes are in the PWM >> API this is being managed in the PWM tree, merge pending regulator API >> changes to allow this to be resolved more easily. >> >> >> Alexandre Courbot (1): >> regulator: pwm: Support for enable GPIO >> >> Boris Brezillon (1): >> regulator: pwm: Drop unneeded pwm_enable() call >> >> Douglas Anderson (1): >> regulator: pwm: Fix regulator ramp delay for continuous mode >> >> .../bindings/regulator/pwm-regulator.txt | 7 +++- >> drivers/regulator/pwm-regulator.c | 40 >> ++ >> 2 files changed, 39 insertions(+), 8 deletions(-) > > Merged into for-4.8/regulator of the PWM tree and rebased Boris' > pwm-regulator patches on top. 
> > Boris, everything looks right to me, but can you take a quick look to > see if it all matches up with what you expect? As I mentioned in the other thread about the linuxnext conflict, pwm_regulator_set_voltage() is wrong. You have: ramp_delay = DIV_ROUND_UP(abs(min_uV - old_uV), ramp_delay); You should have: ramp_delay = DIV_ROUND_UP(abs(req_min_uV - old_uV), ramp_delay); -Doug
Re: [PATCH v3 12/14] regulator: pwm: Retrieve correct voltage
Hi, On Mon, Jul 11, 2016 at 12:02 AM, Thierry Reding wrote: > On Sat, Jul 09, 2016 at 11:47:18AM +0200, Mark Brown wrote: >> On Fri, Jul 08, 2016 at 05:43:02PM +0200, Thierry Reding wrote: >> >> > Mark, do you want me to provide a stable branch with the PWM regulator >> > patches and resolve that conflict in your tree? Or would you rather take >> > the whole set based on a stable branch from the PWM tree? Or maybe yet >> > another possibility would be to base the PWM tree on a stable branch >> > from the regulator tree containing the above commit. >> >> Probably easiest to use this signed tag and resolve it in your tree: >> >> The following changes since commit 1a695a905c18548062509178b98bc91e67510864: >> >> Linux 4.7-rc1 (2016-05-29 09:29:24 -0700) >> >> are available in the git repository at: >> >> git://git.kernel.org/pub/scm/linux/kernel/git/broonie/regulator.git >> tags/pwm-modernization >> >> for you to fetch changes up to c2588393e6315ab68207323d37d2a73713d6bc81: >> >> regulator: pwm: Fix regulator ramp delay for continuous mode (2016-07-07 >> 11:45:06 +0200) >> >> >> regulator: Provide a branch for moderninzation of the PWM code >> >> There's a new, improved PWM API which allows a lot of improvements in >> the PWM regulator driver. Since the bulk of the changes are in the PWM >> API this is being managed in the PWM tree, merge pending regulator API >> changes to allow this to be resolved more easily. >> >> >> Alexandre Courbot (1): >> regulator: pwm: Support for enable GPIO >> >> Boris Brezillon (1): >> regulator: pwm: Drop unneeded pwm_enable() call >> >> Douglas Anderson (1): >> regulator: pwm: Fix regulator ramp delay for continuous mode >> >> .../bindings/regulator/pwm-regulator.txt | 7 +++- >> drivers/regulator/pwm-regulator.c | 40 >> ++ >> 2 files changed, 39 insertions(+), 8 deletions(-) > > Merged into for-4.8/regulator of the PWM tree and rebased Boris' > pwm-regulator patches on top. 
> > Boris, everything looks right to me, but can you take a quick look to > see if it all matches up with what you expect? As I mentioned in the other thread about the linuxnext conflict, pwm_regulator_set_voltage() is wrong. You have: ramp_delay = DIV_ROUND_UP(abs(min_uV - old_uV), ramp_delay); You should have: ramp_delay = DIV_ROUND_UP(abs(req_min_uV - old_uV), ramp_delay); -Doug
[PATCH -next] bpf: make inode code explicitly non-modular
The Kconfig currently controlling compilation of this code is: init/Kconfig:config BPF_SYSCALL init/Kconfig: bool "Enable bpf() system call" ...meaning that it currently is not being built as a module by anyone. Let's remove the couple traces of modular infrastructure use, so that when reading the driver there is no doubt it is builtin-only. Note that MODULE_ALIAS is a no-op for non-modular code. We replace module.h with init.h since the file does use __init. Cc: Alexei Starovoitov Cc: net...@vger.kernel.org Signed-off-by: Paul Gortmaker --- kernel/bpf/inode.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c index 318858edb1cd..5967b870a895 100644 --- a/kernel/bpf/inode.c +++ b/kernel/bpf/inode.c @@ -11,7 +11,7 @@ * version 2 as published by the Free Software Foundation. */ -#include +#include #include #include #include @@ -367,8 +367,6 @@ static struct file_system_type bpf_fs_type = { .kill_sb= kill_litter_super, }; -MODULE_ALIAS_FS("bpf"); - static int __init bpf_init(void) { int ret; -- 2.8.4
[PATCH -next] bpf: make inode code explicitly non-modular
The Kconfig currently controlling compilation of this code is: init/Kconfig:config BPF_SYSCALL init/Kconfig: bool "Enable bpf() system call" ...meaning that it currently is not being built as a module by anyone. Lets remove the couple traces of modular infrastructure use, so that when reading the driver there is no doubt it is builtin-only. Note that MODULE_ALIAS is a no-op for non-modular code. We replace module.h with init.h since the file does use __init. Cc: Alexei Starovoitov Cc: net...@vger.kernel.org Signed-off-by: Paul Gortmaker --- kernel/bpf/inode.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c index 318858edb1cd..5967b870a895 100644 --- a/kernel/bpf/inode.c +++ b/kernel/bpf/inode.c @@ -11,7 +11,7 @@ * version 2 as published by the Free Software Foundation. */ -#include +#include #include #include #include @@ -367,8 +367,6 @@ static struct file_system_type bpf_fs_type = { .kill_sb= kill_litter_super, }; -MODULE_ALIAS_FS("bpf"); - static int __init bpf_init(void) { int ret; -- 2.8.4
Re: [PATCH 7/7] ARM: dts: NSP: Add bgmac entries
On 07/08/2016 08:56 AM, Jon Mason wrote: > Add device tree entries for the ethernet devices present on the > Broadcom Northstar Plus SoCs > > Signed-off-by: Jon Mason Applied to devicetree/next, with s/bgmac/AMAC/ in the subject, thanks! -- Florian
Re: [PATCH 7/7] ARM: dts: NSP: Add bgmac entries
On 07/08/2016 08:56 AM, Jon Mason wrote: > Add device tree entries for the ethernet devices present on the > Broadcom Northstar Plus SoCs > > Signed-off-by: Jon Mason Applied to devicetree/next, with s/bgmac/AMAC/ in the subject, thanks! -- Florian
Re: [PATCH v2 6/6] dt-bindings: net: bgmac: add bindings documentation for bgmac
On 07/07/2016 04:08 PM, Jon Mason wrote: > Signed-off-by: Jon Mason Applied to devicetree/next, thanks -- Florian
Re: [PATCH v2 6/6] dt-bindings: net: bgmac: add bindings documentation for bgmac
On 07/07/2016 04:08 PM, Jon Mason wrote: > Signed-off-by: Jon Mason Applied to devicetree/next, thanks -- Florian
Re: linux-next: manual merge of the pwm tree with the regulator tree
Hi, On Sun, Jul 10, 2016 at 11:56 PM, Stephen Rothwell wrote: > Hi Thierry, > > Today's linux-next merge of the pwm tree got a conflict in: > > drivers/regulator/pwm-regulator.c > > between commit: > > 830583004e61 ("regulator: pwm: Drop unneeded pwm_enable() call") > 27bfa8893b15 ("regulator: pwm: Support for enable GPIO") > c2588393e631 ("regulator: pwm: Fix regulator ramp delay for continuous > mode") > > from the regulator tree and commit: > > b0303deaa480 ("regulator: pwm: Adjust PWM config at probe time") > 8bd57ca236d0 ("regulator: pwm: Switch to the atomic PWM API") > 25d16595935b ("regulator: pwm: Retrieve correct voltage") > 53f239af4c14 ("regulator: pwm: Support extra continuous mode cases") > > from the pwm tree. > > I fixed it up (I think, please check - see below) and can carry the fix > as necessary. This is now fixed as far as linux-next is concerned, but > any non trivial conflicts should be mentioned to your upstream maintainer > when your tree is submitted for merging. You may also want to consider > cooperating with the maintainer of the conflicting tree to minimise any > particularly complex conflicts. > > -- > Cheers, > Stephen Rothwell [ cut ] > - /* Delay required by PWM regulator to settle to the new voltage */ > - usleep_range(ramp_delay, ramp_delay + 1000); > + /* Ramp delay is in uV/uS. Adjust to uS and delay */ > + ramp_delay = DIV_ROUND_UP(abs(min_uV - old_uV), ramp_delay); This was what I was worried about and why I originally sent my patch based upon Boris's series. The above should be: ramp_delay = DIV_ROUND_UP(abs(req_min_uV - old_uV), ramp_delay); Specifically note the use of "req_min_uV" and not "min_uV". -Doug
Re: linux-next: manual merge of the pwm tree with the regulator tree
Hi, On Sun, Jul 10, 2016 at 11:56 PM, Stephen Rothwell wrote: > Hi Thierry, > > Today's linux-next merge of the pwm tree got a conflict in: > > drivers/regulator/pwm-regulator.c > > between commit: > > 830583004e61 ("regulator: pwm: Drop unneeded pwm_enable() call") > 27bfa8893b15 ("regulator: pwm: Support for enable GPIO") > c2588393e631 ("regulator: pwm: Fix regulator ramp delay for continuous > mode") > > from the regulator tree and commit: > > b0303deaa480 ("regulator: pwm: Adjust PWM config at probe time") > 8bd57ca236d0 ("regulator: pwm: Switch to the atomic PWM API") > 25d16595935b ("regulator: pwm: Retrieve correct voltage") > 53f239af4c14 ("regulator: pwm: Support extra continuous mode cases") > > from the pwm tree. > > I fixed it up (I think, please check - see below) and can carry the fix > as necessary. This is now fixed as far as linux-next is concerned, but > any non trivial conflicts should be mentioned to your upstream maintainer > when your tree is submitted for merging. You may also want to consider > cooperating with the maintainer of the conflicting tree to minimise any > particularly complex conflicts. > > -- > Cheers, > Stephen Rothwell [ cut ] > - /* Delay required by PWM regulator to settle to the new voltage */ > - usleep_range(ramp_delay, ramp_delay + 1000); > + /* Ramp delay is in uV/uS. Adjust to uS and delay */ > + ramp_delay = DIV_ROUND_UP(abs(min_uV - old_uV), ramp_delay); This was what I was worried about and why I originally sent my patch based upon Boris's series. The above should be: ramp_delay = DIV_ROUND_UP(abs(req_min_uV - old_uV), ramp_delay); Specifically note the use of "req_min_uV" and not "min_uV". -Doug
Re: [PATCH v7 3/4] perf: xgene: Add APM X-Gene SoC Performance Monitoring Unit driver
Hi Mark, On Mon, Jul 11, 2016 at 4:39 AM, Mark Rutland wrote: > Please add some commit message text, e.g. > > This patch adds a driver for the SoC-wide (AKA uncore) PMU hardware > found in APM X-Gene SoCs. > > On Wed, Jul 06, 2016 at 05:07:24PM -0700, Tai Nguyen wrote: >> Signed-off-by: Tai Nguyen > > Modulo that, and Paul's comments: > > Reviewed-by: Mark Rutland Thanks a lot. I'll fix it. Regards, Tai [...] > > Thanks, > Mark. > >> --- >> Documentation/perf/xgene-pmu.txt | 48 ++ >> drivers/perf/Kconfig |7 + >> drivers/perf/Makefile|1 + >> drivers/perf/xgene_pmu.c | 1398 >> ++ >> 4 files changed, 1454 insertions(+) >> create mode 100644 Documentation/perf/xgene-pmu.txt >> create mode 100644 drivers/perf/xgene_pmu.c >> >> diff --git a/Documentation/perf/xgene-pmu.txt >> b/Documentation/perf/xgene-pmu.txt >> new file mode 100644 >> index 000..d7cff44 >> --- /dev/null >> +++ b/Documentation/perf/xgene-pmu.txt >> @@ -0,0 +1,48 @@ >> +APM X-Gene SoC Performance Monitoring Unit (PMU) >> + >> + >> +X-Gene SoC PMU consists of various independent system device PMUs such as >> +L3 cache(s), I/O bridge(s), memory controller bridge(s) and memory >> +controller(s). These PMU devices are loosely architected to follow the >> +same model as the PMU for ARM cores. The PMUs share the same top level >> +interrupt and status CSR region. >> + >> +PMU (perf) driver >> +- >> + >> +The xgene-pmu driver registers several perf PMU drivers. Each of the perf >> +driver provides description of its available events and configuration >> options >> +in sysfs, see /sys/devices//. >> + >> +The "format" directory describes format of the config (event ID), >> +config1 (agent ID) fields of the perf_event_attr structure. The "events" >> +directory provides configuration templates for all supported event types >> that >> +can be used with perf tool. For example, "l3c0/bank-fifo-full/" is an >> +equivalent of "l3c0/config=0x0b/". 
>> + >> +Most of the SoC PMU has a specific list of agent ID used for monitoring >> +performance of a specific datapath. For example, agents of a L3 cache can be >> +a specific CPU or an I/O bridge. Each PMU has a set of 2 registers capable >> of >> +masking the agents from which the request come from. If the bit with >> +the bit number corresponding to the agent is set, the event is counted only >> if >> +it is caused by a request from that agent. Each agent ID bit is inversely >> mapped >> +to a corresponding bit in "config1" field. By default, the event will be >> +counted for all agent requests (config1 = 0x0). For all the supported >> agents of >> +each PMU, please refer to APM X-Gene User Manual. >> + >> +Each perf driver also provides a "cpumask" sysfs attribute, which contains a >> +single CPU ID of the processor which will be used to handle all the PMU >> events. >> + >> +Example for perf tool use: >> + >> + / # perf list | grep -e l3c -e iob -e mcb -e mc >> + l3c0/ackq-full/[Kernel PMU event] >> + <...> >> + mcb1/mcb-csw-stall/[Kernel PMU event] >> + >> + / # perf stat -a -e l3c0/read-miss/,mcb1/csw-write-request/ sleep 1 >> + >> + / # perf stat -a -e l3c0/read-miss,config1=0xfffe/ sleep 1 >> + >> +The driver does not support sampling, therefore "perf record" will >> +not work. Per-task (without "-a") perf sessions are not supported. >> diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig >> index 04e2653..4d5c5f9 100644 >> --- a/drivers/perf/Kconfig >> +++ b/drivers/perf/Kconfig >> @@ -12,4 +12,11 @@ config ARM_PMU >> Say y if you want to use CPU performance monitors on ARM-based >> systems. >> >> +config XGENE_PMU >> +depends on PERF_EVENTS && ARCH_XGENE >> +bool "APM X-Gene SoC PMU" >> +default n >> +help >> + Say y if you want to use APM X-Gene SoC performance monitors. 
>> + >> endmenu >> diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile >> index acd2397..b116e98 100644 >> --- a/drivers/perf/Makefile >> +++ b/drivers/perf/Makefile >> @@ -1 +1,2 @@ >> obj-$(CONFIG_ARM_PMU) += arm_pmu.o >> +obj-$(CONFIG_XGENE_PMU) += xgene_pmu.o >> diff --git a/drivers/perf/xgene_pmu.c b/drivers/perf/xgene_pmu.c >> new file mode 100644 >> index 000..907a6cc >> --- /dev/null >> +++ b/drivers/perf/xgene_pmu.c >> @@ -0,0 +1,1398 @@ >> +/* >> + * APM X-Gene SoC PMU (Performance Monitor Unit) >> + * >> + * Copyright (c) 2016, Applied Micro Circuits Corporation >> + * Author: Hoan Tran >> + * Tai Nguyen >> + * >> + * This program is free software; you can redistribute it and/or modify it >> + * under the terms of the GNU General Public License as published by the >> + * Free Software Foundation;
Re: [PATCH v7 3/4] perf: xgene: Add APM X-Gene SoC Performance Monitoring Unit driver
Hi Mark, On Mon, Jul 11, 2016 at 4:39 AM, Mark Rutland wrote: > Please add some commit message text, e.g. > > This patch adds a driver for the SoC-wide (AKA uncore) PMU hardware > found in APM X-Gene SoCs. > > On Wed, Jul 06, 2016 at 05:07:24PM -0700, Tai Nguyen wrote: >> Signed-off-by: Tai Nguyen > > Modulo that, and Paul's comments: > > Reviewed-by: Mark Rutland Thanks a lot. I'll fix it. Regards, Tai [...] > > Thanks, > Mark. > >> --- >> Documentation/perf/xgene-pmu.txt | 48 ++ >> drivers/perf/Kconfig |7 + >> drivers/perf/Makefile|1 + >> drivers/perf/xgene_pmu.c | 1398 >> ++ >> 4 files changed, 1454 insertions(+) >> create mode 100644 Documentation/perf/xgene-pmu.txt >> create mode 100644 drivers/perf/xgene_pmu.c >> >> diff --git a/Documentation/perf/xgene-pmu.txt >> b/Documentation/perf/xgene-pmu.txt >> new file mode 100644 >> index 000..d7cff44 >> --- /dev/null >> +++ b/Documentation/perf/xgene-pmu.txt >> @@ -0,0 +1,48 @@ >> +APM X-Gene SoC Performance Monitoring Unit (PMU) >> + >> + >> +X-Gene SoC PMU consists of various independent system device PMUs such as >> +L3 cache(s), I/O bridge(s), memory controller bridge(s) and memory >> +controller(s). These PMU devices are loosely architected to follow the >> +same model as the PMU for ARM cores. The PMUs share the same top level >> +interrupt and status CSR region. >> + >> +PMU (perf) driver >> +- >> + >> +The xgene-pmu driver registers several perf PMU drivers. Each of the perf >> +driver provides description of its available events and configuration >> options >> +in sysfs, see /sys/devices//. >> + >> +The "format" directory describes format of the config (event ID), >> +config1 (agent ID) fields of the perf_event_attr structure. The "events" >> +directory provides configuration templates for all supported event types >> that >> +can be used with perf tool. For example, "l3c0/bank-fifo-full/" is an >> +equivalent of "l3c0/config=0x0b/". 
>> + >> +Most of the SoC PMU has a specific list of agent ID used for monitoring >> +performance of a specific datapath. For example, agents of a L3 cache can be >> +a specific CPU or an I/O bridge. Each PMU has a set of 2 registers capable >> of >> +masking the agents from which the request come from. If the bit with >> +the bit number corresponding to the agent is set, the event is counted only >> if >> +it is caused by a request from that agent. Each agent ID bit is inversely >> mapped >> +to a corresponding bit in "config1" field. By default, the event will be >> +counted for all agent requests (config1 = 0x0). For all the supported >> agents of >> +each PMU, please refer to APM X-Gene User Manual. >> + >> +Each perf driver also provides a "cpumask" sysfs attribute, which contains a >> +single CPU ID of the processor which will be used to handle all the PMU >> events. >> + >> +Example for perf tool use: >> + >> + / # perf list | grep -e l3c -e iob -e mcb -e mc >> + l3c0/ackq-full/[Kernel PMU event] >> + <...> >> + mcb1/mcb-csw-stall/[Kernel PMU event] >> + >> + / # perf stat -a -e l3c0/read-miss/,mcb1/csw-write-request/ sleep 1 >> + >> + / # perf stat -a -e l3c0/read-miss,config1=0xfffe/ sleep 1 >> + >> +The driver does not support sampling, therefore "perf record" will >> +not work. Per-task (without "-a") perf sessions are not supported. >> diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig >> index 04e2653..4d5c5f9 100644 >> --- a/drivers/perf/Kconfig >> +++ b/drivers/perf/Kconfig >> @@ -12,4 +12,11 @@ config ARM_PMU >> Say y if you want to use CPU performance monitors on ARM-based >> systems. >> >> +config XGENE_PMU >> +depends on PERF_EVENTS && ARCH_XGENE >> +bool "APM X-Gene SoC PMU" >> +default n >> +help >> + Say y if you want to use APM X-Gene SoC performance monitors. 
>> + >> endmenu >> diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile >> index acd2397..b116e98 100644 >> --- a/drivers/perf/Makefile >> +++ b/drivers/perf/Makefile >> @@ -1 +1,2 @@ >> obj-$(CONFIG_ARM_PMU) += arm_pmu.o >> +obj-$(CONFIG_XGENE_PMU) += xgene_pmu.o >> diff --git a/drivers/perf/xgene_pmu.c b/drivers/perf/xgene_pmu.c >> new file mode 100644 >> index 000..907a6cc >> --- /dev/null >> +++ b/drivers/perf/xgene_pmu.c >> @@ -0,0 +1,1398 @@ >> +/* >> + * APM X-Gene SoC PMU (Performance Monitor Unit) >> + * >> + * Copyright (c) 2016, Applied Micro Circuits Corporation >> + * Author: Hoan Tran >> + * Tai Nguyen >> + * >> + * This program is free software; you can redistribute it and/or modify it >> + * under the terms of the GNU General Public License as published by the >> + * Free Software Foundation; either version 2 of the License, or (at your >> + * option) any later version. >> + * >> + * This
Re: Linux 4.6.4
diff --git a/Makefile b/Makefile index c62b531d5a85..cd374426114a 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 6 -SUBLEVEL = 3 +SUBLEVEL = 4 EXTRAVERSION = NAME = Charred Weasel diff --git a/crypto/crypto_user.c b/crypto/crypto_user.c index 43fe85f20d57..7097a3395b25 100644 --- a/crypto/crypto_user.c +++ b/crypto/crypto_user.c @@ -455,6 +455,7 @@ static const int crypto_msg_min[CRYPTO_NR_MSGTYPES] = { [CRYPTO_MSG_NEWALG - CRYPTO_MSG_BASE] = MSGSIZE(crypto_user_alg), [CRYPTO_MSG_DELALG - CRYPTO_MSG_BASE] = MSGSIZE(crypto_user_alg), [CRYPTO_MSG_UPDATEALG - CRYPTO_MSG_BASE] = MSGSIZE(crypto_user_alg), + [CRYPTO_MSG_GETALG - CRYPTO_MSG_BASE] = MSGSIZE(crypto_user_alg), [CRYPTO_MSG_DELRNG - CRYPTO_MSG_BASE] = 0, }; diff --git a/drivers/crypto/ux500/hash/hash_core.c b/drivers/crypto/ux500/hash/hash_core.c index 574e87c7f2b8..9acccad26928 100644 --- a/drivers/crypto/ux500/hash/hash_core.c +++ b/drivers/crypto/ux500/hash/hash_core.c @@ -781,7 +781,7 @@ static int hash_process_data(struct hash_device_data *device_data, _data->state); memmove(req_ctx->state.buffer, device_data->state.buffer, - HASH_BLOCK_SIZE / sizeof(u32)); + HASH_BLOCK_SIZE); if (ret) { dev_err(device_data->dev, "%s: hash_resume_state() failed!\n", @@ -832,7 +832,7 @@ static int hash_process_data(struct hash_device_data *device_data, memmove(device_data->state.buffer, req_ctx->state.buffer, - HASH_BLOCK_SIZE / sizeof(u32)); + HASH_BLOCK_SIZE); if (ret) { dev_err(device_data->dev, "%s: hash_save_state() failed!\n", __func__); diff --git a/drivers/crypto/vmx/aes_cbc.c b/drivers/crypto/vmx/aes_cbc.c index 495577b6d31b..94ad5c0adbcb 100644 --- a/drivers/crypto/vmx/aes_cbc.c +++ b/drivers/crypto/vmx/aes_cbc.c @@ -182,7 +182,7 @@ struct crypto_alg p8_aes_cbc_alg = { .cra_name = "cbc(aes)", .cra_driver_name = "p8_aes_cbc", .cra_module = THIS_MODULE, - .cra_priority = 1000, + .cra_priority = 2000, .cra_type = _blkcipher_type, .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | 
CRYPTO_ALG_NEED_FALLBACK, .cra_alignmask = 0, diff --git a/drivers/crypto/vmx/aes_ctr.c b/drivers/crypto/vmx/aes_ctr.c index 0a3c1b04cf3c..38ed10d761d0 100644 --- a/drivers/crypto/vmx/aes_ctr.c +++ b/drivers/crypto/vmx/aes_ctr.c @@ -166,7 +166,7 @@ struct crypto_alg p8_aes_ctr_alg = { .cra_name = "ctr(aes)", .cra_driver_name = "p8_aes_ctr", .cra_module = THIS_MODULE, - .cra_priority = 1000, + .cra_priority = 2000, .cra_type = _blkcipher_type, .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | CRYPTO_ALG_NEED_FALLBACK, .cra_alignmask = 0, diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 6dc810bce295..944a6dca0fcb 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -44,6 +44,9 @@ static const struct usb_device_id usb_quirk_list[] = { /* Creative SB Audigy 2 NX */ { USB_DEVICE(0x041e, 0x3020), .driver_info = USB_QUIRK_RESET_RESUME }, + /* USB3503 */ + { USB_DEVICE(0x0424, 0x3503), .driver_info = USB_QUIRK_RESET_RESUME }, + /* Microsoft Wireless Laser Mouse 6000 Receiver */ { USB_DEVICE(0x045e, 0x00e1), .driver_info = USB_QUIRK_RESET_RESUME }, @@ -173,6 +176,10 @@ static const struct usb_device_id usb_quirk_list[] = { /* MAYA44USB sound device */ { USB_DEVICE(0x0a92, 0x0091), .driver_info = USB_QUIRK_RESET_RESUME }, + /* ASUS Base Station(T100) */ + { USB_DEVICE(0x0b05, 0x17e0), .driver_info = + USB_QUIRK_IGNORE_REMOTE_WAKEUP }, + /* Action Semiconductor flash disk */ { USB_DEVICE(0x10d6, 0x2200), .driver_info = USB_QUIRK_STRING_FETCH_255 }, @@ -188,26 +195,22 @@ static const struct usb_device_id usb_quirk_list[] = { { USB_DEVICE(0x1908, 0x1315), .driver_info = USB_QUIRK_HONOR_BNUMINTERFACES }, - /* INTEL VALUE SSD */ - { USB_DEVICE(0x8086, 0xf1a5), .driver_info = USB_QUIRK_RESET_RESUME }, - - /* USB3503 */ - { USB_DEVICE(0x0424, 0x3503), .driver_info = USB_QUIRK_RESET_RESUME }, - - /* ASUS Base Station(T100) */ - { USB_DEVICE(0x0b05, 0x17e0), .driver_info = - USB_QUIRK_IGNORE_REMOTE_WAKEUP }, - /* Protocol and OTG Electrical Test 
Device */ { USB_DEVICE(0x1a0a, 0x0200),
Re: Linux 4.6.4
diff --git a/Makefile b/Makefile index c62b531d5a85..cd374426114a 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 6 -SUBLEVEL = 3 +SUBLEVEL = 4 EXTRAVERSION = NAME = Charred Weasel diff --git a/crypto/crypto_user.c b/crypto/crypto_user.c index 43fe85f20d57..7097a3395b25 100644 --- a/crypto/crypto_user.c +++ b/crypto/crypto_user.c @@ -455,6 +455,7 @@ static const int crypto_msg_min[CRYPTO_NR_MSGTYPES] = { [CRYPTO_MSG_NEWALG - CRYPTO_MSG_BASE] = MSGSIZE(crypto_user_alg), [CRYPTO_MSG_DELALG - CRYPTO_MSG_BASE] = MSGSIZE(crypto_user_alg), [CRYPTO_MSG_UPDATEALG - CRYPTO_MSG_BASE] = MSGSIZE(crypto_user_alg), + [CRYPTO_MSG_GETALG - CRYPTO_MSG_BASE] = MSGSIZE(crypto_user_alg), [CRYPTO_MSG_DELRNG - CRYPTO_MSG_BASE] = 0, }; diff --git a/drivers/crypto/ux500/hash/hash_core.c b/drivers/crypto/ux500/hash/hash_core.c index 574e87c7f2b8..9acccad26928 100644 --- a/drivers/crypto/ux500/hash/hash_core.c +++ b/drivers/crypto/ux500/hash/hash_core.c @@ -781,7 +781,7 @@ static int hash_process_data(struct hash_device_data *device_data, _data->state); memmove(req_ctx->state.buffer, device_data->state.buffer, - HASH_BLOCK_SIZE / sizeof(u32)); + HASH_BLOCK_SIZE); if (ret) { dev_err(device_data->dev, "%s: hash_resume_state() failed!\n", @@ -832,7 +832,7 @@ static int hash_process_data(struct hash_device_data *device_data, memmove(device_data->state.buffer, req_ctx->state.buffer, - HASH_BLOCK_SIZE / sizeof(u32)); + HASH_BLOCK_SIZE); if (ret) { dev_err(device_data->dev, "%s: hash_save_state() failed!\n", __func__); diff --git a/drivers/crypto/vmx/aes_cbc.c b/drivers/crypto/vmx/aes_cbc.c index 495577b6d31b..94ad5c0adbcb 100644 --- a/drivers/crypto/vmx/aes_cbc.c +++ b/drivers/crypto/vmx/aes_cbc.c @@ -182,7 +182,7 @@ struct crypto_alg p8_aes_cbc_alg = { .cra_name = "cbc(aes)", .cra_driver_name = "p8_aes_cbc", .cra_module = THIS_MODULE, - .cra_priority = 1000, + .cra_priority = 2000, .cra_type = _blkcipher_type, .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | 
CRYPTO_ALG_NEED_FALLBACK, .cra_alignmask = 0, diff --git a/drivers/crypto/vmx/aes_ctr.c b/drivers/crypto/vmx/aes_ctr.c index 0a3c1b04cf3c..38ed10d761d0 100644 --- a/drivers/crypto/vmx/aes_ctr.c +++ b/drivers/crypto/vmx/aes_ctr.c @@ -166,7 +166,7 @@ struct crypto_alg p8_aes_ctr_alg = { .cra_name = "ctr(aes)", .cra_driver_name = "p8_aes_ctr", .cra_module = THIS_MODULE, - .cra_priority = 1000, + .cra_priority = 2000, .cra_type = _blkcipher_type, .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | CRYPTO_ALG_NEED_FALLBACK, .cra_alignmask = 0, diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 6dc810bce295..944a6dca0fcb 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -44,6 +44,9 @@ static const struct usb_device_id usb_quirk_list[] = { /* Creative SB Audigy 2 NX */ { USB_DEVICE(0x041e, 0x3020), .driver_info = USB_QUIRK_RESET_RESUME }, + /* USB3503 */ + { USB_DEVICE(0x0424, 0x3503), .driver_info = USB_QUIRK_RESET_RESUME }, + /* Microsoft Wireless Laser Mouse 6000 Receiver */ { USB_DEVICE(0x045e, 0x00e1), .driver_info = USB_QUIRK_RESET_RESUME }, @@ -173,6 +176,10 @@ static const struct usb_device_id usb_quirk_list[] = { /* MAYA44USB sound device */ { USB_DEVICE(0x0a92, 0x0091), .driver_info = USB_QUIRK_RESET_RESUME }, + /* ASUS Base Station(T100) */ + { USB_DEVICE(0x0b05, 0x17e0), .driver_info = + USB_QUIRK_IGNORE_REMOTE_WAKEUP }, + /* Action Semiconductor flash disk */ { USB_DEVICE(0x10d6, 0x2200), .driver_info = USB_QUIRK_STRING_FETCH_255 }, @@ -188,26 +195,22 @@ static const struct usb_device_id usb_quirk_list[] = { { USB_DEVICE(0x1908, 0x1315), .driver_info = USB_QUIRK_HONOR_BNUMINTERFACES }, - /* INTEL VALUE SSD */ - { USB_DEVICE(0x8086, 0xf1a5), .driver_info = USB_QUIRK_RESET_RESUME }, - - /* USB3503 */ - { USB_DEVICE(0x0424, 0x3503), .driver_info = USB_QUIRK_RESET_RESUME }, - - /* ASUS Base Station(T100) */ - { USB_DEVICE(0x0b05, 0x17e0), .driver_info = - USB_QUIRK_IGNORE_REMOTE_WAKEUP }, - /* Protocol and OTG Electrical Test 
Device */ { USB_DEVICE(0x1a0a, 0x0200),
Re: Linux 4.4.15
diff --git a/Makefile b/Makefile index fadbb9d73c6d..979088079338 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 4 -SUBLEVEL = 14 +SUBLEVEL = 15 EXTRAVERSION = NAME = Blurry Fish Butt diff --git a/crypto/crypto_user.c b/crypto/crypto_user.c index 43fe85f20d57..7097a3395b25 100644 --- a/crypto/crypto_user.c +++ b/crypto/crypto_user.c @@ -455,6 +455,7 @@ static const int crypto_msg_min[CRYPTO_NR_MSGTYPES] = { [CRYPTO_MSG_NEWALG - CRYPTO_MSG_BASE] = MSGSIZE(crypto_user_alg), [CRYPTO_MSG_DELALG - CRYPTO_MSG_BASE] = MSGSIZE(crypto_user_alg), [CRYPTO_MSG_UPDATEALG - CRYPTO_MSG_BASE] = MSGSIZE(crypto_user_alg), + [CRYPTO_MSG_GETALG - CRYPTO_MSG_BASE] = MSGSIZE(crypto_user_alg), [CRYPTO_MSG_DELRNG - CRYPTO_MSG_BASE] = 0, }; diff --git a/drivers/crypto/ux500/hash/hash_core.c b/drivers/crypto/ux500/hash/hash_core.c index 66b1c3313e2e..cd4398498495 100644 --- a/drivers/crypto/ux500/hash/hash_core.c +++ b/drivers/crypto/ux500/hash/hash_core.c @@ -797,7 +797,7 @@ static int hash_process_data(struct hash_device_data *device_data, _data->state); memmove(req_ctx->state.buffer, device_data->state.buffer, - HASH_BLOCK_SIZE / sizeof(u32)); + HASH_BLOCK_SIZE); if (ret) { dev_err(device_data->dev, "%s: hash_resume_state() failed!\n", @@ -848,7 +848,7 @@ static int hash_process_data(struct hash_device_data *device_data, memmove(device_data->state.buffer, req_ctx->state.buffer, - HASH_BLOCK_SIZE / sizeof(u32)); + HASH_BLOCK_SIZE); if (ret) { dev_err(device_data->dev, "%s: hash_save_state() failed!\n", __func__); diff --git a/drivers/crypto/vmx/aes_cbc.c b/drivers/crypto/vmx/aes_cbc.c index 0b8fe2ec5315..f3801b983f42 100644 --- a/drivers/crypto/vmx/aes_cbc.c +++ b/drivers/crypto/vmx/aes_cbc.c @@ -182,7 +182,7 @@ struct crypto_alg p8_aes_cbc_alg = { .cra_name = "cbc(aes)", .cra_driver_name = "p8_aes_cbc", .cra_module = THIS_MODULE, - .cra_priority = 1000, + .cra_priority = 2000, .cra_type = _blkcipher_type, .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | 
CRYPTO_ALG_NEED_FALLBACK, .cra_alignmask = 0, diff --git a/drivers/crypto/vmx/aes_ctr.c b/drivers/crypto/vmx/aes_ctr.c index ee1306cd8f59..404a1b69a3ab 100644 --- a/drivers/crypto/vmx/aes_ctr.c +++ b/drivers/crypto/vmx/aes_ctr.c @@ -166,7 +166,7 @@ struct crypto_alg p8_aes_ctr_alg = { .cra_name = "ctr(aes)", .cra_driver_name = "p8_aes_ctr", .cra_module = THIS_MODULE, - .cra_priority = 1000, + .cra_priority = 2000, .cra_type = _blkcipher_type, .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | CRYPTO_ALG_NEED_FALLBACK, .cra_alignmask = 0, diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c index bd377a6b067d..df54475d163b 100644 --- a/drivers/net/ethernet/atheros/alx/main.c +++ b/drivers/net/ethernet/atheros/alx/main.c @@ -86,9 +86,14 @@ static int alx_refill_rx_ring(struct alx_priv *alx, gfp_t gfp) while (!cur_buf->skb && next != rxq->read_idx) { struct alx_rfd *rfd = >rfd[cur]; - skb = __netdev_alloc_skb(alx->dev, alx->rxbuf_size, gfp); + skb = __netdev_alloc_skb(alx->dev, alx->rxbuf_size + 64, gfp); if (!skb) break; + + /* Workround for the HW RX DMA overflow issue */ + if (((unsigned long)skb->data & 0xfff) == 0xfc0) + skb_reserve(skb, 64); + dma = dma_map_single(>hw.pdev->dev, skb->data, alx->rxbuf_size, DMA_FROM_DEVICE); diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c index 169059c92f80..8d54e7b41bbf 100644 --- a/drivers/net/ethernet/cadence/macb.c +++ b/drivers/net/ethernet/cadence/macb.c @@ -2405,9 +2405,9 @@ static int macb_init(struct platform_device *pdev) if (bp->phy_interface == PHY_INTERFACE_MODE_RGMII) val = GEM_BIT(RGMII); else if (bp->phy_interface == PHY_INTERFACE_MODE_RMII && -(bp->caps & MACB_CAPS_USRIO_DEFAULT_IS_MII)) +(bp->caps & MACB_CAPS_USRIO_DEFAULT_IS_MII_GMII)) val = MACB_BIT(RMII); - else if (!(bp->caps & MACB_CAPS_USRIO_DEFAULT_IS_MII)) + else if (!(bp->caps & MACB_CAPS_USRIO_DEFAULT_IS_MII_GMII)) val =
Linux 4.6.4
I'm announcing the release of the 4.6.4 kernel. All users of the 4.6 kernel series must upgrade. The updated 4.6.y git tree can be found at: git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git linux-4.6.y and can be browsed at the normal kernel.org git web browser: http://git.kernel.org/?p=linux/kernel/git/stable/linux-stable.git;a=summary thanks, greg k-h Makefile |2 - crypto/crypto_user.c |1 drivers/crypto/ux500/hash/hash_core.c |4 +- drivers/crypto/vmx/aes_cbc.c |2 - drivers/crypto/vmx/aes_ctr.c |2 - drivers/usb/core/quirks.c | 23 --- drivers/usb/dwc3/dwc3-exynos.c| 19 +++- drivers/usb/gadget/legacy/inode.c | 17 --- drivers/usb/host/ehci-tegra.c |2 - drivers/usb/host/xhci-pci.c |5 +++ drivers/usb/host/xhci-plat.c |3 + drivers/usb/host/xhci-ring.c | 30 +++ drivers/usb/host/xhci.c | 27 + drivers/usb/musb/musb_core.c |3 + drivers/usb/musb/musb_host.c | 23 +-- drivers/usb/serial/mos7720.c |1 drivers/usb/storage/uas.c |1 include/linux/bpf.h |4 ++ include/linux/net.h |3 + include/linux/sock_diag.h |6 +++ kernel/events/core.c |2 - net/ax25/af_ax25.c|3 + net/ax25/ax25_ds_timer.c |5 ++- net/ax25/ax25_std_timer.c |5 ++- net/ax25/ax25_subr.c |3 + net/bridge/br_multicast.c |4 ++ net/bridge/br_private.h | 23 --- net/core/neighbour.c |6 +++ net/ipv4/esp4.c | 52 -- net/ipv4/ipmr.c |4 +- net/ipv6/ip6mr.c |1 net/ipv6/sit.c|4 +- net/kcm/kcmproc.c |1 net/sched/act_ipt.c |7 +++- net/sched/sch_fifo.c |4 ++ net/sched/sch_netem.c | 12 +++ 36 files changed, 216 insertions(+), 98 deletions(-) Andrew Goodbody (2): usb: musb: Stop bulk endpoint while queue is rotated usb: musb: Ensure rx reinit occurs for shared_fifo endpoints Anton Blanchard (1): crypto: vmx - Increase priority of aes-cbc cipher Basil Gunn (1): AX.25: Close socket connection on session completion Bin Liu (3): usb: musb: only restore devctl when session was set in backup usb: musb: host: correct cppi dma channel for isoch transfer usb: gadget: fix spinlock dead lock in gadgetfs Daniel Borkmann (1): bpf, 
perf: delay release of BPF prog after grace period David Barroso (1): neigh: Explicitly declare RCU-bh read side critical section in neigh_xmit() Eric Dumazet (2): net_sched: fix pfifo_head_drop behavior vs backlog netem: fix a use after free Gabriel Krisman Bertazi (1): xhci: Cleanup only when releasing primary hcd Greg Kroah-Hartman (1): Linux 4.6.4 Hans de Goede (4): USB: uas: Fix slave queue_depth not being set usb: quirks: Fix sorting usb: quirks: Add no-lpm quirk for Acer C120 LED Projector USB: xhci: Add broken streams quirk for Frescologic device id 1009 Herbert Xu (1): esp: Fix ESN generation under UDP encapsulation Jason A. Donenfeld (1): net: Don't forget pr_fmt on net_dbg_ratelimited for CONFIG_DYNAMIC_DEBUG Jiri Slaby (1): kcm: fix /proc memory leak Linus Walleij (1): crypto: ux500 - memmove the right size Mathias Krause (1): crypto: user - re-add size check for CRYPTO_MSG_GETALG Mathias Nyman (1): xhci: Fix handling timeouted commands on hosts in weird states. Simon Horman (1): sit: correct IP protocol used in ipip6_err Steinar H. Gunderson (1): usb: dwc3: exynos: Fix deferred probing storm. Sudip Mukherjee (1): USB: mos7720: delete parport Thierry Reding (1): usb: host: ehci-tegra: Grab the correct UTMI pads reset Thomas Petazzoni (1): usb: xhci-plat: properly handle probe deferral for devm_clk_get() Tom Goff (1): ipmr/ip6mr: Initialize the last assert time of mfc entries. WANG Cong (1): act_ipt: fix a bind refcnt leak Willem de Bruijn (1): sock_diag: do not broadcast raw socket destruction daniel (1): Bridge: Fix ipv6 mc snooping if bridge has no ipv6 address signature.asc Description: PGP signature
Linux 4.4.15
I'm announcing the release of the 4.4.15 kernel. All users of the 4.4 kernel series must upgrade. The updated 4.4.y git tree can be found at: git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git linux-4.4.y and can be browsed at the normal kernel.org git web browser: http://git.kernel.org/?p=linux/kernel/git/stable/linux-stable.git;a=summary thanks, greg k-h Makefile|2 - crypto/crypto_user.c|1 drivers/crypto/ux500/hash/hash_core.c |4 +- drivers/crypto/vmx/aes_cbc.c|2 - drivers/crypto/vmx/aes_ctr.c|2 - drivers/net/ethernet/atheros/alx/main.c |7 +++- drivers/net/ethernet/cadence/macb.c | 13 drivers/net/ethernet/cadence/macb.h |2 - drivers/usb/core/quirks.c | 23 -- drivers/usb/dwc3/dwc3-exynos.c | 19 ++- drivers/usb/gadget/legacy/inode.c | 17 -- drivers/usb/host/ehci-tegra.c |2 - drivers/usb/host/xhci-pci.c |5 +++ drivers/usb/host/xhci-plat.c|3 + drivers/usb/host/xhci-ring.c| 30 ++ drivers/usb/host/xhci.c | 27 +--- drivers/usb/musb/musb_core.c|3 + drivers/usb/musb/musb_host.c| 23 -- drivers/usb/serial/mos7720.c|1 drivers/usb/storage/uas.c |1 include/linux/bpf.h |4 ++ include/linux/net.h |3 + include/linux/skbuff.h |7 include/linux/sock_diag.h |6 +++ kernel/events/core.c|2 - net/ax25/af_ax25.c |3 + net/ax25/ax25_ds_timer.c|5 ++- net/ax25/ax25_std_timer.c |5 ++- net/ax25/ax25_subr.c|3 + net/bridge/br_multicast.c |4 ++ net/bridge/br_private.h | 23 +++--- net/core/filter.c | 18 ++- net/core/neighbour.c|6 +++ net/ipv4/esp4.c | 52 +++- net/ipv4/ipmr.c |4 +- net/ipv6/ip6mr.c|1 net/ipv6/sit.c |4 +- net/sched/act_csum.c|8 +--- net/sched/act_nat.c | 18 +++ net/sched/sch_fifo.c|4 ++ net/sched/sch_netem.c | 12 +++ 41 files changed, 248 insertions(+), 131 deletions(-) Andrew Goodbody (2): usb: musb: Stop bulk endpoint while queue is rotated usb: musb: Ensure rx reinit occurs for shared_fifo endpoints Anton Blanchard (1): crypto: vmx - Increase priority of aes-cbc cipher Basil Gunn (1): AX.25: Close socket connection on session completion Bin Liu (3): usb: musb: 
only restore devctl when session was set in backup usb: musb: host: correct cppi dma channel for isoch transfer usb: gadget: fix spinlock dead lock in gadgetfs Daniel Borkmann (2): bpf, perf: delay release of BPF prog after grace period bpf: try harder on clones when writing into skb David Barroso (1): neigh: Explicitly declare RCU-bh read side critical section in neigh_xmit() Eric Dumazet (2): net_sched: fix pfifo_head_drop behavior vs backlog netem: fix a use after free Feng Tang (1): net: alx: Work around the DMA RX overflow issue Gabriel Krisman Bertazi (1): xhci: Cleanup only when releasing primary hcd Greg Kroah-Hartman (1): Linux 4.4.15 Hans de Goede (4): USB: uas: Fix slave queue_depth not being set usb: quirks: Fix sorting usb: quirks: Add no-lpm quirk for Acer C120 LED Projector USB: xhci: Add broken streams quirk for Frescologic device id 1009 Herbert Xu (1): esp: Fix ESN generation under UDP encapsulation Jason A. Donenfeld (1): net: Don't forget pr_fmt on net_dbg_ratelimited for CONFIG_DYNAMIC_DEBUG Linus Walleij (1): crypto: ux500 - memmove the right size Mathias Krause (1): crypto: user - re-add size check for CRYPTO_MSG_GETALG Mathias Nyman (1): xhci: Fix handling timeouted commands on hosts in weird states. Nicolas Ferre (1): net: macb: fix default configuration for GMAC on AT91 Simon Horman (1): sit: correct IP protocol used in ipip6_err Steinar H. Gunderson (1): usb: dwc3: exynos: Fix deferred probing storm. Sudip Mukherjee (1): USB: mos7720: delete parport Thierry Reding (1): usb: host: ehci-tegra: Grab the correct UTMI pads reset Thomas Petazzoni (1): usb: xhci-plat: properly handle probe deferral for devm_clk_get() Tom Goff (1): ipmr/ip6mr: Initialize the last assert time of mfc entries. Willem de Bruijn (1): sock_diag: do not
Linux 4.6.4
I'm announcing the release of the 4.6.4 kernel. All users of the 4.6 kernel series must upgrade. The updated 4.6.y git tree can be found at: git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git linux-4.6.y and can be browsed at the normal kernel.org git web browser: http://git.kernel.org/?p=linux/kernel/git/stable/linux-stable.git;a=summary thanks, greg k-h Makefile |2 - crypto/crypto_user.c |1 drivers/crypto/ux500/hash/hash_core.c |4 +- drivers/crypto/vmx/aes_cbc.c |2 - drivers/crypto/vmx/aes_ctr.c |2 - drivers/usb/core/quirks.c | 23 --- drivers/usb/dwc3/dwc3-exynos.c| 19 +++- drivers/usb/gadget/legacy/inode.c | 17 --- drivers/usb/host/ehci-tegra.c |2 - drivers/usb/host/xhci-pci.c |5 +++ drivers/usb/host/xhci-plat.c |3 + drivers/usb/host/xhci-ring.c | 30 +++ drivers/usb/host/xhci.c | 27 + drivers/usb/musb/musb_core.c |3 + drivers/usb/musb/musb_host.c | 23 +-- drivers/usb/serial/mos7720.c |1 drivers/usb/storage/uas.c |1 include/linux/bpf.h |4 ++ include/linux/net.h |3 + include/linux/sock_diag.h |6 +++ kernel/events/core.c |2 - net/ax25/af_ax25.c|3 + net/ax25/ax25_ds_timer.c |5 ++- net/ax25/ax25_std_timer.c |5 ++- net/ax25/ax25_subr.c |3 + net/bridge/br_multicast.c |4 ++ net/bridge/br_private.h | 23 --- net/core/neighbour.c |6 +++ net/ipv4/esp4.c | 52 -- net/ipv4/ipmr.c |4 +- net/ipv6/ip6mr.c |1 net/ipv6/sit.c|4 +- net/kcm/kcmproc.c |1 net/sched/act_ipt.c |7 +++- net/sched/sch_fifo.c |4 ++ net/sched/sch_netem.c | 12 +++ 36 files changed, 216 insertions(+), 98 deletions(-) Andrew Goodbody (2): usb: musb: Stop bulk endpoint while queue is rotated usb: musb: Ensure rx reinit occurs for shared_fifo endpoints Anton Blanchard (1): crypto: vmx - Increase priority of aes-cbc cipher Basil Gunn (1): AX.25: Close socket connection on session completion Bin Liu (3): usb: musb: only restore devctl when session was set in backup usb: musb: host: correct cppi dma channel for isoch transfer usb: gadget: fix spinlock dead lock in gadgetfs Daniel Borkmann (1): bpf, 
perf: delay release of BPF prog after grace period David Barroso (1): neigh: Explicitly declare RCU-bh read side critical section in neigh_xmit() Eric Dumazet (2): net_sched: fix pfifo_head_drop behavior vs backlog netem: fix a use after free Gabriel Krisman Bertazi (1): xhci: Cleanup only when releasing primary hcd Greg Kroah-Hartman (1): Linux 4.6.4 Hans de Goede (4): USB: uas: Fix slave queue_depth not being set usb: quirks: Fix sorting usb: quirks: Add no-lpm quirk for Acer C120 LED Projector USB: xhci: Add broken streams quirk for Frescologic device id 1009 Herbert Xu (1): esp: Fix ESN generation under UDP encapsulation Jason A. Donenfeld (1): net: Don't forget pr_fmt on net_dbg_ratelimited for CONFIG_DYNAMIC_DEBUG Jiri Slaby (1): kcm: fix /proc memory leak Linus Walleij (1): crypto: ux500 - memmove the right size Mathias Krause (1): crypto: user - re-add size check for CRYPTO_MSG_GETALG Mathias Nyman (1): xhci: Fix handling timeouted commands on hosts in weird states. Simon Horman (1): sit: correct IP protocol used in ipip6_err Steinar H. Gunderson (1): usb: dwc3: exynos: Fix deferred probing storm. Sudip Mukherjee (1): USB: mos7720: delete parport Thierry Reding (1): usb: host: ehci-tegra: Grab the correct UTMI pads reset Thomas Petazzoni (1): usb: xhci-plat: properly handle probe deferral for devm_clk_get() Tom Goff (1): ipmr/ip6mr: Initialize the last assert time of mfc entries. WANG Cong (1): act_ipt: fix a bind refcnt leak Willem de Bruijn (1): sock_diag: do not broadcast raw socket destruction daniel (1): Bridge: Fix ipv6 mc snooping if bridge has no ipv6 address signature.asc Description: PGP signature
Linux 4.4.15
I'm announcing the release of the 4.4.15 kernel. All users of the 4.4 kernel series must upgrade. The updated 4.4.y git tree can be found at: git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git linux-4.4.y and can be browsed at the normal kernel.org git web browser: http://git.kernel.org/?p=linux/kernel/git/stable/linux-stable.git;a=summary thanks, greg k-h Makefile|2 - crypto/crypto_user.c|1 drivers/crypto/ux500/hash/hash_core.c |4 +- drivers/crypto/vmx/aes_cbc.c|2 - drivers/crypto/vmx/aes_ctr.c|2 - drivers/net/ethernet/atheros/alx/main.c |7 +++- drivers/net/ethernet/cadence/macb.c | 13 drivers/net/ethernet/cadence/macb.h |2 - drivers/usb/core/quirks.c | 23 -- drivers/usb/dwc3/dwc3-exynos.c | 19 ++- drivers/usb/gadget/legacy/inode.c | 17 -- drivers/usb/host/ehci-tegra.c |2 - drivers/usb/host/xhci-pci.c |5 +++ drivers/usb/host/xhci-plat.c|3 + drivers/usb/host/xhci-ring.c| 30 ++ drivers/usb/host/xhci.c | 27 +--- drivers/usb/musb/musb_core.c|3 + drivers/usb/musb/musb_host.c| 23 -- drivers/usb/serial/mos7720.c|1 drivers/usb/storage/uas.c |1 include/linux/bpf.h |4 ++ include/linux/net.h |3 + include/linux/skbuff.h |7 include/linux/sock_diag.h |6 +++ kernel/events/core.c|2 - net/ax25/af_ax25.c |3 + net/ax25/ax25_ds_timer.c|5 ++- net/ax25/ax25_std_timer.c |5 ++- net/ax25/ax25_subr.c|3 + net/bridge/br_multicast.c |4 ++ net/bridge/br_private.h | 23 +++--- net/core/filter.c | 18 ++- net/core/neighbour.c|6 +++ net/ipv4/esp4.c | 52 +++- net/ipv4/ipmr.c |4 +- net/ipv6/ip6mr.c|1 net/ipv6/sit.c |4 +- net/sched/act_csum.c|8 +--- net/sched/act_nat.c | 18 +++ net/sched/sch_fifo.c|4 ++ net/sched/sch_netem.c | 12 +++ 41 files changed, 248 insertions(+), 131 deletions(-) Andrew Goodbody (2): usb: musb: Stop bulk endpoint while queue is rotated usb: musb: Ensure rx reinit occurs for shared_fifo endpoints Anton Blanchard (1): crypto: vmx - Increase priority of aes-cbc cipher Basil Gunn (1): AX.25: Close socket connection on session completion Bin Liu (3): usb: musb: 
only restore devctl when session was set in backup usb: musb: host: correct cppi dma channel for isoch transfer usb: gadget: fix spinlock dead lock in gadgetfs Daniel Borkmann (2): bpf, perf: delay release of BPF prog after grace period bpf: try harder on clones when writing into skb David Barroso (1): neigh: Explicitly declare RCU-bh read side critical section in neigh_xmit() Eric Dumazet (2): net_sched: fix pfifo_head_drop behavior vs backlog netem: fix a use after free Feng Tang (1): net: alx: Work around the DMA RX overflow issue Gabriel Krisman Bertazi (1): xhci: Cleanup only when releasing primary hcd Greg Kroah-Hartman (1): Linux 4.4.15 Hans de Goede (4): USB: uas: Fix slave queue_depth not being set usb: quirks: Fix sorting usb: quirks: Add no-lpm quirk for Acer C120 LED Projector USB: xhci: Add broken streams quirk for Frescologic device id 1009 Herbert Xu (1): esp: Fix ESN generation under UDP encapsulation Jason A. Donenfeld (1): net: Don't forget pr_fmt on net_dbg_ratelimited for CONFIG_DYNAMIC_DEBUG Linus Walleij (1): crypto: ux500 - memmove the right size Mathias Krause (1): crypto: user - re-add size check for CRYPTO_MSG_GETALG Mathias Nyman (1): xhci: Fix handling timeouted commands on hosts in weird states. Nicolas Ferre (1): net: macb: fix default configuration for GMAC on AT91 Simon Horman (1): sit: correct IP protocol used in ipip6_err Steinar H. Gunderson (1): usb: dwc3: exynos: Fix deferred probing storm. Sudip Mukherjee (1): USB: mos7720: delete parport Thierry Reding (1): usb: host: ehci-tegra: Grab the correct UTMI pads reset Thomas Petazzoni (1): usb: xhci-plat: properly handle probe deferral for devm_clk_get() Tom Goff (1): ipmr/ip6mr: Initialize the last assert time of mfc entries. Willem de Bruijn (1): sock_diag: do not
Re: Linux 4.4.15
diff --git a/Makefile b/Makefile index fadbb9d73c6d..979088079338 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 4 -SUBLEVEL = 14 +SUBLEVEL = 15 EXTRAVERSION = NAME = Blurry Fish Butt diff --git a/crypto/crypto_user.c b/crypto/crypto_user.c index 43fe85f20d57..7097a3395b25 100644 --- a/crypto/crypto_user.c +++ b/crypto/crypto_user.c @@ -455,6 +455,7 @@ static const int crypto_msg_min[CRYPTO_NR_MSGTYPES] = { [CRYPTO_MSG_NEWALG - CRYPTO_MSG_BASE] = MSGSIZE(crypto_user_alg), [CRYPTO_MSG_DELALG - CRYPTO_MSG_BASE] = MSGSIZE(crypto_user_alg), [CRYPTO_MSG_UPDATEALG - CRYPTO_MSG_BASE] = MSGSIZE(crypto_user_alg), + [CRYPTO_MSG_GETALG - CRYPTO_MSG_BASE] = MSGSIZE(crypto_user_alg), [CRYPTO_MSG_DELRNG - CRYPTO_MSG_BASE] = 0, }; diff --git a/drivers/crypto/ux500/hash/hash_core.c b/drivers/crypto/ux500/hash/hash_core.c index 66b1c3313e2e..cd4398498495 100644 --- a/drivers/crypto/ux500/hash/hash_core.c +++ b/drivers/crypto/ux500/hash/hash_core.c @@ -797,7 +797,7 @@ static int hash_process_data(struct hash_device_data *device_data, _data->state); memmove(req_ctx->state.buffer, device_data->state.buffer, - HASH_BLOCK_SIZE / sizeof(u32)); + HASH_BLOCK_SIZE); if (ret) { dev_err(device_data->dev, "%s: hash_resume_state() failed!\n", @@ -848,7 +848,7 @@ static int hash_process_data(struct hash_device_data *device_data, memmove(device_data->state.buffer, req_ctx->state.buffer, - HASH_BLOCK_SIZE / sizeof(u32)); + HASH_BLOCK_SIZE); if (ret) { dev_err(device_data->dev, "%s: hash_save_state() failed!\n", __func__); diff --git a/drivers/crypto/vmx/aes_cbc.c b/drivers/crypto/vmx/aes_cbc.c index 0b8fe2ec5315..f3801b983f42 100644 --- a/drivers/crypto/vmx/aes_cbc.c +++ b/drivers/crypto/vmx/aes_cbc.c @@ -182,7 +182,7 @@ struct crypto_alg p8_aes_cbc_alg = { .cra_name = "cbc(aes)", .cra_driver_name = "p8_aes_cbc", .cra_module = THIS_MODULE, - .cra_priority = 1000, + .cra_priority = 2000, .cra_type = _blkcipher_type, .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | 
CRYPTO_ALG_NEED_FALLBACK, .cra_alignmask = 0, diff --git a/drivers/crypto/vmx/aes_ctr.c b/drivers/crypto/vmx/aes_ctr.c index ee1306cd8f59..404a1b69a3ab 100644 --- a/drivers/crypto/vmx/aes_ctr.c +++ b/drivers/crypto/vmx/aes_ctr.c @@ -166,7 +166,7 @@ struct crypto_alg p8_aes_ctr_alg = { .cra_name = "ctr(aes)", .cra_driver_name = "p8_aes_ctr", .cra_module = THIS_MODULE, - .cra_priority = 1000, + .cra_priority = 2000, .cra_type = _blkcipher_type, .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | CRYPTO_ALG_NEED_FALLBACK, .cra_alignmask = 0, diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c index bd377a6b067d..df54475d163b 100644 --- a/drivers/net/ethernet/atheros/alx/main.c +++ b/drivers/net/ethernet/atheros/alx/main.c @@ -86,9 +86,14 @@ static int alx_refill_rx_ring(struct alx_priv *alx, gfp_t gfp) while (!cur_buf->skb && next != rxq->read_idx) { struct alx_rfd *rfd = >rfd[cur]; - skb = __netdev_alloc_skb(alx->dev, alx->rxbuf_size, gfp); + skb = __netdev_alloc_skb(alx->dev, alx->rxbuf_size + 64, gfp); if (!skb) break; + + /* Workround for the HW RX DMA overflow issue */ + if (((unsigned long)skb->data & 0xfff) == 0xfc0) + skb_reserve(skb, 64); + dma = dma_map_single(>hw.pdev->dev, skb->data, alx->rxbuf_size, DMA_FROM_DEVICE); diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c index 169059c92f80..8d54e7b41bbf 100644 --- a/drivers/net/ethernet/cadence/macb.c +++ b/drivers/net/ethernet/cadence/macb.c @@ -2405,9 +2405,9 @@ static int macb_init(struct platform_device *pdev) if (bp->phy_interface == PHY_INTERFACE_MODE_RGMII) val = GEM_BIT(RGMII); else if (bp->phy_interface == PHY_INTERFACE_MODE_RMII && -(bp->caps & MACB_CAPS_USRIO_DEFAULT_IS_MII)) +(bp->caps & MACB_CAPS_USRIO_DEFAULT_IS_MII_GMII)) val = MACB_BIT(RMII); - else if (!(bp->caps & MACB_CAPS_USRIO_DEFAULT_IS_MII)) + else if (!(bp->caps & MACB_CAPS_USRIO_DEFAULT_IS_MII_GMII)) val =
Re: [kernel-hardening] [PATCH v4 26/29] sched: Allow putting thread_info into task_struct
On Mon, Jul 11, 2016 at 9:31 AM, Mark Rutland wrote: >> >> So until you do the wire that actually disables preemption you can >> schedule away as much as you want, and after that write you no longer >> will. > > I was assuming a percpu pointer to current (or preempt count). So for the same reason that is ok *iff* you have - some kind of dedicated percpu register (or other base pointer - x86 has the segment thing) that gets updated when you schedule. - an instruction that can load 'current' directly off that register atomically. But yes, percpu data in general is obviously not safe to access without preemption. Linus
Re: [kernel-hardening] [PATCH v4 26/29] sched: Allow putting thread_info into task_struct
On Mon, Jul 11, 2016 at 9:31 AM, Mark Rutland wrote: >> >> So until you do the wire that actually disables preemption you can >> schedule away as much as you want, and after that write you no longer >> will. > > I was assuming a percpu pointer to current (or preempt count). So for the same reason that is ok *iff* you have - some kind of dedicated percpu register (or other base pointer - x86 has the segment thing) that gets updated when you schedule. - an instruction that can load 'current' directly off that register atomically. But yes, percpu data in general is obviously not safe to access without preemption. Linus
[GIT PULL 4/4] arm64: defconfig: Stuff for exynos for v4.8, last round
Hi, Last round of commits for v4.8. Best regards, Krzysztof The following changes since commit 1a695a905c18548062509178b98bc91e67510864: Linux 4.7-rc1 (2016-05-29 09:29:24 -0700) are available in the git repository at: git://git.kernel.org/pub/scm/linux/kernel/git/krzk/linux.git tags/samsung-defconfig64-4.8 for you to fetch changes up to 426f754be0bc258c269524bce162ae0ca1cb8927: arm64: defconfig: Enable more IP blocks for Exynos7 and Exynos5433 (2016-07-11 08:06:54 +0200) Samsung defconfig updates for ARM64 - enable drivers for Exynos7 and Exynos5433 based boards: 1. S2MPS clock driver, 2. SoC: RTC, SPI, watchdog, EHCI, OHCI, DWC3, ADC and PWM, 3. Enable Samsung SoC sound. Alim Akhtar (1): arm64: defconfig: Enable S2MPS11 clock and S3C RTC driver Krzysztof Kozlowski (1): arm64: defconfig: Enable more IP blocks for Exynos7 and Exynos5433 arch/arm64/configs/defconfig | 12 1 file changed, 12 insertions(+)
[GIT PULL 2/4] ARM: exynos: Stuff for v4.8, last round
Hi, Last round of commits for v4.8. On top of previous tag. Best regards, Krzysztof The following changes since commit 1c03274d68f4744afe582fcff1c2e5b1c5c34b5b: MAINTAINERS: Extend Samsung SoC entry with S3C/S5P drivers (2016-06-23 08:12:08 +0200) are available in the git repository at: git://git.kernel.org/pub/scm/linux/kernel/git/krzk/linux.git tags/samsung-soc-4.8-3 for you to fetch changes up to 3981b11fda14ea0b459043d97c68db0a614ec9f8: ARM: s3c64xx: smartq: Avoid sparse warnings (2016-07-11 17:44:11 +0200) Samsung mach/soc update for v4.8, part 3: Just cleanup - fix Sparse warning and constify passed iomem address. Krzysztof Kozlowski (1): ARM: SAMSUNG: Constify iomem address passed to s5p_init_cpu Thierry Reding (1): ARM: s3c64xx: smartq: Avoid sparse warnings arch/arm/mach-s3c64xx/mach-smartq.c | 1 + arch/arm/plat-samsung/cpu.c | 2 +- arch/arm/plat-samsung/include/plat/cpu.h | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-)
[GIT PULL 1/4] ARM: exynos: Drivers for v4.8, last round
Hi, Last round of commits for v4.8. On top of previous tag. Best regards, Krzysztof The following changes since commit 187364b6fcabb9f4bfefcb62fab4fcda019b5810: cpufreq: s5pv210: use relaxed IO accesors (2016-06-22 14:00:21 +0200) are available in the git repository at: git://git.kernel.org/pub/scm/linux/kernel/git/krzk/linux.git tags/samsung-drivers-4.8-3 for you to fetch changes up to aec6341e2ac76ea8703642e83535f216b8866162: soc: samsung: pmu: Constify arrays with PMU data (2016-07-06 10:35:45 +0200) Samsung drivers/soc update for v4.8, part 3 1. Fix size of allocation for Exynos SROM registers (too much was allocated). 2. Constify fix. Krzysztof Kozlowski (1): soc: samsung: pmu: Constify arrays with PMU data Seung-Woo Kim (1): memory: samsung: exynos-srom: Fix wrong count of registers drivers/memory/samsung/exynos-srom.c | 2 +- drivers/soc/samsung/exynos3250-pmu.c | 2 +- drivers/soc/samsung/exynos5420-pmu.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-)
[GIT PULL 4/4] arm64: defconfig: Stuff for exynos for v4.8, last round
Hi, Last round of commits for v4.8. Best regards, Krzysztof The following changes since commit 1a695a905c18548062509178b98bc91e67510864: Linux 4.7-rc1 (2016-05-29 09:29:24 -0700) are available in the git repository at: git://git.kernel.org/pub/scm/linux/kernel/git/krzk/linux.git tags/samsung-defconfig64-4.8 for you to fetch changes up to 426f754be0bc258c269524bce162ae0ca1cb8927: arm64: defconfig: Enable more IP blocks for Exynos7 and Exynos5433 (2016-07-11 08:06:54 +0200) Samsung defconfig updates for ARM64 - enable drivers for Exynos7 and Exynos5433 based boards: 1. S2MPS clock driver, 2. SoC: RTC, SPI, watchdog, EHCI, OHCI, DWC3, ADC and PWM, 3. Enable Samsung SoC sound. Alim Akhtar (1): arm64: defconfig: Enable S2MPS11 clock and S3C RTC driver Krzysztof Kozlowski (1): arm64: defconfig: Enable more IP blocks for Exynos7 and Exynos5433 arch/arm64/configs/defconfig | 12 1 file changed, 12 insertions(+)
[GIT PULL 2/4] ARM: exynos: Stuff for v4.8, last round
Hi, Last round of commits for v4.8. On top of previous tag. Best regards, Krzysztof The following changes since commit 1c03274d68f4744afe582fcff1c2e5b1c5c34b5b: MAINTAINERS: Extend Samsung SoC entry with S3C/S5P drivers (2016-06-23 08:12:08 +0200) are available in the git repository at: git://git.kernel.org/pub/scm/linux/kernel/git/krzk/linux.git tags/samsung-soc-4.8-3 for you to fetch changes up to 3981b11fda14ea0b459043d97c68db0a614ec9f8: ARM: s3c64xx: smartq: Avoid sparse warnings (2016-07-11 17:44:11 +0200) Samsung mach/soc update for v4.8, part 3: Just cleanup - fix Sparse warning and constify passed iomem address. Krzysztof Kozlowski (1): ARM: SAMSUNG: Constify iomem address passed to s5p_init_cpu Thierry Reding (1): ARM: s3c64xx: smartq: Avoid sparse warnings arch/arm/mach-s3c64xx/mach-smartq.c | 1 + arch/arm/plat-samsung/cpu.c | 2 +- arch/arm/plat-samsung/include/plat/cpu.h | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-)
[GIT PULL 1/4] ARM: exynos: Drivers for v4.8, last round
Hi, Last round of commits for v4.8. On top of previous tag. Best regards, Krzysztof The following changes since commit 187364b6fcabb9f4bfefcb62fab4fcda019b5810: cpufreq: s5pv210: use relaxed IO accesors (2016-06-22 14:00:21 +0200) are available in the git repository at: git://git.kernel.org/pub/scm/linux/kernel/git/krzk/linux.git tags/samsung-drivers-4.8-3 for you to fetch changes up to aec6341e2ac76ea8703642e83535f216b8866162: soc: samsung: pmu: Constify arrays with PMU data (2016-07-06 10:35:45 +0200) Samsung drivers/soc update for v4.8, part 3 1. Fix size of allocation for Exynos SROM registers (too much was allocated). 2. Constify fix. Krzysztof Kozlowski (1): soc: samsung: pmu: Constify arrays with PMU data Seung-Woo Kim (1): memory: samsung: exynos-srom: Fix wrong count of registers drivers/memory/samsung/exynos-srom.c | 2 +- drivers/soc/samsung/exynos3250-pmu.c | 2 +- drivers/soc/samsung/exynos5420-pmu.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-)
[GIT PULL 3/4] arm64: dts: exynos: Minor fix for v4.8, last round
Hi, Last round of commits for v4.8. Best regards, Krzysztof The following changes since commit 1a695a905c18548062509178b98bc91e67510864: Linux 4.7-rc1 (2016-05-29 09:29:24 -0700) are available in the git repository at: git://git.kernel.org/pub/scm/linux/kernel/git/krzk/linux.git tags/samsung-dt64-4.8-2 for you to fetch changes up to a1924466b784fbb64f10eeb213d335e3d1728b8b: arm64: dts: exynos: Modify the voltage range for BUCK2 for exynos7 (2016-07-06 09:43:42 +0200) Samsung DeviceTree changes for ARM64 for v4.8: 1. Adjust the voltage of CPU buck regulator so scaling could work. Abhilash Kesavan (1): arm64: dts: exynos: Modify the voltage range for BUCK2 for exynos7 arch/arm64/boot/dts/exynos/exynos7-espresso.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
[GIT PULL 3/4] arm64: dts: exynos: Minor fix for v4.8, last round
Hi, Last round of commits for v4.8. Best regards, Krzysztof The following changes since commit 1a695a905c18548062509178b98bc91e67510864: Linux 4.7-rc1 (2016-05-29 09:29:24 -0700) are available in the git repository at: git://git.kernel.org/pub/scm/linux/kernel/git/krzk/linux.git tags/samsung-dt64-4.8-2 for you to fetch changes up to a1924466b784fbb64f10eeb213d335e3d1728b8b: arm64: dts: exynos: Modify the voltage range for BUCK2 for exynos7 (2016-07-06 09:43:42 +0200) Samsung DeviceTree changes for ARM64 for v4.8: 1. Adjust the voltage of CPU buck regulator so scaling could work. Abhilash Kesavan (1): arm64: dts: exynos: Modify the voltage range for BUCK2 for exynos7 arch/arm64/boot/dts/exynos/exynos7-espresso.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
Re: [PATCH v4] [media] pci: Add tw5864 driver
On Mon, 2016-07-11 at 18:17 +0300, Andrey Utkin wrote: [] > diff --git a/drivers/media/pci/tw5864/tw5864-core.c > b/drivers/media/pci/tw5864/tw5864-core.c [] > +static const char * const artifacts_warning = > +"BEWARE OF KNOWN ISSUES WITH VIDEO QUALITY\n" > +"\n" > +"This driver was developed by Bluecherry LLC by deducing behaviour of\n" > +"original manufacturer's driver, from both source code and execution > traces.\n" > +"It is known that there are some artifacts on output video with this > driver:\n" > +" - on all known hardware samples: random pixels of wrong color (mostly\n" > +" white, red or blue) appearing and disappearing on sequences of > P-frames;\n" > +" - on some hardware samples (known with H.264 core version e006:2800):\n" > +" total madness on P-frames: blocks of wrong luminance; blocks of wrong\n" > +" colors \"creeping\" across the picture.\n" > +"There is a workaround for both issues: avoid P-frames by setting GOP size\n" > +"to 1. To do that, run this command on device files created by this > driver:\n" > +"\n" > +"v4l2-ctl --device /dev/videoX --set-ctrl=video_gop_size=1\n" > +"\n"; > + > +static char *artifacts_warning_continued = > +"These issues are not decoding errors; all produced H.264 streams are > decoded\n" > +"properly. Streams without P-frames don't have these artifacts so it's not\n" > +"analog-to-digital conversion issues nor internal memory errors; we > conclude\n" > +"it's internal H.264 encoder issues.\n" > +"We cannot even check the original driver's behaviour because it has never\n" > +"worked properly at all in our development environment. So these issues > may\n" > +"be actually related to firmware or hardware. 
However it may be that > there's\n" > +"just some more register settings missing in the driver which would please\n" > +"the hardware.\n" > +"Manufacturer didn't help much on our inquiries, but feel free to disturb\n" > +"again the support of Intersil (owner of former Techwell).\n" > +"\n"; [] > +static int tw5864_initdev(struct pci_dev *pci_dev, > + const struct pci_device_id *pci_id) > +{ [] > + dev_warn(_dev->dev, "%s", artifacts_warning); > + dev_warn(_dev->dev, "%s", artifacts_warning_continued); Is all that verbosity useful? And trivially: Each of these blocks will start with the dev_ prefix and the subsequent lines will not have the same prefix Perhaps it'd be better to write this something like: static const char * const artifacts_warning[] = { "BEWARE OF KNOWN ISSUES WITH VIDEO QUALITY", "", "This driver was developed by Bluecherry LLC by deducing behaviour of", "original manufacturer's driver, from both source code and execution traces.", "It is known that there are some artifacts on output video with this driver:", " - on all known hardware samples: random pixels of wrong color (mostly", " white, red or blue) appearing and disappearing on sequences of P-frames;", " - on some hardware samples (known with H.264 core version e006:2800):", " total madness on P-frames: blocks of wrong luminance; blocks of wrong", " colors \"creeping\" across the picture.", "There is a workaround for both issues: avoid P-frames by setting GOP size", "to 1. To do that, run this command on device files created by this driver:", "", "v4l2-ctl --device /dev/videoX --set-ctrl=video_gop_size=1", "", "These issues are not decoding errors; all produced H.264 streams are decoded", "properly. 
Streams without P-frames don't have these artifacts so it's not", "analog-to-digital conversion issues nor internal memory errors; we conclude", "it's internal H.264 encoder issues.", "We cannot even check the original driver's behaviour because it has never", "worked properly at all in our development environment. So these issues may", "be actually related to firmware or hardware. However it may be that there's", "just some more register settings missing in the driver which would please", "the hardware.", "Manufacturer didn't help much on our inquiries, but feel free to disturb", "again the support of Intersil (owner of former Techwell).\n" }; and use for (i = 0; i < ARRAY_SIZE(artifacts_warning); i++) dev_warn(&pci_dev->dev, "%s\n", artifacts_warning[i]); so that each line is prefixed. It also might be better to issue something like a single line dev_warn referring to the driver code and just leave this comment in the driver sources. Something like: dev_warn(&pci_dev->dev, "This driver has known defects in video quality\n");
Re: [PATCH v4] [media] pci: Add tw5864 driver
On Mon, 2016-07-11 at 18:17 +0300, Andrey Utkin wrote: [] > diff --git a/drivers/media/pci/tw5864/tw5864-core.c > b/drivers/media/pci/tw5864/tw5864-core.c [] > +static const char * const artifacts_warning = > +"BEWARE OF KNOWN ISSUES WITH VIDEO QUALITY\n" > +"\n" > +"This driver was developed by Bluecherry LLC by deducing behaviour of\n" > +"original manufacturer's driver, from both source code and execution > traces.\n" > +"It is known that there are some artifacts on output video with this > driver:\n" > +" - on all known hardware samples: random pixels of wrong color (mostly\n" > +" white, red or blue) appearing and disappearing on sequences of > P-frames;\n" > +" - on some hardware samples (known with H.264 core version e006:2800):\n" > +" total madness on P-frames: blocks of wrong luminance; blocks of wrong\n" > +" colors \"creeping\" across the picture.\n" > +"There is a workaround for both issues: avoid P-frames by setting GOP size\n" > +"to 1. To do that, run this command on device files created by this > driver:\n" > +"\n" > +"v4l2-ctl --device /dev/videoX --set-ctrl=video_gop_size=1\n" > +"\n"; > + > +static char *artifacts_warning_continued = > +"These issues are not decoding errors; all produced H.264 streams are > decoded\n" > +"properly. Streams without P-frames don't have these artifacts so it's not\n" > +"analog-to-digital conversion issues nor internal memory errors; we > conclude\n" > +"it's internal H.264 encoder issues.\n" > +"We cannot even check the original driver's behaviour because it has never\n" > +"worked properly at all in our development environment. So these issues > may\n" > +"be actually related to firmware or hardware. 
However it may be that > there's\n" > +"just some more register settings missing in the driver which would please\n" > +"the hardware.\n" > +"Manufacturer didn't help much on our inquiries, but feel free to disturb\n" > +"again the support of Intersil (owner of former Techwell).\n" > +"\n"; [] > +static int tw5864_initdev(struct pci_dev *pci_dev, > + const struct pci_device_id *pci_id) > +{ [] > + dev_warn(_dev->dev, "%s", artifacts_warning); > + dev_warn(_dev->dev, "%s", artifacts_warning_continued); Is all that verbosity useful? And trivially: Each of these blocks will start with the dev_ prefix and the subsequent lines will not have the same prefix Perhaps it'd be better to write this something like: static const char * const artifacts_warning[] = { "BEWARE OF KNOWN ISSUES WITH VIDEO QUALITY", "", "This driver was developed by Bluecherry LLC by deducing behaviour of", "original manufacturer's driver, from both source code and execution traces.", "It is known that there are some artifacts on output video with this driver:", " - on all known hardware samples: random pixels of wrong color (mostly", " white, red or blue) appearing and disappearing on sequences of P-frames;", " - on some hardware samples (known with H.264 core version e006:2800):", " total madness on P-frames: blocks of wrong luminance; blocks of wrong", " colors \"creeping\" across the picture.", "There is a workaround for both issues: avoid P-frames by setting GOP size", "to 1. To do that, run this command on device files created by this driver:", "", "v4l2-ctl --device /dev/videoX --set-ctrl=video_gop_size=1", "", "These issues are not decoding errors; all produced H.264 streams are decoded", "properly. 
Streams without P-frames don't have these artifacts so it's not", "analog-to-digital conversion issues nor internal memory errors; we conclude", "it's internal H.264 encoder issues.", "We cannot even check the original driver's behaviour because it has never", "worked properly at all in our development environment. So these issues may", "be actually related to firmware or hardware. However it may be that there's", "just some more register settings missing in the driver which would please", "the hardware.", "Manufacturer didn't help much on our inquiries, but feel free to disturb", "again the support of Intersil (owner of former Techwell).\n" }; and use for (i = 0; i < ARRAY_SIZE(artifacts_warning); i++) dev_warn(&pci_dev->dev, "%s\n", artifacts_warning[i]); so that each line is prefixed. It also might be better to issue something like a single line dev_warn referring to the driver code and just leave this comment in the driver sources. Something like: dev_warn(&pci_dev->dev, "This driver has known defects in video quality\n");
Re: [PATCH 2/2] soc: samsung: Add support for Exynos7 PMU
On 07/11/2016 04:44 PM, Abhilash Kesavan wrote: >>> + /* >>> >> +* Set clock freeze cycle count to 0 before and after arm clamp >>> >> or >>> >> +* reset signal transition >>> >> +*/ >>> >> + node = of_find_compatible_node(NULL, NULL, >>> >> + "samsung,exynos7-clock-atlas"); >>> >> + if (node) { >>> >> + atlas_cmu_base = of_iomap(node, 0); >>> >> + if (!atlas_cmu_base) >>> >> + return; >>> >> + >>> >> + __raw_writel(0x0, >>> >> + atlas_cmu_base + >>> >> EXYNOS7_CORE_ARMCLK_STOPCTRL); >>> >> + iounmap(atlas_cmu_base); >> > >> > Missing: >> > of_node_put(node); >> > >> > ...but I think this creates unnecessary dependency on different >> > compatible. I understand that disabling the EXTENDED_CLKSTOP is needed >> > after configuring the PMU so this code belongs here. However >> > everything you need is just a mapping of CMU address. The PMU driver >> > should receive in bindings everything it needs to do its work. Either >> > it is a phandle to something or an address for iomap. In this case the >> > PMU should probably get two addresses: PMU and optionally CMU (part of >> > CMU for example). Of course bindings would have to be updated. > > I will add an optional CMU phandle to the PMU bindings. We could additionally split the CMU_ATLAS region into 2 regions in DT (derived from exynos7420 documentation): reg = <0x11800000 0xF08>, // offsets 0x0000...0x0F04 <0x11801000 0x8C>, // offsets 0x1000...0x1088 so that the first can be mapped by the clk driver and the second by the PMU driver? It seems the first region is strictly clock functionality related, while the second contains power control related and other registers. However I'm not sure it is a good idea, for consistency this would need to be done also for CMU_APOLLO, CMU_MIF{0...3}. All these CMUs don't have DT bindings defined yet though and there is no corresponding dts entries. -- Thanks, Sylwester
Re: [PATCH 1/2] mfd: cros_ec: Add MKBP event support
On Mon, 11 Jul 2016, Lee Jones wrote: > On Tue, 05 Jul 2016, Olof Johansson wrote: > > > On Tue, Jul 5, 2016 at 2:00 PM, Dmitry Torokhov > >wrote: > > > On July 5, 2016 1:55:44 PM PDT, Olof Johansson wrote: > > >>On Tue, Jul 5, 2016 at 1:54 PM, Olof Johansson wrote: > > >>> On Fri, Jul 1, 2016 at 2:07 AM, Enric Balletbo i Serra > > >>> wrote: > > From: Vic Yang > > > > Newer revisions of the ChromeOS EC add more events besides the > > >>keyboard > > ones. So handle interrupts in the MFD driver and let consumers > > >>register > > for notifications for the events they might care. > > > > To keep backward compatibility, if the EC doesn't support MKBP > > >>event, we > > fall back to the old MKBP key matrix host command. > > > > Signed-off-by: Vic Yang > > Signed-off-by: Tomeu Vizoso > > Tested-by: Enric Balletbo i Serra > > Cc: Randall Spangler > > Cc: Vincent Palatin > > Cc: Benson Leung > > >>> > > >>> Probably easiest to merge this through the MFD tree due to the > > >>> overlaps, so for that purpose: > > >>> > > >>> Acked-by: Olof Johansson > > >> > > >>Argh, I just noticed that the second patch is an input patch, not > > >>another MFD patch. Either way, I'm OK with this going through the > > >>input tree if that's easiest. If so, you should probably wait for an > > >>ack from Lee as well. > > > > > > Hmm, I thought I already acked input portion to go through MFD tree... or > > > am I confusing this with some other patch? > > > > Oh, then we're all set. That patch didn't thread with this one in my > > mailbox so I didn't see the comment thread on it. > > Yes, same for me. > > > Lee, all yours. > > Err, nice, ta! =;-) > > Eric, > > Please resubmit this set 'threaded' so I might take proper care of > it. Wait! Ignore that. I got mixed up with what Olof said and the way things looked in my inbox. The two patches are in fact threaded. I'll look to see to them later in the week. Please bear with me. 
-- Lee Jones Linaro STMicroelectronics Landing Team Lead Linaro.org │ Open source software for ARM SoCs Follow Linaro: Facebook | Twitter | Blog
Re: [PATCH 2/2] soc: samsung: Add support for Exynos7 PMU
On 07/11/2016 04:44 PM, Abhilash Kesavan wrote: >>> + /* >>> >> +* Set clock freeze cycle count to 0 before and after arm clamp >>> >> or >>> >> +* reset signal transition >>> >> +*/ >>> >> + node = of_find_compatible_node(NULL, NULL, >>> >> + "samsung,exynos7-clock-atlas"); >>> >> + if (node) { >>> >> + atlas_cmu_base = of_iomap(node, 0); >>> >> + if (!atlas_cmu_base) >>> >> + return; >>> >> + >>> >> + __raw_writel(0x0, >>> >> + atlas_cmu_base + >>> >> EXYNOS7_CORE_ARMCLK_STOPCTRL); >>> >> + iounmap(atlas_cmu_base); >> > >> > Missing: >> > of_node_put(node); >> > >> > ...but I think this creates unnecessary dependency on different >> > compatible. I understand that disabling the EXTENDED_CLKSTOP is needed >> > after configuring the PMU so this code belongs here. However >> > everything you need is just a mapping of CMU address. The PMU driver >> > should receive in bindings everything it needs to do its work. Either >> > it is a phandle to something or an address for iomap. In this case the >> > PMU should probably get two addresses: PMU and optionally CMU (part of >> > CMU for example). Of course bindings would have to be updated. > > I will add an optional CMU phandle to the PMU bindings. We could additionally split the CMU_ATLAS region into 2 regions in DT (derived from exynos7420 documentation): reg = <0x11800000 0xF08>, // offsets 0x0000...0x0F04 <0x11801000 0x8C>, // offsets 0x1000...0x1088 so that the first can be mapped by the clk driver and the second by the PMU driver? It seems the first region is strictly clock functionality related, while the second contains power control related and other registers. However I'm not sure it is a good idea, for consistency this would need to be done also for CMU_APOLLO, CMU_MIF{0...3}. All these CMUs don't have DT bindings defined yet though and there is no corresponding dts entries. -- Thanks, Sylwester
Re: [PATCH 1/2] mfd: cros_ec: Add MKBP event support
On Mon, 11 Jul 2016, Lee Jones wrote: > On Tue, 05 Jul 2016, Olof Johansson wrote: > > > On Tue, Jul 5, 2016 at 2:00 PM, Dmitry Torokhov > > wrote: > > > On July 5, 2016 1:55:44 PM PDT, Olof Johansson wrote: > > >>On Tue, Jul 5, 2016 at 1:54 PM, Olof Johansson wrote: > > >>> On Fri, Jul 1, 2016 at 2:07 AM, Enric Balletbo i Serra > > >>> wrote: > > From: Vic Yang > > > > Newer revisions of the ChromeOS EC add more events besides the > > >>keyboard > > ones. So handle interrupts in the MFD driver and let consumers > > >>register > > for notifications for the events they might care. > > > > To keep backward compatibility, if the EC doesn't support MKBP > > >>event, we > > fall back to the old MKBP key matrix host command. > > > > Signed-off-by: Vic Yang > > Signed-off-by: Tomeu Vizoso > > Tested-by: Enric Balletbo i Serra > > Cc: Randall Spangler > > Cc: Vincent Palatin > > Cc: Benson Leung > > >>> > > >>> Probably easiest to merge this through the MFD tree due to the > > >>> overlaps, so for that purpose: > > >>> > > >>> Acked-by: Olof Johansson > > >> > > >>Argh, I just noticed that the second patch is an input patch, not > > >>another MFD patch. Either way, I'm OK with this going through the > > >>input tree if that's easiest. If so, you should probably wait for an > > >>ack from Lee as well. > > > > > > Hmm, I thought I already acked input portion to go through MFD tree... or > > > am I confusing this with some other patch? > > > > Oh, then we're all set. That patch didn't thread with this one in my > > mailbox so I didn't see the comment thread on it. > > Yes, same for me. > > > Lee, all yours. > > Err, nice, ta! =;-) > > Eric, > > Please resubmit this set 'threaded' so I might take proper care of > it. Wait! Ignore that. I got mixed up with what Olof said and the way things looked in my inbox. The two patches are in fact threaded. I'll look to see to them later in the week. Please bear with me. 
-- Lee Jones Linaro STMicroelectronics Landing Team Lead Linaro.org │ Open source software for ARM SoCs Follow Linaro: Facebook | Twitter | Blog
Re: [PATCH 1/2] mfd: cros_ec: Add MKBP event support
On Tue, 05 Jul 2016, Olof Johansson wrote: > On Tue, Jul 5, 2016 at 2:00 PM, Dmitry Torokhov >wrote: > > On July 5, 2016 1:55:44 PM PDT, Olof Johansson wrote: > >>On Tue, Jul 5, 2016 at 1:54 PM, Olof Johansson wrote: > >>> On Fri, Jul 1, 2016 at 2:07 AM, Enric Balletbo i Serra > >>> wrote: > From: Vic Yang > > Newer revisions of the ChromeOS EC add more events besides the > >>keyboard > ones. So handle interrupts in the MFD driver and let consumers > >>register > for notifications for the events they might care. > > To keep backward compatibility, if the EC doesn't support MKBP > >>event, we > fall back to the old MKBP key matrix host command. > > Signed-off-by: Vic Yang > Signed-off-by: Tomeu Vizoso > Tested-by: Enric Balletbo i Serra > Cc: Randall Spangler > Cc: Vincent Palatin > Cc: Benson Leung > >>> > >>> Probably easiest to merge this through the MFD tree due to the > >>> overlaps, so for that purpose: > >>> > >>> Acked-by: Olof Johansson > >> > >>Argh, I just noticed that the second patch is an input patch, not > >>another MFD patch. Either way, I'm OK with this going through the > >>input tree if that's easiest. If so, you should probably wait for an > >>ack from Lee as well. > > > > Hmm, I thought I already acked input portion to go through MFD tree... or > > am I confusing this with some other patch? > > Oh, then we're all set. That patch didn't thread with this one in my > mailbox so I didn't see the comment thread on it. Yes, same for me. > Lee, all yours. Err, nice, ta! =;-) Eric, Please resubmit this set 'threaded' so I might take proper care of it. -- Lee Jones Linaro STMicroelectronics Landing Team Lead Linaro.org │ Open source software for ARM SoCs Follow Linaro: Facebook | Twitter | Blog
Re: [PATCH 1/2] mfd: cros_ec: Add MKBP event support
On Tue, 05 Jul 2016, Olof Johansson wrote: > On Tue, Jul 5, 2016 at 2:00 PM, Dmitry Torokhov > wrote: > > On July 5, 2016 1:55:44 PM PDT, Olof Johansson wrote: > >>On Tue, Jul 5, 2016 at 1:54 PM, Olof Johansson wrote: > >>> On Fri, Jul 1, 2016 at 2:07 AM, Enric Balletbo i Serra > >>> wrote: > From: Vic Yang > > Newer revisions of the ChromeOS EC add more events besides the > >>keyboard > ones. So handle interrupts in the MFD driver and let consumers > >>register > for notifications for the events they might care. > > To keep backward compatibility, if the EC doesn't support MKBP > >>event, we > fall back to the old MKBP key matrix host command. > > Signed-off-by: Vic Yang > Signed-off-by: Tomeu Vizoso > Tested-by: Enric Balletbo i Serra > Cc: Randall Spangler > Cc: Vincent Palatin > Cc: Benson Leung > >>> > >>> Probably easiest to merge this through the MFD tree due to the > >>> overlaps, so for that purpose: > >>> > >>> Acked-by: Olof Johansson > >> > >>Argh, I just noticed that the second patch is an input patch, not > >>another MFD patch. Either way, I'm OK with this going through the > >>input tree if that's easiest. If so, you should probably wait for an > >>ack from Lee as well. > > > > Hmm, I thought I already acked input portion to go through MFD tree... or > > am I confusing this with some other patch? > > Oh, then we're all set. That patch didn't thread with this one in my > mailbox so I didn't see the comment thread on it. Yes, same for me. > Lee, all yours. Err, nice, ta! =;-) Eric, Please resubmit this set 'threaded' so I might take proper care of it. -- Lee Jones Linaro STMicroelectronics Landing Team Lead Linaro.org │ Open source software for ARM SoCs Follow Linaro: Facebook | Twitter | Blog
Re: [kernel-hardening] [PATCH v4 26/29] sched: Allow putting thread_info into task_struct
On Mon, Jul 11, 2016 at 09:06:58AM -0700, Linus Torvalds wrote: > On Jul 11, 2016 7:55 AM, "Andy Lutomirski" <[1]l...@amacapital.net> wrote: > > > > How do you intend to find 'current' to get to the preempt count > > without first disabling preemption? > > Actually, that is the classic case of "not a problem". > > The thing is, it doesn't matter if you schedule away while looking up > current or the preempt count - because both values are idempotent wet > scheduling. > > So until you do the wire that actually disables preemption you can > schedule away as much as you want, and after that write you no longer > will. I was assuming a percpu pointer to current (or preempt count). The percpu offset might be stale at the point you try to dereference that, even though current itself hasn't changed, and you may access the wrong CPU's value. > This is different wrt a per-cpu area - which is clearly not idempotent wrt > scheduling. > > The reason per-cpu works on x86 is that we have an atomic rmw operation > that is *also* atomic wrt the CPU lookup (thanks to the segment base) Sure, understood. Mark.
Re: [kernel-hardening] [PATCH v4 26/29] sched: Allow putting thread_info into task_struct
On Mon, Jul 11, 2016 at 09:06:58AM -0700, Linus Torvalds wrote: > On Jul 11, 2016 7:55 AM, "Andy Lutomirski" <[1]l...@amacapital.net> wrote: > > > > How do you intend to find 'current' to get to the preempt count > > without first disabling preemption? > > Actually, that is the classic case of "not a problem". > > The thing is, it doesn't matter if you schedule away while looking up > current or the preempt count - because both values are idempotent wrt > scheduling. > > So until you do the write that actually disables preemption you can > schedule away as much as you want, and after that write you no longer > will. I was assuming a percpu pointer to current (or preempt count). The percpu offset might be stale at the point you try to dereference that, even though current itself hasn't changed, and you may access the wrong CPU's value. > This is different wrt a per-cpu area - which is clearly not idempotent wrt > scheduling. > > The reason per-cpu works on x86 is that we have an atomic rmw operation > that is *also* atomic wrt the CPU lookup (thanks to the segment base) Sure, understood. Mark.
Re: [PATCH v2 06/13] sched: Store maximum per-cpu capacity in root domain
On 11/07/16 11:18, Peter Zijlstra wrote: > On Wed, Jun 22, 2016 at 06:03:17PM +0100, Morten Rasmussen wrote: >> @@ -6905,11 +6906,19 @@ static int build_sched_domains(const struct cpumask >> *cpu_map, >> /* Attach the domains */ >> rcu_read_lock(); >> for_each_cpu(i, cpu_map) { >> +rq = cpu_rq(i); >> sd = *per_cpu_ptr(d.sd, i); >> cpu_attach_domain(sd, d.rd, i); >> + >> +if (rq->cpu_capacity_orig > rq->rd->max_cpu_capacity) >> +rq->rd->max_cpu_capacity = rq->cpu_capacity_orig; >> } > > Should you not set that _before_ cpu_attach_domain(), such that the > state is up-to-date when its published? yes, much better. > Also, since its lockless, should we not use {READ,WRITE}_ONCE() with it? You mean for rq->rd->max_cpu_capacity ? IMHO, there is a data dependency between the read and the write and the code only runs on one cpu. I assume here that this is related to item 2 'Overlapping loads and stores within a particular CPU ...' in GUARANTEES of doc/Documentation/memory-barriers.txt. Do I miss something? >> rcu_read_unlock(); >> >> +if (rq) >> +pr_info("span: %*pbl (max cpu_capacity = %lu)\n", >> +cpumask_pr_args(cpu_map), rq->rd->max_cpu_capacity); >> + > > While a single statement, it is multi line, please add brackets. OK. > >> ret = 0; >> error:
Re: [PATCH v2 06/13] sched: Store maximum per-cpu capacity in root domain
On 11/07/16 11:18, Peter Zijlstra wrote: > On Wed, Jun 22, 2016 at 06:03:17PM +0100, Morten Rasmussen wrote: >> @@ -6905,11 +6906,19 @@ static int build_sched_domains(const struct cpumask >> *cpu_map, >> /* Attach the domains */ >> rcu_read_lock(); >> for_each_cpu(i, cpu_map) { >> +rq = cpu_rq(i); >> sd = *per_cpu_ptr(d.sd, i); >> cpu_attach_domain(sd, d.rd, i); >> + >> +if (rq->cpu_capacity_orig > rq->rd->max_cpu_capacity) >> +rq->rd->max_cpu_capacity = rq->cpu_capacity_orig; >> } > > Should you not set that _before_ cpu_attach_domain(), such that the > state is up-to-date when its published? yes, much better. > Also, since its lockless, should we not use {READ,WRITE}_ONCE() with it? You mean for rq->rd->max_cpu_capacity ? IMHO, there is a data dependency between the read and the write and the code only runs on one cpu. I assume here that this is related to item 2 'Overlapping loads and stores within a particular CPU ...' in GUARANTEES of doc/Documentation/memory-barriers.txt. Do I miss something? >> rcu_read_unlock(); >> >> +if (rq) >> +pr_info("span: %*pbl (max cpu_capacity = %lu)\n", >> +cpumask_pr_args(cpu_map), rq->rd->max_cpu_capacity); >> + > > While a single statement, it is multi line, please add brackets. OK. > >> ret = 0; >> error:
[tip:x86/fpu] x86/fpu/xstate: Re-enable XSAVES
Commit-ID: b8be15d588060a03569ac85dc4a0247460988f5b Gitweb: http://git.kernel.org/tip/b8be15d588060a03569ac85dc4a0247460988f5b Author: Yu-cheng Yu AuthorDate: Mon, 11 Jul 2016 09:18:57 -0700 Committer: Ingo Molnar CommitDate: Mon, 11 Jul 2016 16:44:01 +0200 x86/fpu/xstate: Re-enable XSAVES We did not handle XSAVES instructions correctly. There were issues in converting between standard and compacted format when interfacing with user-space. These issues have been corrected. Add a WARN_ONCE() to make it clear that XSAVES supervisor states are not yet implemented. Signed-off-by: Yu-cheng Yu Signed-off-by: Fenghua Yu Reviewed-by: Dave Hansen Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Ravi V Shankar Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1468253937-40008-5-git-send-email-fenghua...@intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/fpu/init.c | 15 --- arch/x86/kernel/fpu/xstate.c | 9 + 2 files changed, 9 insertions(+), 15 deletions(-) diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index 60f3839..93982ae 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -230,21 +230,6 @@ static void __init fpu__init_system_xstate_size_legacy(void) } fpu_user_xstate_size = fpu_kernel_xstate_size; - - /* -* Quirk: we don't yet handle the XSAVES* instructions -* correctly, as we don't correctly convert between -* standard and compacted format when interfacing -* with user-space - so disable it for now. -* -* The difference is small: with recent CPUs the -* compacted format is only marginally smaller than -* the standard FPU state format. -* -* ( This is easy to backport while we are fixing -* XSAVES* support.
) -*/ - setup_clear_cpu_cap(X86_FEATURE_XSAVES); } /* diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 4fb8dd7..3169bca 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -221,6 +221,15 @@ void fpu__init_cpu_xstate(void) { if (!boot_cpu_has(X86_FEATURE_XSAVE) || !xfeatures_mask) return; + /* +* Make it clear that XSAVES supervisor states are not yet +* implemented should anyone expect it to work by changing +* bits in XFEATURE_MASK_* macros and XCR0. +*/ + WARN_ONCE((xfeatures_mask & XFEATURE_MASK_SUPERVISOR), + "x86/fpu: XSAVES supervisor states are not yet implemented.\n"); + + xfeatures_mask &= ~XFEATURE_MASK_SUPERVISOR; cr4_set_bits(X86_CR4_OSXSAVE); xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures_mask);
[tip:x86/fpu] x86/fpu/xstate: Re-enable XSAVES
Commit-ID: b8be15d588060a03569ac85dc4a0247460988f5b Gitweb: http://git.kernel.org/tip/b8be15d588060a03569ac85dc4a0247460988f5b Author: Yu-cheng Yu AuthorDate: Mon, 11 Jul 2016 09:18:57 -0700 Committer: Ingo Molnar CommitDate: Mon, 11 Jul 2016 16:44:01 +0200 x86/fpu/xstate: Re-enable XSAVES We did not handle XSAVES instructions correctly. There were issues in converting between standard and compacted format when interfacing with user-space. These issues have been corrected. Add a WARN_ONCE() to make it clear that XSAVES supervisor states are not yet implemented. Signed-off-by: Yu-cheng Yu Signed-off-by: Fenghua Yu Reviewed-by: Dave Hansen Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Ravi V Shankar Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1468253937-40008-5-git-send-email-fenghua...@intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/fpu/init.c | 15 --- arch/x86/kernel/fpu/xstate.c | 9 + 2 files changed, 9 insertions(+), 15 deletions(-) diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index 60f3839..93982ae 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -230,21 +230,6 @@ static void __init fpu__init_system_xstate_size_legacy(void) } fpu_user_xstate_size = fpu_kernel_xstate_size; - - /* -* Quirk: we don't yet handle the XSAVES* instructions -* correctly, as we don't correctly convert between -* standard and compacted format when interfacing -* with user-space - so disable it for now. -* -* The difference is small: with recent CPUs the -* compacted format is only marginally smaller than -* the standard FPU state format. -* -* ( This is easy to backport while we are fixing -* XSAVES* support. 
) -*/ - setup_clear_cpu_cap(X86_FEATURE_XSAVES); } /* diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 4fb8dd7..3169bca 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -221,6 +221,15 @@ void fpu__init_cpu_xstate(void) { if (!boot_cpu_has(X86_FEATURE_XSAVE) || !xfeatures_mask) return; + /* +* Make it clear that XSAVES supervisor states are not yet +* implemented should anyone expect it to work by changing +* bits in XFEATURE_MASK_* macros and XCR0. +*/ + WARN_ONCE((xfeatures_mask & XFEATURE_MASK_SUPERVISOR), + "x86/fpu: XSAVES supervisor states are not yet implemented.\n"); + + xfeatures_mask &= ~XFEATURE_MASK_SUPERVISOR; cr4_set_bits(X86_CR4_OSXSAVE); xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures_mask);
[tip:x86/fpu] x86/fpu/xstate: Return NULL for disabled xstate component address
Commit-ID: 5060b91513b866f774da15dfd82157864c4b1683 Gitweb: http://git.kernel.org/tip/5060b91513b866f774da15dfd82157864c4b1683 Author: Yu-cheng Yu AuthorDate: Mon, 11 Jul 2016 09:18:55 -0700 Committer: Ingo Molnar CommitDate: Mon, 11 Jul 2016 16:44:00 +0200 x86/fpu/xstate: Return NULL for disabled xstate component address It is an error to request a disabled XSAVE/XSAVES component address. For that case, make __raw_xsave_addr() return a NULL and issue a warning. Signed-off-by: Yu-cheng Yu Signed-off-by: Fenghua Yu Reviewed-by: Dave Hansen Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Ravi V Shankar Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1468253937-40008-3-git-send-email-fenghua...@intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/fpu/xstate.c | 5 + 1 file changed, 5 insertions(+) diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index f8d1aff..4fb8dd7 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -760,6 +760,11 @@ void *__raw_xsave_addr(struct xregs_state *xsave, int xstate_feature_mask) { int feature_nr = fls64(xstate_feature_mask) - 1; + if (!xfeature_enabled(feature_nr)) { + WARN_ON_FPU(1); + return NULL; + } + return (void *)xsave + xstate_comp_offsets[feature_nr]; } /*
[tip:x86/fpu] x86/fpu/xstate: Fix fpstate_init() for XRSTORS
Commit-ID: 35ac2d7ba787eb4b7418a5a6f5919c25e10a780a Gitweb: http://git.kernel.org/tip/35ac2d7ba787eb4b7418a5a6f5919c25e10a780a Author: Yu-cheng Yu AuthorDate: Mon, 11 Jul 2016 09:18:56 -0700 Committer: Ingo Molnar CommitDate: Mon, 11 Jul 2016 16:44:00 +0200 x86/fpu/xstate: Fix fpstate_init() for XRSTORS In XSAVES mode if fpstate_init() is used to initialize a task's extended state area, xsave.header.xcomp_bv[63] must be set. Otherwise, when the task is scheduled, a warning is triggered from copy_kernel_to_xregs(). One such test case is: setting an invalid extended state through PTRACE. When xstateregs_set() rejects the syscall and re-initializes the task's extended state area. This triggers the warning mentioned above. Signed-off-by: Yu-cheng Yu Signed-off-by: Fenghua Yu Reviewed-by: Dave Hansen Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Ravi V Shankar Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1468253937-40008-4-git-send-email-fenghua...@intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/fpu/types.h | 6 ++ arch/x86/kernel/fpu/core.c | 8 2 files changed, 14 insertions(+) diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h index 12dd648..48df486 100644 --- a/arch/x86/include/asm/fpu/types.h +++ b/arch/x86/include/asm/fpu/types.h @@ -232,6 +232,12 @@ struct xstate_header { } __attribute__((packed)); /* + * xstate_header.xcomp_bv[63] indicates that the extended_state_area + * is in compacted format. + */ +#define XCOMP_BV_COMPACTED_FORMAT ((u64)1 << 63) + +/* * This is our most modern FPU state format, as saved by the XSAVE * and restored by the XRSTOR instructions.
* diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index c759bd0..3fc03a0 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -229,6 +230,13 @@ void fpstate_init(union fpregs_state *state) memset(state, 0, fpu_kernel_xstate_size); + /* +* XRSTORS requires that this bit is set in xcomp_bv, or +* it will #GP. Make sure it is replaced after the memset(). +*/ + if (static_cpu_has(X86_FEATURE_XSAVES)) + state->xsave.header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT; + if (static_cpu_has(X86_FEATURE_FXSR)) fpstate_init_fxstate(&state->fxsave); else
[tip:x86/fpu] x86/fpu/xstate: Return NULL for disabled xstate component address
Commit-ID: 5060b91513b866f774da15dfd82157864c4b1683 Gitweb: http://git.kernel.org/tip/5060b91513b866f774da15dfd82157864c4b1683 Author: Yu-cheng Yu AuthorDate: Mon, 11 Jul 2016 09:18:55 -0700 Committer: Ingo Molnar CommitDate: Mon, 11 Jul 2016 16:44:00 +0200 x86/fpu/xstate: Return NULL for disabled xstate component address It is an error to request a disabled XSAVE/XSAVES component address. For that case, make __raw_xsave_addr() return a NULL and issue a warning. Signed-off-by: Yu-cheng Yu Signed-off-by: Fenghua Yu Reviewed-by: Dave Hansen Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Ravi V Shankar Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1468253937-40008-3-git-send-email-fenghua...@intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/fpu/xstate.c | 5 + 1 file changed, 5 insertions(+) diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index f8d1aff..4fb8dd7 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -760,6 +760,11 @@ void *__raw_xsave_addr(struct xregs_state *xsave, int xstate_feature_mask) { int feature_nr = fls64(xstate_feature_mask) - 1; + if (!xfeature_enabled(feature_nr)) { + WARN_ON_FPU(1); + return NULL; + } + return (void *)xsave + xstate_comp_offsets[feature_nr]; } /*
[tip:x86/fpu] x86/fpu/xstate: Fix fpstate_init() for XRSTORS
Commit-ID: 35ac2d7ba787eb4b7418a5a6f5919c25e10a780a Gitweb: http://git.kernel.org/tip/35ac2d7ba787eb4b7418a5a6f5919c25e10a780a Author: Yu-cheng Yu AuthorDate: Mon, 11 Jul 2016 09:18:56 -0700 Committer: Ingo Molnar CommitDate: Mon, 11 Jul 2016 16:44:00 +0200 x86/fpu/xstate: Fix fpstate_init() for XRSTORS In XSAVES mode if fpstate_init() is used to initialize a task's extended state area, xsave.header.xcomp_bv[63] must be set. Otherwise, when the task is scheduled, a warning is triggered from copy_kernel_to_xregs(). One such test case is: setting an invalid extended state through PTRACE. When xstateregs_set() rejects the syscall and re-initializes the task's extended state area. This triggers the warning mentioned above. Signed-off-by: Yu-cheng Yu Signed-off-by: Fenghua Yu Reviewed-by: Dave Hansen Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Ravi V Shankar Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1468253937-40008-4-git-send-email-fenghua...@intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/fpu/types.h | 6 ++ arch/x86/kernel/fpu/core.c | 8 2 files changed, 14 insertions(+) diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h index 12dd648..48df486 100644 --- a/arch/x86/include/asm/fpu/types.h +++ b/arch/x86/include/asm/fpu/types.h @@ -232,6 +232,12 @@ struct xstate_header { } __attribute__((packed)); /* + * xstate_header.xcomp_bv[63] indicates that the extended_state_area + * is in compacted format. + */ +#define XCOMP_BV_COMPACTED_FORMAT ((u64)1 << 63) + +/* * This is our most modern FPU state format, as saved by the XSAVE * and restored by the XRSTOR instructions. 
* diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index c759bd0..3fc03a0 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -229,6 +230,13 @@ void fpstate_init(union fpregs_state *state) memset(state, 0, fpu_kernel_xstate_size); + /* +* XRSTORS requires that this bit is set in xcomp_bv, or +* it will #GP. Make sure it is replaced after the memset(). +*/ + if (static_cpu_has(X86_FEATURE_XSAVES)) + state->xsave.header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT; + if (static_cpu_has(X86_FEATURE_FXSR)) fpstate_init_fxstate(&state->fxsave); else
Re: [PATCH v2 1/6] dt-bindings: clock: add DT binding for the Xtal clock on Armada 3700
Hi Thomas, On ven., juil. 08 2016, Thomas Petazzoni wrote: > Hello, > > On Fri, 8 Jul 2016 00:37:46 +0200, Gregory CLEMENT wrote: > >> +gpio1: gpio@13800 { >> +compatible = "marvell,mvebu-gpio-3700", "syscon", "simple-mfd"; > > I find this compatible string not very consistent with what we do for > other drivers, it should have been: > > marvell,armada-3700-gpio Thanks for pointing this. We missed it during the last review. I agree that using marvell,armada-3700-gpio is more appropriate, especially because the gpio controller on Armada 37xx seems to be different from the ones used on the other mvebu SoCs. Gregory > > or something like that. > > >> +xtalclk: xtal-clk { >> +compatible = "marvell,armada-3700-xtal-clock"; > > See here for example. > > Thomas > -- > Thomas Petazzoni, CTO, Free Electrons > Embedded Linux, Kernel and Android engineering > http://free-electrons.com -- Gregory Clement, Free Electrons Kernel, drivers, real-time and embedded Linux development, consulting, training and support. http://free-electrons.com
Re: [PATCH v2 1/6] dt-bindings: clock: add DT binding for the Xtal clock on Armada 3700
Hi Thomas, On ven., juil. 08 2016, Thomas Petazzoni wrote: > Hello, > > On Fri, 8 Jul 2016 00:37:46 +0200, Gregory CLEMENT wrote: > >> +gpio1: gpio@13800 { >> +compatible = "marvell,mvebu-gpio-3700", "syscon", "simple-mfd"; > > I find this compatible string not very consistent with what we do for > other drivers, it should have been: > > marvell,armada-3700-gpio Thanks for pointing this. We missed it during the last review. I agree that using marvell,armada-3700-gpio is more appropriate, especially because the gpio controller on Armada 37xx seems to be different from the ones used on the other mvebu SoCs. Gregory > > or something like that. > > >> +xtalclk: xtal-clk { >> +compatible = "marvell,armada-3700-xtal-clock"; > > See here for example. > > Thomas > -- > Thomas Petazzoni, CTO, Free Electrons > Embedded Linux, Kernel and Android engineering > http://free-electrons.com -- Gregory Clement, Free Electrons Kernel, drivers, real-time and embedded Linux development, consulting, training and support. http://free-electrons.com
[tip:x86/fpu] x86/fpu/xstate: Fix __fpu_restore_sig() for XSAVES
Commit-ID: 1fc2b67b43d5001b92b3a002b94ad0137e99 Gitweb: http://git.kernel.org/tip/1fc2b67b43d5001b92b3a002b94ad0137e99 Author: Yu-cheng Yu AuthorDate: Mon, 11 Jul 2016 09:18:54 -0700 Committer: Ingo Molnar CommitDate: Mon, 11 Jul 2016 16:43:59 +0200 x86/fpu/xstate: Fix __fpu_restore_sig() for XSAVES When the kernel is using XSAVES compacted format, we cannot do __copy_from_user() from a signal frame, which has standard-format data. Fix it by using copyin_to_xsaves(), which converts between formats and filters out all supervisor states that we do not allow userspace to write. Signed-off-by: Yu-cheng Yu Signed-off-by: Fenghua Yu Reviewed-by: Dave Hansen Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Ravi V Shankar Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1468253937-40008-2-git-send-email-fenghua...@intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/fpu/signal.c | 11 +-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 8aa96cb..9e231d8 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -323,8 +323,15 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) */ fpu__drop(fpu); - if (__copy_from_user(&fpu->state.xsave, buf_fx, state_size) || - __copy_from_user(&env, buf, sizeof(env))) { + if (using_compacted_format()) { + err = copyin_to_xsaves(NULL, buf_fx, + &fpu->state.xsave); + } else { + err = __copy_from_user(&fpu->state.xsave, + buf_fx, state_size); + } + + if (err || __copy_from_user(&env, buf, sizeof(env))) { fpstate_init(&fpu->state); trace_x86_fpu_init_state(fpu); err = -1;
[tip:x86/fpu] x86/fpu/xstate: Fix __fpu_restore_sig() for XSAVES
Commit-ID: 1fc2b67b43d5001b92b3a002b94ad0137e99 Gitweb: http://git.kernel.org/tip/1fc2b67b43d5001b92b3a002b94ad0137e99 Author: Yu-cheng Yu AuthorDate: Mon, 11 Jul 2016 09:18:54 -0700 Committer: Ingo Molnar CommitDate: Mon, 11 Jul 2016 16:43:59 +0200 x86/fpu/xstate: Fix __fpu_restore_sig() for XSAVES When the kernel is using XSAVES compacted format, we cannot do __copy_from_user() from a signal frame, which has standard-format data. Fix it by using copyin_to_xsaves(), which converts between formats and filters out all supervisor states that we do not allow userspace to write. Signed-off-by: Yu-cheng Yu Signed-off-by: Fenghua Yu Reviewed-by: Dave Hansen Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Ravi V Shankar Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1468253937-40008-2-git-send-email-fenghua...@intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/fpu/signal.c | 11 +-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 8aa96cb..9e231d8 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -323,8 +323,15 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) */ fpu__drop(fpu); - if (__copy_from_user(&fpu->state.xsave, buf_fx, state_size) || - __copy_from_user(&env, buf, sizeof(env))) { + if (using_compacted_format()) { + err = copyin_to_xsaves(NULL, buf_fx, + &fpu->state.xsave); + } else { + err = __copy_from_user(&fpu->state.xsave, + buf_fx, state_size); + } + + if (err || __copy_from_user(&env, buf, sizeof(env))) { fpstate_init(&fpu->state); trace_x86_fpu_init_state(fpu); err = -1;