date:20160711

[RFC PATCH v2 7/7] lib/dlock-list: Use the per-subnode APIs for managing lists

2016-07-11 Thread Waiman Long

This patch modifies the dlock-list to use the per-subnode APIs to
manage the distributed lists. As a result, the number of lists that
need to be iterated in dlock_list_iterate() will be reduced by at
least half, making the iteration a bit faster.

Signed-off-by: Waiman Long 
---
 include/linux/dlock-list.h |   81 +--
 lib/dlock-list.c   |   19 +-
 2 files changed, 50 insertions(+), 50 deletions(-)

diff --git a/include/linux/dlock-list.h b/include/linux/dlock-list.h
index a8e1fd2..01667fc 100644
--- a/include/linux/dlock-list.h
+++ b/include/linux/dlock-list.h
@@ -20,12 +20,12 @@
 
 #include 
 #include 
-#include 
+#include 
 
 /*
  * include/linux/dlock-list.h
  *
- * A distributed (per-cpu) set of lists each of which is protected by its
+ * A distributed (per-subnode) set of lists each of which is protected by its
  * own spinlock, but acts like a single consolidated list to the callers.
  *
  * The dlock_list_head structure contains the spinlock, the other
@@ -45,19 +45,19 @@ struct dlock_list_head {
}
 
 /*
- * Per-cpu list iteration state
+ * Per-subnode list iteration state
  */
 struct dlock_list_state {
-   int  cpu;
+   int  snid;  /* Subnode ID */
spinlock_t  *lock;
-   struct list_head*head;  /* List head of current per-cpu list */
+   struct list_head*head;  /* List head of current per-subnode 
list */
struct dlock_list_node  *curr;
struct dlock_list_node  *next;
 };
 
 #define DLOCK_LIST_STATE_INIT()\
{   \
-   .cpu  = -1, \
+   .snid  = -1,\
.lock = NULL,   \
.head = NULL,   \
.curr = NULL,   \
@@ -69,7 +69,7 @@ struct dlock_list_state {
 
 static inline void init_dlock_list_state(struct dlock_list_state *state)
 {
-   state->cpu  = -1;
+   state->snid  = -1;
state->lock = NULL;
state->head = NULL;
state->curr = NULL;
@@ -83,12 +83,12 @@ static inline void init_dlock_list_state(struct 
dlock_list_state *state)
 #endif
 
 /*
- * Next per-cpu list entry
+ * Next per-subnode list entry
  */
 #define dlock_list_next_entry(pos, member) list_next_entry(pos, member.list)
 
 /*
- * Per-cpu node data structure
+ * Per-subnode node data structure
  */
 struct dlock_list_node {
struct list_head list;
@@ -109,50 +109,50 @@ static inline void init_dlock_list_node(struct 
dlock_list_node *node)
 }
 
 static inline void
-free_dlock_list_head(struct dlock_list_head __percpu **pdlock_head)
+free_dlock_list_head(struct dlock_list_head __persubnode **pdlock_head)
 {
-   free_percpu(*pdlock_head);
+   free_persubnode(*pdlock_head);
*pdlock_head = NULL;
 }
 
 /*
- * Check if all the per-cpu lists are empty
+ * Check if all the per-subnode lists are empty
  */
-static inline bool dlock_list_empty(struct dlock_list_head __percpu 
*dlock_head)
+static inline bool dlock_list_empty(struct dlock_list_head __persubnode 
*dlock_head)
 {
-   int cpu;
+   int snid;
 
-   for_each_possible_cpu(cpu)
-   if (!list_empty(_cpu_ptr(dlock_head, cpu)->list))
+   for_each_subnode(snid)
+   if (!list_empty(_subnode_ptr(dlock_head, snid)->list))
return false;
return true;
 }
 
 /*
- * Helper function to find the first entry of the next per-cpu list
- * It works somewhat like for_each_possible_cpu(cpu).
+ * Helper function to find the first entry of the next per-subnode list
+ * It works somewhat like for_each_subnode(snid).
  *
  * Return: true if the entry is found, false if all the lists exhausted
  */
 static __always_inline bool
-__dlock_list_next_cpu(struct dlock_list_head __percpu *head,
+__dlock_list_next_subnode(struct dlock_list_head __persubnode *head,
  struct dlock_list_state *state)
 {
if (state->lock)
spin_unlock(state->lock);
-next_cpu:
+next_subnode:
/*
-* for_each_possible_cpu(cpu)
+* for_each_subnode(snid)
 */
-   state->cpu = cpumask_next(state->cpu, cpu_possible_mask);
-   if (state->cpu >= nr_cpu_ids)
-   return false;   /* All the per-cpu lists iterated */
+   state->snid = cpumask_next(state->snid, subnode_mask);
+   if (state->snid >= nr_subnode_ids)
+   return false;   /* All the per-subnode lists iterated */
 
-   state->head = _cpu_ptr(head, state->cpu)->list;
+   state->head = _subnode_ptr(head, state->snid)->list;
if (list_empty(state->head))
-   goto next_cpu;
+   goto next_subnode;
 
-   state->lock = _cpu_ptr(head, state->cpu)->lock;
+   state->lock = _subnode_ptr(head, state->snid)->lock;
spin_lock(state->lock);

[PATCH v2 3/7] fsnotify: Simplify inode iteration on umount

2016-07-11 Thread Waiman Long

From: Jan Kara 

fsnotify_unmount_inodes() played complex tricks to pin next inode in the
sb->s_inodes list when iterating over all inodes. If we switch to
keeping current inode pinned somewhat longer, we can make the code much
simpler and standard.

Signed-off-by: Jan Kara 
Signed-off-by: Waiman Long 
---
 fs/notify/inode_mark.c |   45 +
 1 files changed, 9 insertions(+), 36 deletions(-)

diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c
index 741077d..a364524 100644
--- a/fs/notify/inode_mark.c
+++ b/fs/notify/inode_mark.c
@@ -150,12 +150,10 @@ int fsnotify_add_inode_mark(struct fsnotify_mark *mark,
  */
 void fsnotify_unmount_inodes(struct super_block *sb)
 {
-   struct inode *inode, *next_i, *need_iput = NULL;
+   struct inode *inode, *iput_inode = NULL;
 
spin_lock(>s_inode_list_lock);
-   list_for_each_entry_safe(inode, next_i, >s_inodes, i_sb_list) {
-   struct inode *need_iput_tmp;
-
+   list_for_each_entry(inode, >s_inodes, i_sb_list) {
/*
 * We cannot __iget() an inode in state I_FREEING,
 * I_WILL_FREE, or I_NEW which is fine because by that point
@@ -178,49 +176,24 @@ void fsnotify_unmount_inodes(struct super_block *sb)
continue;
}
 
-   need_iput_tmp = need_iput;
-   need_iput = NULL;
-
-   /* In case fsnotify_inode_delete() drops a reference. */
-   if (inode != need_iput_tmp)
-   __iget(inode);
-   else
-   need_iput_tmp = NULL;
+   __iget(inode);
spin_unlock(>i_lock);
-
-   /* In case the dropping of a reference would nuke next_i. */
-   while (_i->i_sb_list != >s_inodes) {
-   spin_lock(_i->i_lock);
-   if (!(next_i->i_state & (I_FREEING | I_WILL_FREE)) &&
-   atomic_read(_i->i_count)) {
-   __iget(next_i);
-   need_iput = next_i;
-   spin_unlock(_i->i_lock);
-   break;
-   }
-   spin_unlock(_i->i_lock);
-   next_i = list_next_entry(next_i, i_sb_list);
-   }
-
-   /*
-* We can safely drop s_inode_list_lock here because either
-* we actually hold references on both inode and next_i or
-* end of list.  Also no new inodes will be added since the
-* umount has begun.
-*/
spin_unlock(>s_inode_list_lock);
 
-   if (need_iput_tmp)
-   iput(need_iput_tmp);
+   if (iput_inode)
+   iput(iput_inode);
 
/* for each watch, send FS_UNMOUNT and then remove it */
fsnotify(inode, FS_UNMOUNT, inode, FSNOTIFY_EVENT_INODE, NULL, 
0);
 
fsnotify_inode_delete(inode);
 
-   iput(inode);
+   iput_inode = inode;
 
spin_lock(>s_inode_list_lock);
}
spin_unlock(>s_inode_list_lock);
+
+   if (iput_inode)
+   iput(iput_inode);
 }
-- 
1.7.1

[PATCH v2 5/7] vfs: Use dlock list for superblock's inode list

2016-07-11 Thread Waiman Long

When many threads are trying to add inodes to or delete inodes from
a superblock's s_inodes list, spinlock contention on the list can
become a performance bottleneck.

This patch changes the s_inodes field to become a dlock list which
is a distributed set of lists with per-list spinlocks.  As a result,
the following superblock inode list (sb->s_inodes) iteration functions
in vfs are also being modified:

 1. iterate_bdevs()
 2. drop_pagecache_sb()
 3. wait_sb_inodes()
 4. evict_inodes()
 5. invalidate_inodes()
 6. fsnotify_unmount_inodes()
 7. add_dquot_ref()
 8. remove_dquot_ref()

An exit microbenchmark was used that creates a large number of threads,
attaches many inodes to them and then exits. The runtimes of that
microbenchmark with 1000 threads before and after the patch on a
4-socket Intel E7-4820 v3 system (40 cores, 80 threads) were as
follows:

  Kernel             Elapsed Time    System Time
  ------             ------------    -----------
  Vanilla 4.5-rc4       65.29s         82m14s
  Patched 4.5-rc4       22.81s         23m03s

Before the patch, spinlock contention at the inode_sb_list_add()
function at the startup phase and the inode_sb_list_del() function at
the exit phase were about 79% and 93% of total CPU time respectively
(as measured by perf). After the patch, the percpu_list_add()
function consumed only about 0.04% of CPU time at startup phase. The
percpu_list_del() function consumed about 0.4% of CPU time at exit
phase. There was still some spinlock contention, but it happened
elsewhere.

Signed-off-by: Waiman Long 
Reviewed-by: Jan Kara 
---
 fs/block_dev.c |   13 +++--
 fs/drop_caches.c   |   10 +-
 fs/fs-writeback.c  |   13 +++--
 fs/inode.c |   36 +++-
 fs/notify/inode_mark.c |   10 +-
 fs/quota/dquot.c   |   16 
 fs/super.c |7 ---
 include/linux/fs.h |8 
 8 files changed, 55 insertions(+), 58 deletions(-)

diff --git a/fs/block_dev.c b/fs/block_dev.c
index 71ccab1..21e9064 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1896,11 +1896,13 @@ EXPORT_SYMBOL(__invalidate_device);
 void iterate_bdevs(void (*func)(struct block_device *, void *), void *arg)
 {
struct inode *inode, *old_inode = NULL;
+   DEFINE_DLOCK_LIST_STATE(state);
 
-   spin_lock(_superblock->s_inode_list_lock);
-   list_for_each_entry(inode, _superblock->s_inodes, i_sb_list) {
-   struct address_space *mapping = inode->i_mapping;
+   while (dlock_list_iterate(blockdev_superblock->s_inodes, )) {
+   struct address_space *mapping;
 
+   inode   = list_entry(state.curr, struct inode, i_sb_list);
+   mapping = inode->i_mapping;
spin_lock(>i_lock);
if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW) ||
mapping->nrpages == 0) {
@@ -1909,7 +1911,7 @@ void iterate_bdevs(void (*func)(struct block_device *, 
void *), void *arg)
}
__iget(inode);
spin_unlock(>i_lock);
-   spin_unlock(_superblock->s_inode_list_lock);
+   spin_unlock(state.lock);
/*
 * We hold a reference to 'inode' so it couldn't have been
 * removed from s_inodes list while we dropped the
@@ -1923,8 +1925,7 @@ void iterate_bdevs(void (*func)(struct block_device *, 
void *), void *arg)
 
func(I_BDEV(inode), arg);
 
-   spin_lock(_superblock->s_inode_list_lock);
+   spin_lock(state.lock);
}
-   spin_unlock(_superblock->s_inode_list_lock);
iput(old_inode);
 }
diff --git a/fs/drop_caches.c b/fs/drop_caches.c
index d72d52b..26b6c68 100644
--- a/fs/drop_caches.c
+++ b/fs/drop_caches.c
@@ -16,9 +16,10 @@ int sysctl_drop_caches;
 static void drop_pagecache_sb(struct super_block *sb, void *unused)
 {
struct inode *inode, *toput_inode = NULL;
+   DEFINE_DLOCK_LIST_STATE(state);
 
-   spin_lock(>s_inode_list_lock);
-   list_for_each_entry(inode, >s_inodes, i_sb_list) {
+   while (dlock_list_iterate(sb->s_inodes, )) {
+   inode = list_entry(state.curr, struct inode, i_sb_list);
spin_lock(>i_lock);
if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) ||
(inode->i_mapping->nrpages == 0)) {
@@ -27,15 +28,14 @@ static void drop_pagecache_sb(struct super_block *sb, void 
*unused)
}
__iget(inode);
spin_unlock(>i_lock);
-   spin_unlock(>s_inode_list_lock);
+   spin_unlock(state.lock);
 
invalidate_mapping_pages(inode->i_mapping, 0, -1);
iput(toput_inode);
toput_inode = inode;
 
-   spin_lock(>s_inode_list_lock);
+   spin_lock(state.lock);
}
-

[PATCH v2 5/7] vfs: Use dlock list for superblock's inode list

2016-07-11 Thread Waiman Long

When many threads are trying to add inodes to or delete inodes from
a superblock's s_inodes list, spinlock contention on the list can
become a performance bottleneck.

This patch changes the s_inodes field to become a dlock list which
is a distributed set of lists with per-list spinlocks.  As a result,
the following superblock inode list (sb->s_inodes) iteration functions
in vfs are also being modified:

 1. iterate_bdevs()
 2. drop_pagecache_sb()
 3. wait_sb_inodes()
 4. evict_inodes()
 5. invalidate_inodes()
 6. fsnotify_unmount_inodes()
 7. add_dquot_ref()
 8. remove_dquot_ref()

An exit microbenchmark was used that creates a large number of threads,
attaches many inodes to them and then exits. The runtimes of that
microbenchmark with 1000 threads before and after the patch on a
4-socket Intel E7-4820 v3 system (40 cores, 80 threads) were as
follows:

  Kernel             Elapsed Time    System Time
  ------             ------------    -----------
  Vanilla 4.5-rc4       65.29s         82m14s
  Patched 4.5-rc4       22.81s         23m03s

Before the patch, spinlock contention at the inode_sb_list_add()
function at the startup phase and the inode_sb_list_del() function at
the exit phase were about 79% and 93% of total CPU time respectively
(as measured by perf). After the patch, the percpu_list_add()
function consumed only about 0.04% of CPU time at startup phase. The
percpu_list_del() function consumed about 0.4% of CPU time at exit
phase. There was still some spinlock contention, but it happened
elsewhere.

Signed-off-by: Waiman Long 
Reviewed-by: Jan Kara 
---
 fs/block_dev.c |   13 +++--
 fs/drop_caches.c   |   10 +-
 fs/fs-writeback.c  |   13 +++--
 fs/inode.c |   36 +++-
 fs/notify/inode_mark.c |   10 +-
 fs/quota/dquot.c   |   16 
 fs/super.c |7 ---
 include/linux/fs.h |8 
 8 files changed, 55 insertions(+), 58 deletions(-)

diff --git a/fs/block_dev.c b/fs/block_dev.c
index 71ccab1..21e9064 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1896,11 +1896,13 @@ EXPORT_SYMBOL(__invalidate_device);
 void iterate_bdevs(void (*func)(struct block_device *, void *), void *arg)
 {
struct inode *inode, *old_inode = NULL;
+   DEFINE_DLOCK_LIST_STATE(state);
 
-   spin_lock(_superblock->s_inode_list_lock);
-   list_for_each_entry(inode, _superblock->s_inodes, i_sb_list) {
-   struct address_space *mapping = inode->i_mapping;
+   while (dlock_list_iterate(blockdev_superblock->s_inodes, )) {
+   struct address_space *mapping;
 
+   inode   = list_entry(state.curr, struct inode, i_sb_list);
+   mapping = inode->i_mapping;
spin_lock(>i_lock);
if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW) ||
mapping->nrpages == 0) {
@@ -1909,7 +1911,7 @@ void iterate_bdevs(void (*func)(struct block_device *, 
void *), void *arg)
}
__iget(inode);
spin_unlock(>i_lock);
-   spin_unlock(_superblock->s_inode_list_lock);
+   spin_unlock(state.lock);
/*
 * We hold a reference to 'inode' so it couldn't have been
 * removed from s_inodes list while we dropped the
@@ -1923,8 +1925,7 @@ void iterate_bdevs(void (*func)(struct block_device *, 
void *), void *arg)
 
func(I_BDEV(inode), arg);
 
-   spin_lock(_superblock->s_inode_list_lock);
+   spin_lock(state.lock);
}
-   spin_unlock(_superblock->s_inode_list_lock);
iput(old_inode);
 }
diff --git a/fs/drop_caches.c b/fs/drop_caches.c
index d72d52b..26b6c68 100644
--- a/fs/drop_caches.c
+++ b/fs/drop_caches.c
@@ -16,9 +16,10 @@ int sysctl_drop_caches;
 static void drop_pagecache_sb(struct super_block *sb, void *unused)
 {
struct inode *inode, *toput_inode = NULL;
+   DEFINE_DLOCK_LIST_STATE(state);
 
-   spin_lock(>s_inode_list_lock);
-   list_for_each_entry(inode, >s_inodes, i_sb_list) {
+   while (dlock_list_iterate(sb->s_inodes, )) {
+   inode = list_entry(state.curr, struct inode, i_sb_list);
spin_lock(>i_lock);
if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) ||
(inode->i_mapping->nrpages == 0)) {
@@ -27,15 +28,14 @@ static void drop_pagecache_sb(struct super_block *sb, void 
*unused)
}
__iget(inode);
spin_unlock(>i_lock);
-   spin_unlock(>s_inode_list_lock);
+   spin_unlock(state.lock);
 
invalidate_mapping_pages(inode->i_mapping, 0, -1);
iput(toput_inode);
toput_inode = inode;
 
-   spin_lock(>s_inode_list_lock);
+   spin_lock(state.lock);
}
-   spin_unlock(>s_inode_list_lock);

[PATCH v2 2/7] lib/dlock-list: Add __percpu modifier for parameters

2016-07-11 Thread Waiman Long

From: Boqun Feng 

Add __percpu modifier properly to help:

1.  Differentiate pointers to actual structures from those to percpu
structures, which could improve readability.

2.  Prevent sparse from complaining about "different address spaces"

Signed-off-by: Boqun Feng 
Signed-off-by: Waiman Long 
---
 include/linux/dlock-list.h |   18 ++
 lib/dlock-list.c   |5 +++--
 2 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/include/linux/dlock-list.h b/include/linux/dlock-list.h
index 43355f8..a8e1fd2 100644
--- a/include/linux/dlock-list.h
+++ b/include/linux/dlock-list.h
@@ -108,7 +108,8 @@ static inline void init_dlock_list_node(struct 
dlock_list_node *node)
node->lockptr = NULL;
 }
 
-static inline void free_dlock_list_head(struct dlock_list_head **pdlock_head)
+static inline void
+free_dlock_list_head(struct dlock_list_head __percpu **pdlock_head)
 {
free_percpu(*pdlock_head);
*pdlock_head = NULL;
@@ -117,7 +118,7 @@ static inline void free_dlock_list_head(struct 
dlock_list_head **pdlock_head)
 /*
  * Check if all the per-cpu lists are empty
  */
-static inline bool dlock_list_empty(struct dlock_list_head *dlock_head)
+static inline bool dlock_list_empty(struct dlock_list_head __percpu 
*dlock_head)
 {
int cpu;
 
@@ -134,7 +135,7 @@ static inline bool dlock_list_empty(struct dlock_list_head 
*dlock_head)
  * Return: true if the entry is found, false if all the lists exhausted
  */
 static __always_inline bool
-__dlock_list_next_cpu(struct dlock_list_head *head,
+__dlock_list_next_cpu(struct dlock_list_head __percpu *head,
  struct dlock_list_state *state)
 {
if (state->lock)
@@ -172,7 +173,7 @@ next_cpu:
  *
  * Return: true if the next entry is found, false if all the entries iterated
  */
-static inline bool dlock_list_iterate(struct dlock_list_head *head,
+static inline bool dlock_list_iterate(struct dlock_list_head __percpu *head,
  struct dlock_list_state *state)
 {
/*
@@ -200,8 +201,9 @@ static inline bool dlock_list_iterate(struct 
dlock_list_head *head,
  *
  * Return: true if the next entry is found, false if all the entries iterated
  */
-static inline bool dlock_list_iterate_safe(struct dlock_list_head *head,
-  struct dlock_list_state *state)
+static inline bool
+dlock_list_iterate_safe(struct dlock_list_head __percpu *head,
+   struct dlock_list_state *state)
 {
/*
 * Find next entry
@@ -226,8 +228,8 @@ static inline bool dlock_list_iterate_safe(struct 
dlock_list_head *head,
 }
 
 extern void dlock_list_add(struct dlock_list_node *node,
- struct dlock_list_head *head);
+  struct dlock_list_head __percpu *head);
 extern void dlock_list_del(struct dlock_list_node *node);
-extern int  init_dlock_list_head(struct dlock_list_head **pdlock_head);
+extern int  init_dlock_list_head(struct dlock_list_head __percpu 
**pdlock_head);
 
 #endif /* __LINUX_DLOCK_LIST_H */
diff --git a/lib/dlock-list.c b/lib/dlock-list.c
index 84d4623..e1a1930 100644
--- a/lib/dlock-list.c
+++ b/lib/dlock-list.c
@@ -27,7 +27,7 @@ static struct lock_class_key dlock_list_key;
 /*
  * Initialize the per-cpu list head
  */
-int init_dlock_list_head(struct dlock_list_head **pdlock_head)
+int init_dlock_list_head(struct dlock_list_head __percpu **pdlock_head)
 {
struct dlock_list_head *dlock_head;
int cpu;
@@ -53,7 +53,8 @@ int init_dlock_list_head(struct dlock_list_head **pdlock_head)
  * function is called. However, deletion may be done by a different CPU.
  * So we still need to use a lock to protect the content of the list.
  */
-void dlock_list_add(struct dlock_list_node *node, struct dlock_list_head *head)
+void dlock_list_add(struct dlock_list_node *node,
+   struct dlock_list_head __percpu *head)
 {
struct dlock_list_head *myhead;
 
-- 
1.7.1

[PATCH v2 1/7] lib/dlock-list: Distributed and lock-protected lists

2016-07-11 Thread Waiman Long

Linked list is used everywhere in the Linux kernel. However, if many
threads are trying to add or delete entries into the same linked list,
it can create a performance bottleneck.

This patch introduces new list APIs that provide a set of distributed
lists (one per CPU), each of which is protected by its own spinlock.
To the callers, however, the set of lists acts like a single
consolidated list.  This allows list entries insertion and deletion
operations to happen in parallel instead of being serialized with a
global list and lock.

List entry insertion is strictly per cpu. List deletion, however, can
happen in a cpu other than the one that did the insertion. So we still
need a lock to protect the list. Because of that, there may still be
a small amount of contention when deletion is being done.

A new header file include/linux/dlock-list.h will be added with the
associated dlock_list_head and dlock_list_node structures. The following
functions are provided to manage the per-cpu list:

 1. int init_dlock_list_head(struct dlock_list_head **pdlock_head)
 2. void dlock_list_add(struct dlock_list_node *node,
struct dlock_list_head *head)
 3. void dlock_list_del(struct dlock_list_node *node)

Iteration of all the list entries within a group of per-cpu
lists is done by calling either the dlock_list_iterate() or
dlock_list_iterate_safe() functions in a while loop. They correspond
to the list_for_each_entry() and list_for_each_entry_safe() macros
respectively. The iteration state is kept in a dlock_list_state
structure that is passed to the iteration functions.

Signed-off-by: Waiman Long 
Reviewed-by: Jan Kara 
---
 include/linux/dlock-list.h |  233 
 lib/Makefile   |2 +-
 lib/dlock-list.c   |  100 +++
 3 files changed, 334 insertions(+), 1 deletions(-)
 create mode 100644 include/linux/dlock-list.h
 create mode 100644 lib/dlock-list.c

diff --git a/include/linux/dlock-list.h b/include/linux/dlock-list.h
new file mode 100644
index 000..43355f8
--- /dev/null
+++ b/include/linux/dlock-list.h
@@ -0,0 +1,233 @@
+/*
+ * Distributed/locked list
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * (C) Copyright 2016 Hewlett-Packard Enterprise Development LP
+ *
+ * Authors: Waiman Long 
+ */
+#ifndef __LINUX_DLOCK_LIST_H
+#define __LINUX_DLOCK_LIST_H
+
+#include 
+#include 
+#include 
+
+/*
+ * include/linux/dlock-list.h
+ *
+ * A distributed (per-cpu) set of lists each of which is protected by its
+ * own spinlock, but acts like a single consolidated list to the callers.
+ *
+ * The dlock_list_head structure contains the spinlock, the other
+ * dlock_list_node structures only contains a pointer to the spinlock in
+ * dlock_list_head.
+ */
+struct dlock_list_head {
+   struct list_head list;
+   spinlock_t lock;
+};
+
+#define DLOCK_LIST_HEAD_INIT(name) \
+   {   \
+   .list.prev = ,\
+   .list.next = ,\
+   .list.lock = __SPIN_LOCK_UNLOCKED(name),\
+   }
+
+/*
+ * Per-cpu list iteration state
+ */
+struct dlock_list_state {
+   int  cpu;
+   spinlock_t  *lock;
+   struct list_head*head;  /* List head of current per-cpu list */
+   struct dlock_list_node  *curr;
+   struct dlock_list_node  *next;
+};
+
+#define DLOCK_LIST_STATE_INIT()\
+   {   \
+   .cpu  = -1, \
+   .lock = NULL,   \
+   .head = NULL,   \
+   .curr = NULL,   \
+   .next = NULL,   \
+   }
+
+#define DEFINE_DLOCK_LIST_STATE(s) \
+   struct dlock_list_state s = DLOCK_LIST_STATE_INIT()
+
+static inline void init_dlock_list_state(struct dlock_list_state *state)
+{
+   state->cpu  = -1;
+   state->lock = NULL;
+   state->head = NULL;
+   state->curr = NULL;
+   state->next = NULL;
+}
+
+#ifdef CONFIG_DEBUG_SPINLOCK
+#define DLOCK_LIST_WARN_ON(x)  WARN_ON(x)
+#else
+#define DLOCK_LIST_WARN_ON(x)
+#endif
+
+/*
+ * Next per-cpu list entry
+ */
+#define dlock_list_next_entry(pos, member) list_next_entry(pos, member.list)
+
+/*
+ * Per-cpu node data

[RFC PATCH v2 6/7] lib/persubnode: Introducing a simple per-subnode APIs

2016-07-11 Thread Waiman Long

The percpu APIs are extensively used in the Linux kernel to reduce
cacheline contention and improve performance. For some use cases, the
percpu APIs may be too fine-grained for distributed resources whereas
a per-node based allocation may be too coarse as we can have dozens
of CPUs in a NUMA node in some high-end systems.

This patch introduces simple per-subnode APIs where each of the
distributed resources will be shared by only a handful of CPUs within
a NUMA node. The per-subnode APIs are built on top of the percpu APIs
and hence require the same amount of memory as if the percpu APIs
were used. However, they help to reduce the total number of separate
resources that need to be managed. As a result, they can speed up code
that needs to iterate over all the resources compared with using the
percpu APIs. Cacheline contention, however, will increase slightly as
each resource is shared by more than one CPU. As long as the number of
CPUs in each subnode is small, the performance impact won't be
significant.

In this patch, at most 2 sibling groups can be put into a subnode. For
an x86-64 CPU, at most 4 CPUs will be in a subnode when HT is enabled
and 2 when it is not.

Signed-off-by: Waiman Long 
---
 include/linux/persubnode.h |   80 +
 init/main.c|2 +
 lib/Makefile   |2 +
 lib/persubnode.c   |  119 
 4 files changed, 203 insertions(+), 0 deletions(-)
 create mode 100644 include/linux/persubnode.h
 create mode 100644 lib/persubnode.c

diff --git a/include/linux/persubnode.h b/include/linux/persubnode.h
new file mode 100644
index 000..b777daa
--- /dev/null
+++ b/include/linux/persubnode.h
@@ -0,0 +1,80 @@
+/*
+ * Per-subnode definitions
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * (C) Copyright 2016 Hewlett-Packard Enterprise Development LP
+ *
+ * Authors: Waiman Long 
+ */
+#ifndef __LINUX_PERSUBNODE_H
+#define __LINUX_PERSUBNODE_H
+
+#include 
+#include 
+
+/*
+ * Per-subnode APIs
+ */
+#define __persubnode   __percpu
+#define nr_subnode_ids nr_cpu_ids
+#define alloc_persubnode(type) alloc_percpu(type)
+#define free_persubnode(var)   free_percpu(var)
+#define for_each_subnode(snode)for_each_cpu(snode, 
subnode_mask)
+#define per_subnode_ptr(ptr, subnode)  per_cpu_ptr(ptr, subnode)
+#define per_subnode(var, subnode)  per_cpu(var, subnode)
+
+#ifdef CONFIG_SMP
+
+extern struct cpumask __subnode_mask __read_mostly;
+DECLARE_PER_CPU_READ_MOSTLY(int, cpu_subnode_id);
+
+#define subnode_mask   (&__subnode_mask)
+
+static inline int this_cpu_to_subnode(void)
+{
+   return *this_cpu_ptr(_subnode_id);
+}
+
+/*
+ * For safety, preemption should be disabled before using this_subnode_ptr().
+ */
+#define this_subnode_ptr(ptr)  \
+({ \
+   int _snid = this_cpu_to_subnode();  \
+   per_cpu_ptr(ptr, _snid);\
+})
+
+#define get_subnode_ptr(ptr)   \
+({ \
+   preempt_disable();  \
+   this_subnode_ptr(ptr);  \
+})
+
+#define put_subnode_ptr(ptr)   \
+do {   \
+   (void)(ptr);\
+   preempt_enable();   \
+} while (0)
+
+extern void __init subnode_early_init(void);
+
+#else /* CONFIG_SMP */
+
+#define subnode_mask   cpu_possible_mask
+#define this_subnode_ptr(ptr)  this_cpu_ptr(ptr)
+#define get_subnode_ptr(ptr)   get_cpu_ptr(ptr)
+#define put_subnode_ptr(ptr)   put_cpu_ptr(ptr)
+
+static inline void subnode_early_init(void) { }
+
+#endif /* CONFIG_SMP */
+#endif /* __LINUX_PERSUBNODE_H */
diff --git a/init/main.c b/init/main.c
index 4c17fda..28e4425 100644
--- a/init/main.c
+++ b/init/main.c
@@ -81,6 +81,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -524,6 +525,7 @@ asmlinkage __visible void __init start_kernel(void)
   NULL, set_init_arg);
 
jump_label_init();
+   subnode_early_init();
 
/*
 * These use large bootmem allocations and must precede
diff --git a/lib/Makefile b/lib/Makefile
index 92e8c38..440152c 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -232,3 +232,5 @@ obj-$(CONFIG_UCS2_STRING) += ucs2_string.o
 obj-$(CONFIG_UBSAN)

[PATCH v2 2/7] lib/dlock-list: Add __percpu modifier for parameters

2016-07-11 Thread Waiman Long

From: Boqun Feng 

Add __percpu modifier properly to help:

1.  Differentiate pointers to actual structures from those to percpu
structures, which could improve readability.

2.  Prevent sparse from complaining about "different address spaces"

Signed-off-by: Boqun Feng 
Signed-off-by: Waiman Long 
---
 include/linux/dlock-list.h |   18 ++
 lib/dlock-list.c   |5 +++--
 2 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/include/linux/dlock-list.h b/include/linux/dlock-list.h
index 43355f8..a8e1fd2 100644
--- a/include/linux/dlock-list.h
+++ b/include/linux/dlock-list.h
@@ -108,7 +108,8 @@ static inline void init_dlock_list_node(struct 
dlock_list_node *node)
node->lockptr = NULL;
 }
 
-static inline void free_dlock_list_head(struct dlock_list_head **pdlock_head)
+static inline void
+free_dlock_list_head(struct dlock_list_head __percpu **pdlock_head)
 {
free_percpu(*pdlock_head);
*pdlock_head = NULL;
@@ -117,7 +118,7 @@ static inline void free_dlock_list_head(struct 
dlock_list_head **pdlock_head)
 /*
  * Check if all the per-cpu lists are empty
  */
-static inline bool dlock_list_empty(struct dlock_list_head *dlock_head)
+static inline bool dlock_list_empty(struct dlock_list_head __percpu 
*dlock_head)
 {
int cpu;
 
@@ -134,7 +135,7 @@ static inline bool dlock_list_empty(struct dlock_list_head 
*dlock_head)
  * Return: true if the entry is found, false if all the lists exhausted
  */
 static __always_inline bool
-__dlock_list_next_cpu(struct dlock_list_head *head,
+__dlock_list_next_cpu(struct dlock_list_head __percpu *head,
  struct dlock_list_state *state)
 {
if (state->lock)
@@ -172,7 +173,7 @@ next_cpu:
  *
  * Return: true if the next entry is found, false if all the entries iterated
  */
-static inline bool dlock_list_iterate(struct dlock_list_head *head,
+static inline bool dlock_list_iterate(struct dlock_list_head __percpu *head,
  struct dlock_list_state *state)
 {
/*
@@ -200,8 +201,9 @@ static inline bool dlock_list_iterate(struct 
dlock_list_head *head,
  *
  * Return: true if the next entry is found, false if all the entries iterated
  */
-static inline bool dlock_list_iterate_safe(struct dlock_list_head *head,
-  struct dlock_list_state *state)
+static inline bool
+dlock_list_iterate_safe(struct dlock_list_head __percpu *head,
+   struct dlock_list_state *state)
 {
/*
 * Find next entry
@@ -226,8 +228,8 @@ static inline bool dlock_list_iterate_safe(struct 
dlock_list_head *head,
 }
 
 extern void dlock_list_add(struct dlock_list_node *node,
- struct dlock_list_head *head);
+  struct dlock_list_head __percpu *head);
 extern void dlock_list_del(struct dlock_list_node *node);
-extern int  init_dlock_list_head(struct dlock_list_head **pdlock_head);
+extern int  init_dlock_list_head(struct dlock_list_head __percpu 
**pdlock_head);
 
 #endif /* __LINUX_DLOCK_LIST_H */
diff --git a/lib/dlock-list.c b/lib/dlock-list.c
index 84d4623..e1a1930 100644
--- a/lib/dlock-list.c
+++ b/lib/dlock-list.c
@@ -27,7 +27,7 @@ static struct lock_class_key dlock_list_key;
 /*
  * Initialize the per-cpu list head
  */
-int init_dlock_list_head(struct dlock_list_head **pdlock_head)
+int init_dlock_list_head(struct dlock_list_head __percpu **pdlock_head)
 {
struct dlock_list_head *dlock_head;
int cpu;
@@ -53,7 +53,8 @@ int init_dlock_list_head(struct dlock_list_head **pdlock_head)
  * function is called. However, deletion may be done by a different CPU.
  * So we still need to use a lock to protect the content of the list.
  */
-void dlock_list_add(struct dlock_list_node *node, struct dlock_list_head *head)
+void dlock_list_add(struct dlock_list_node *node,
+   struct dlock_list_head __percpu *head)
 {
struct dlock_list_head *myhead;
 
-- 
1.7.1

[PATCH v2 1/7] lib/dlock-list: Distributed and lock-protected lists

2016-07-11 Thread Waiman Long

Linked list is used everywhere in the Linux kernel. However, if many
threads are trying to add or delete entries into the same linked list,
it can create a performance bottleneck.

This patch introduces new list APIs that provide a set of distributed
lists (one per CPU), each of which is protected by its own spinlock.
To the callers, however, the set of lists acts like a single
consolidated list.  This allows list entries insertion and deletion
operations to happen in parallel instead of being serialized with a
global list and lock.

List entry insertion is strictly per cpu. List deletion, however, can
happen in a cpu other than the one that did the insertion. So we still
need lock to protect the list. Because of that, there may still be
a small amount of contention when deletion is being done.

A new header file include/linux/dlock-list.h will be added with the
associated dlock_list_head and dlock_list_node structures. The following
functions are provided to manage the per-cpu list:

 1. int init_dlock_list_head(struct dlock_list_head **pdlock_head)
 2. void dlock_list_add(struct dlock_list_node *node,
struct dlock_list_head *head)
 3. void dlock_list_del(struct dlock_list_node *node)

Iteration of all the list entries within a group of per-cpu
lists is done by calling either the dlock_list_iterate() or
dlock_list_iterate_safe() functions in a while loop. They correspond
to the list_for_each_entry() and list_for_each_entry_safe() macros
respectively. The iteration states are kept in a dlock_list_state
structure that is passed to the iteration functions.

Signed-off-by: Waiman Long 
Reviewed-by: Jan Kara 
---
 include/linux/dlock-list.h |  233 
 lib/Makefile   |2 +-
 lib/dlock-list.c   |  100 +++
 3 files changed, 334 insertions(+), 1 deletions(-)
 create mode 100644 include/linux/dlock-list.h
 create mode 100644 lib/dlock-list.c

diff --git a/include/linux/dlock-list.h b/include/linux/dlock-list.h
new file mode 100644
index 000..43355f8
--- /dev/null
+++ b/include/linux/dlock-list.h
@@ -0,0 +1,233 @@
+/*
+ * Distributed/locked list
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * (C) Copyright 2016 Hewlett-Packard Enterprise Development LP
+ *
+ * Authors: Waiman Long 
+ */
+#ifndef __LINUX_DLOCK_LIST_H
+#define __LINUX_DLOCK_LIST_H
+
+#include 
+#include 
+#include 
+
+/*
+ * include/linux/dlock-list.h
+ *
+ * A distributed (per-cpu) set of lists each of which is protected by its
+ * own spinlock, but acts like a single consolidated list to the callers.
+ *
+ * The dlock_list_head structure contains the spinlock, the other
+ * dlock_list_node structures only contains a pointer to the spinlock in
+ * dlock_list_head.
+ */
+struct dlock_list_head {
+   struct list_head list;
+   spinlock_t lock;
+};
+
+#define DLOCK_LIST_HEAD_INIT(name) \
+   {   \
+   .list.prev = &name.list,\
+   .list.next = &name.list,\
+   .list.lock = __SPIN_LOCK_UNLOCKED(name),\
+   }
+
+/*
+ * Per-cpu list iteration state
+ */
+struct dlock_list_state {
+   int  cpu;
+   spinlock_t  *lock;
+   struct list_head*head;  /* List head of current per-cpu list */
+   struct dlock_list_node  *curr;
+   struct dlock_list_node  *next;
+};
+
+#define DLOCK_LIST_STATE_INIT()\
+   {   \
+   .cpu  = -1, \
+   .lock = NULL,   \
+   .head = NULL,   \
+   .curr = NULL,   \
+   .next = NULL,   \
+   }
+
+#define DEFINE_DLOCK_LIST_STATE(s) \
+   struct dlock_list_state s = DLOCK_LIST_STATE_INIT()
+
+static inline void init_dlock_list_state(struct dlock_list_state *state)
+{
+   state->cpu  = -1;
+   state->lock = NULL;
+   state->head = NULL;
+   state->curr = NULL;
+   state->next = NULL;
+}
+
+#ifdef CONFIG_DEBUG_SPINLOCK
+#define DLOCK_LIST_WARN_ON(x)  WARN_ON(x)
+#else
+#define DLOCK_LIST_WARN_ON(x)
+#endif
+
+/*
+ * Next per-cpu list entry
+ */
+#define dlock_list_next_entry(pos, member) list_next_entry(pos, member.list)
+
+/*
+ * Per-cpu node data structure
+ */
+struct dlock_list_node {
+   struct

[RFC PATCH v2 6/7] lib/persubnode: Introducing a simple per-subnode APIs

2016-07-11 Thread Waiman Long

The percpu APIs are extensively used in the Linux kernel to reduce
cacheline contention and improve performance. For some use cases, the
percpu APIs may be too fine-grain for distributed resources whereas
a per-node based allocation may be too coarse as we can have dozens
of CPUs in a NUMA node in some high-end systems.

This patch introduces simple per-subnode APIs where each of the
distributed resources will be shared by only a handful of CPUs within
a NUMA node. The per-subnode APIs are built on top of the percpu APIs
and hence require the same amount of memory as if the percpu APIs
are used. However, it helps to reduce the total number of separate
resources that need to be managed. As a result, it can speed up code
that needs to iterate all the resources compared with using the percpu
APIs. Cacheline contention, however, will increase slightly as each
resource is shared by more than one CPU. As long as the number of CPUs
in each subnode is small, the performance impact won't be significant.

In this patch, at most 2 sibling groups can be put into a subnode. For
an x86-64 CPU, at most 4 CPUs will be in a subnode when HT is enabled
and 2 when it is not.

Signed-off-by: Waiman Long 
---
 include/linux/persubnode.h |   80 +
 init/main.c|2 +
 lib/Makefile   |2 +
 lib/persubnode.c   |  119 
 4 files changed, 203 insertions(+), 0 deletions(-)
 create mode 100644 include/linux/persubnode.h
 create mode 100644 lib/persubnode.c

diff --git a/include/linux/persubnode.h b/include/linux/persubnode.h
new file mode 100644
index 000..b777daa
--- /dev/null
+++ b/include/linux/persubnode.h
@@ -0,0 +1,80 @@
+/*
+ * Per-subnode definitions
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * (C) Copyright 2016 Hewlett-Packard Enterprise Development LP
+ *
+ * Authors: Waiman Long 
+ */
+#ifndef __LINUX_PERSUBNODE_H
+#define __LINUX_PERSUBNODE_H
+
+#include 
+#include 
+
+/*
+ * Per-subnode APIs
+ */
+#define __persubnode   __percpu
+#define nr_subnode_ids nr_cpu_ids
+#define alloc_persubnode(type) alloc_percpu(type)
+#define free_persubnode(var)   free_percpu(var)
+#define for_each_subnode(snode)for_each_cpu(snode, 
subnode_mask)
+#define per_subnode_ptr(ptr, subnode)  per_cpu_ptr(ptr, subnode)
+#define per_subnode(var, subnode)  per_cpu(var, subnode)
+
+#ifdef CONFIG_SMP
+
+extern struct cpumask __subnode_mask __read_mostly;
+DECLARE_PER_CPU_READ_MOSTLY(int, cpu_subnode_id);
+
+#define subnode_mask   (&__subnode_mask)
+
+static inline int this_cpu_to_subnode(void)
+{
+   return *this_cpu_ptr(&cpu_subnode_id);
+}
+
+/*
+ * For safety, preemption should be disabled before using this_subnode_ptr().
+ */
+#define this_subnode_ptr(ptr)  \
+({ \
+   int _snid = this_cpu_to_subnode();  \
+   per_cpu_ptr(ptr, _snid);\
+})
+
+#define get_subnode_ptr(ptr)   \
+({ \
+   preempt_disable();  \
+   this_subnode_ptr(ptr);  \
+})
+
+#define put_subnode_ptr(ptr)   \
+do {   \
+   (void)(ptr);\
+   preempt_enable();   \
+} while (0)
+
+extern void __init subnode_early_init(void);
+
+#else /* CONFIG_SMP */
+
+#define subnode_mask   cpu_possible_mask
+#define this_subnode_ptr(ptr)  this_cpu_ptr(ptr)
+#define get_subnode_ptr(ptr)   get_cpu_ptr(ptr)
+#define put_subnode_ptr(ptr)   put_cpu_ptr(ptr)
+
+static inline void subnode_early_init(void) { }
+
+#endif /* CONFIG_SMP */
+#endif /* __LINUX_PERSUBNODE_H */
diff --git a/init/main.c b/init/main.c
index 4c17fda..28e4425 100644
--- a/init/main.c
+++ b/init/main.c
@@ -81,6 +81,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -524,6 +525,7 @@ asmlinkage __visible void __init start_kernel(void)
   NULL, set_init_arg);
 
jump_label_init();
+   subnode_early_init();
 
/*
 * These use large bootmem allocations and must precede
diff --git a/lib/Makefile b/lib/Makefile
index 92e8c38..440152c 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -232,3 +232,5 @@ obj-$(CONFIG_UCS2_STRING) += ucs2_string.o
 obj-$(CONFIG_UBSAN) += ubsan.o
 
 UBSAN_SANITIZE_ubsan.o := n
+

Re: [PATCH -next] bpf: make inode code explicitly non-modular

2016-07-11 Thread Daniel Borkmann


On 07/11/2016 06:51 PM, Paul Gortmaker wrote:

The Kconfig currently controlling compilation of this code is:

init/Kconfig:config BPF_SYSCALL
init/Kconfig:   bool "Enable bpf() system call"

...meaning that it currently is not being built as a module by anyone.

Lets remove the couple traces of modular infrastructure use, so that
when reading the driver there is no doubt it is builtin-only.

Note that MODULE_ALIAS is a no-op for non-modular code.

We replace module.h with init.h since the file does use __init.

Cc: Alexei Starovoitov 
Cc: net...@vger.kernel.org
Signed-off-by: Paul Gortmaker 


(Patch is for net-next tree then.)

Acked-by: Daniel Borkmann

Re: [PATCH -next] bpf: make inode code explicitly non-modular

2016-07-11 Thread Daniel Borkmann


On 07/11/2016 06:51 PM, Paul Gortmaker wrote:

The Kconfig currently controlling compilation of this code is:

init/Kconfig:config BPF_SYSCALL
init/Kconfig:   bool "Enable bpf() system call"

...meaning that it currently is not being built as a module by anyone.

Lets remove the couple traces of modular infrastructure use, so that
when reading the driver there is no doubt it is builtin-only.

Note that MODULE_ALIAS is a no-op for non-modular code.

We replace module.h with init.h since the file does use __init.

Cc: Alexei Starovoitov 
Cc: net...@vger.kernel.org
Signed-off-by: Paul Gortmaker 


(Patch is for net-next tree then.)

Acked-by: Daniel Borkmann

[PATCH] staging: lustre: o2iblnd: iov fixes for kiblnd_send

2016-07-11 Thread James Simmons

With the move to iov_iter handling, two issues were merged
for the ko2iblnd driver. The first fix addresses a simple
typo of the wrong flag being used with iov_iter_kvec.
The second fix adds the payload offset to the payload
size.

Signed-off-by: James Simmons 
---
 drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c | 13 -
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c 
b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c
index 3d597dc..437e149 100644
--- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c
+++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c
@@ -1519,12 +1519,15 @@ kiblnd_send(lnet_ni_t *ni, void *private, lnet_msg_t 
*lntmsg)
/* payload is either all vaddrs or all pages */
LASSERT(!(payload_kiov && payload_iov));
 
-   if (payload_kiov)
+   if (payload_kiov) {
iov_iter_bvec(, ITER_BVEC | WRITE,
-   payload_kiov, payload_niov, payload_nob);
-   else
-   iov_iter_kvec(, ITER_BVEC | WRITE,
-   payload_iov, payload_niov, payload_nob);
+ payload_kiov, payload_niov,
+ payload_nob + payload_offset);
+   } else {
+   iov_iter_kvec(, ITER_KVEC | WRITE,
+ payload_iov, payload_niov,
+ payload_nob + payload_offset);
+   }
iov_iter_advance(, payload_offset);
 
switch (type) {
-- 
2.7.4

[PATCH] staging: lustre: o2iblnd: iov fixes for kiblnd_send

2016-07-11 Thread James Simmons

With the move to iov_iter handling, two issues were merged
for the ko2iblnd driver. The first fix addresses a simple
typo of the wrong flag being used with iov_iter_kvec.
The second fix adds the payload offset to the payload
size.

Signed-off-by: James Simmons 
---
 drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c | 13 -
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c 
b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c
index 3d597dc..437e149 100644
--- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c
+++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c
@@ -1519,12 +1519,15 @@ kiblnd_send(lnet_ni_t *ni, void *private, lnet_msg_t 
*lntmsg)
/* payload is either all vaddrs or all pages */
LASSERT(!(payload_kiov && payload_iov));
 
-   if (payload_kiov)
+   if (payload_kiov) {
iov_iter_bvec(, ITER_BVEC | WRITE,
-   payload_kiov, payload_niov, payload_nob);
-   else
-   iov_iter_kvec(, ITER_BVEC | WRITE,
-   payload_iov, payload_niov, payload_nob);
+ payload_kiov, payload_niov,
+ payload_nob + payload_offset);
+   } else {
+   iov_iter_kvec(, ITER_KVEC | WRITE,
+ payload_iov, payload_niov,
+ payload_nob + payload_offset);
+   }
iov_iter_advance(, payload_offset);
 
switch (type) {
-- 
2.7.4

Re: More parallel atomic_open/d_splice_alias fun with NFS and possibly more FSes.

2016-07-11 Thread James Simmons


> On Sun, Jul 10, 2016 at 07:14:18PM +0100, James Simmons wrote:
> 
> > [  111.210818]  [] kiblnd_send+0x51d/0x9e0 [ko2iblnd]
> 
> Mea culpa - in kiblnd_send() this
> if (payload_kiov)
> iov_iter_bvec(, ITER_BVEC | WRITE,
> payload_kiov, payload_niov, payload_nob);
> else
> iov_iter_kvec(, ITER_BVEC | WRITE,
> payload_iov, payload_niov, payload_nob);
> should have s/BVEC/KVEC/ in the iov_iter_kvec() arguments.  Cut'n'paste
> braindamage...

That is the fix. Also I believe payload_nob should be payload_nob + 
payload_offset instead. I will send a patch against Oleg's tree
that addresses these issues.

Re: More parallel atomic_open/d_splice_alias fun with NFS and possibly more FSes.

2016-07-11 Thread James Simmons


> On Sun, Jul 10, 2016 at 07:14:18PM +0100, James Simmons wrote:
> 
> > [  111.210818]  [] kiblnd_send+0x51d/0x9e0 [ko2iblnd]
> 
> Mea culpa - in kiblnd_send() this
> if (payload_kiov)
> iov_iter_bvec(, ITER_BVEC | WRITE,
> payload_kiov, payload_niov, payload_nob);
> else
> iov_iter_kvec(, ITER_BVEC | WRITE,
> payload_iov, payload_niov, payload_nob);
> should have s/BVEC/KVEC/ in the iov_iter_kvec() arguments.  Cut'n'paste
> braindamage...

That is the fix. Also I believe payload_nob should be payload_nob + 
payload_offset instead. I will send a patch against Oleg's tree
that addresses these issues.

Re: [PATCH 14/14] PCI: xgene: make it explicitly non-modular

2016-07-11 Thread Duc Dang

On Sat, Jul 9, 2016 at 16:15 Paul Gortmaker
 wrote:
>
> [Re: [PATCH 14/14] PCI: xgene: make it explicitly non-modular] On 07/07/2016 
> (Thu 15:42) Duc Dang wrote:
>
> > On Thu, Jul 7, 2016 at 3:35 PM, Tanmay Inamdar  wrote:
> > >
> > >
> > > On Sat, Jul 2, 2016 at 4:13 PM, Paul Gortmaker
> > >  wrote:
> > >>
> > >> The Kconfig currently controlling compilation of this code is:
> > >>
> > >> drivers/pci/host/Kconfig:config PCI_XGENE
> > >> drivers/pci/host/Kconfig:   bool "X-Gene PCIe controller"
> > >>
> > >> ...meaning that it currently is not being built as a module by anyone.
> > >>
> > >> Lets remove the few trace uses of modular code and macros, so that
> > >> when reading the driver there is no doubt it is builtin-only.
> > >>
> > >> Since module_platform_driver() uses the same init level priority as
> > >> builtin_platform_driver() the init ordering remains unchanged with
> > >> this commit.
> > >>
> > >> We also delete the MODULE_LICENSE tag etc. since all that information
> > >> is already contained at the top of the file in the comments.
> > >>
> > >> Cc: Tanmay Inamdar 
> > >> Cc: Bjorn Helgaas 
> > >> Cc: linux-...@vger.kernel.org
> > >> Signed-off-by: Paul Gortmaker 
> >
> > Thanks for taking care of this, Paul.
> >
> > I tested your patch and it worked fine on my X-Gene Mustang board.
> >
> > One minor comment below.
> >
> > >> ---
> > >>  drivers/pci/host/pci-xgene.c | 8 ++--
> > >>  1 file changed, 2 insertions(+), 6 deletions(-)
> > >>
> > >> diff --git a/drivers/pci/host/pci-xgene.c b/drivers/pci/host/pci-xgene.c
> > >> index 7eb20cc76dd3..a81273c23341 100644
> > >> --- a/drivers/pci/host/pci-xgene.c
> > >> +++ b/drivers/pci/host/pci-xgene.c
> > >> @@ -21,7 +21,7 @@
> > >>  #include 
> > >>  #include 
> > >>  #include 
> > >> -#include 
> > >> +#include 
> >
> > The platform_device.h already has builtin_platform_driver macro
> > defined. So this init.h is not needed?
>
> If you look, you will find that platform_device.h does not include the
> init.h even though it references __init; it can do this w/o error since
> all the references themselves are in a macro.  However once code wants
> to be a consumer of those macros, they will need init.h present.  Often
> you can overlook directly calling it out for inclusion since it gets
> sourced by another header, but it is best policy to list what gets used.

Ah, got it.

Thanks, Paul!
>
> Thanks for testing!
>
> Paul.
> --
>
> >
> > >>  #include 
> > >>  #include 
> > >>  #include 
> > >> @@ -579,8 +579,4 @@ static struct platform_driver xgene_pcie_driver = {
> > >> },
> > >> .probe = xgene_pcie_probe_bridge,
> > >>  };
> > >> -module_platform_driver(xgene_pcie_driver);
> > >> -
> > >> -MODULE_AUTHOR("Tanmay Inamdar ");
> > >> -MODULE_DESCRIPTION("APM X-Gene PCIe driver");
> > >> -MODULE_LICENSE("GPL v2");
> > >> +builtin_platform_driver(xgene_pcie_driver);
> > >
> > >
> > > Copying Duc.
> > >>
> > >> --
> > >> 2.8.4
> > >>
> > >
> > Regards,
> > Duc Dang.

Re: [PATCH 14/14] PCI: xgene: make it explicitly non-modular

2016-07-11 Thread Duc Dang

On Sat, Jul 9, 2016 at 16:15 Paul Gortmaker
 wrote:
>
> [Re: [PATCH 14/14] PCI: xgene: make it explicitly non-modular] On 07/07/2016 
> (Thu 15:42) Duc Dang wrote:
>
> > On Thu, Jul 7, 2016 at 3:35 PM, Tanmay Inamdar  wrote:
> > >
> > >
> > > On Sat, Jul 2, 2016 at 4:13 PM, Paul Gortmaker
> > >  wrote:
> > >>
> > >> The Kconfig currently controlling compilation of this code is:
> > >>
> > >> drivers/pci/host/Kconfig:config PCI_XGENE
> > >> drivers/pci/host/Kconfig:   bool "X-Gene PCIe controller"
> > >>
> > >> ...meaning that it currently is not being built as a module by anyone.
> > >>
> > >> Lets remove the few trace uses of modular code and macros, so that
> > >> when reading the driver there is no doubt it is builtin-only.
> > >>
> > >> Since module_platform_driver() uses the same init level priority as
> > >> builtin_platform_driver() the init ordering remains unchanged with
> > >> this commit.
> > >>
> > >> We also delete the MODULE_LICENSE tag etc. since all that information
> > >> is already contained at the top of the file in the comments.
> > >>
> > >> Cc: Tanmay Inamdar 
> > >> Cc: Bjorn Helgaas 
> > >> Cc: linux-...@vger.kernel.org
> > >> Signed-off-by: Paul Gortmaker 
> >
> > Thanks for taking care of this, Paul.
> >
> > I tested your patch and it worked fine on my X-Gene Mustang board.
> >
> > One minor comment below.
> >
> > >> ---
> > >>  drivers/pci/host/pci-xgene.c | 8 ++--
> > >>  1 file changed, 2 insertions(+), 6 deletions(-)
> > >>
> > >> diff --git a/drivers/pci/host/pci-xgene.c b/drivers/pci/host/pci-xgene.c
> > >> index 7eb20cc76dd3..a81273c23341 100644
> > >> --- a/drivers/pci/host/pci-xgene.c
> > >> +++ b/drivers/pci/host/pci-xgene.c
> > >> @@ -21,7 +21,7 @@
> > >>  #include 
> > >>  #include 
> > >>  #include 
> > >> -#include 
> > >> +#include 
> >
> > The platform_device.h already has builtin_platform_driver macro
> > defined. So this init.h is not needed?
>
> If you look, you will find that platform_device.h does not include the
> init.h even though it references __init; it can do this w/o error since
> all the references themselves are in a macro.  However once code wants
> to be a consumer of those macros, they will need init.h present.  Often
> you can overlook directly calling it out for inclusion since it gets
> sourced by another header, but it is best policy to list what gets used.

Ah, got it.

Thanks, Paul!
>
> Thanks for testing!
>
> Paul.
> --
>
> >
> > >>  #include 
> > >>  #include 
> > >>  #include 
> > >> @@ -579,8 +579,4 @@ static struct platform_driver xgene_pcie_driver = {
> > >> },
> > >> .probe = xgene_pcie_probe_bridge,
> > >>  };
> > >> -module_platform_driver(xgene_pcie_driver);
> > >> -
> > >> -MODULE_AUTHOR("Tanmay Inamdar ");
> > >> -MODULE_DESCRIPTION("APM X-Gene PCIe driver");
> > >> -MODULE_LICENSE("GPL v2");
> > >> +builtin_platform_driver(xgene_pcie_driver);
> > >
> > >
> > > Copying Duc.
> > >>
> > >> --
> > >> 2.8.4
> > >>
> > >
> > Regards,
> > Duc Dang.

Re: [PATCH] capabilities: audit capability use

2016-07-11 Thread Tejun Heo

Hello,

On Mon, Jul 11, 2016 at 02:14:31PM +0300, Topi Miettinen wrote:
> [   28.443674] audit: type=1327 audit(1468234333.144:520): 
> proctitle=6D6B6E6F64002F6465762F7A5F343639006300310032
> [   28.465888] audit: type=1330 audit(1468234333.144:520): 
> cap_used=0800
> [   28.482080] audit: type=1331 audit(1468234333.144:520): cgroups=:/test;

Please don't put additions of the two different audit types into one
patch and I don't think the cgroup audit logging makes much sense.
Without logging all migrations, it doesn't help auditing all that
much.  Also, printing all cgroup membership like that can be
problematic for audit as it can be arbitrarily long.

Thanks.

-- 
tejun

Re: [PATCH] capabilities: audit capability use

2016-07-11 Thread Tejun Heo

Hello,

On Mon, Jul 11, 2016 at 02:14:31PM +0300, Topi Miettinen wrote:
> [   28.443674] audit: type=1327 audit(1468234333.144:520): 
> proctitle=6D6B6E6F64002F6465762F7A5F343639006300310032
> [   28.465888] audit: type=1330 audit(1468234333.144:520): 
> cap_used=0800
> [   28.482080] audit: type=1331 audit(1468234333.144:520): cgroups=:/test;

Please don't put additions of the two different audit types into one
patch and I don't think the cgroup audit logging makes much sense.
Without logging all migrations, it doesn't help auditing all that
much.  Also, printing all cgroup membership like that can be
problematic for audit as it can be arbitrarily long.

Thanks.

-- 
tejun

Re: [PATCH] spi: spi-ti-qspi: clear wlen field while setting word length.

2016-07-11 Thread prahlad venkata

On Mon, Jul 11, 2016 at 3:53 PM, Vignesh R  wrote:
>
>
> On Monday 11 July 2016 02:49 PM, prahlad venkata wrote:
>> On Mon, Jul 11, 2016 at 2:45 PM, Vignesh R  wrote:
> [...]
> diff --git a/drivers/spi/spi-ti-qspi.c b/drivers/spi/spi-ti-qspi.c
> index 29ea8d2..6c61f54 100644
> --- a/drivers/spi/spi-ti-qspi.c
> +++ b/drivers/spi/spi-ti-qspi.c
> @@ -276,9 +276,9 @@ static int qspi_write_msg(struct ti_qspi *qspi, 
> struct spi_transfer *t,
>   cmd |= QSPI_WLEN(QSPI_WLEN_MAX_BITS);
>   } else {
>   writeb(*txbuf, qspi->base + 
> QSPI_SPI_DATA_REG);
> - cmd = qspi->cmd | QSPI_WR_SNGL;
>>
>> This is wrong. Deleting this line means QSPI_WR_SNGL is not set and no
>> data is sent out on the wire.
> QSPI_WR_SNGL is already set as soon as we enter the function.
>>

 qspi->cmd always has WLEN field cleared and set to WLEN = 1 byte (see
 ti_qspi_start_transfer_one()). And hence variable 'cmd' will also have
 WLEN set to 1 byte.
>>> Even though WLEN=1 is set in the ti_qspi_transfer_one, if we ask for a
>>> transfer of large data,
>>> say 300 bytes in length, for attaining faster data rate WLEN 128 is
>>> selected for the first two
>>> transactions and remaining 44 bytes will be transmitted with WLEN 1.
>>> During that case,
>>> WLEN will be changed inside qspi_write_msg function itself and the
>>> field should be cleared
>>> first while doing that.
>>
>> In qspi_write_msg(), qspi->cmd will always have WLEN set to
>> QSPI_WLEN(t->bits_per_word) and qspi->cmd is never changed within this
>> function.
>> It is the value of local variable 'cmd' that is changed to appropriate
>> WLEN (128bit or 8bit) as necessary.
>>>
 'cmd' is written back to qspi->cmd for every transaction.
>>>
>>> You mean qspi->cmd = cmd ?
>>> I don't see this happening anywhere in the driver. Can you point me to
>>> that line of code?
>> line 296:
>> ti_qspi_write(qspi, cmd, QSPI_SPI_CMD_REG);
>
> Sorry, I don't understand. QSPI_SPI_CMD_REG and qspi->cmd are different.
> qspi->cmd does not represent the QSPI_SPI_CMD_REG register. qspi->cmd is
> just local driver data for book-keeping.
>
> Please add some prints in driver to see how 'cmd' (and qspi->cmd)
> variable changes in case of 128bit mode and 8bit mode.
I don't have a hardware setup to verify this. Is there any way to verify
this without hardware?
>
> Regards
> Vignesh
>
>
> --
> Regards
> Vignesh



-- 
Regards,
Prahlad.

Re: [PATCH] spi: spi-ti-qspi: clear wlen field while setting word length.

2016-07-11 Thread prahlad venkata

On Mon, Jul 11, 2016 at 3:53 PM, Vignesh R  wrote:
>
>
> On Monday 11 July 2016 02:49 PM, prahlad venkata wrote:
>> On Mon, Jul 11, 2016 at 2:45 PM, Vignesh R  wrote:
> [...]
> diff --git a/drivers/spi/spi-ti-qspi.c b/drivers/spi/spi-ti-qspi.c
> index 29ea8d2..6c61f54 100644
> --- a/drivers/spi/spi-ti-qspi.c
> +++ b/drivers/spi/spi-ti-qspi.c
> @@ -276,9 +276,9 @@ static int qspi_write_msg(struct ti_qspi *qspi, 
> struct spi_transfer *t,
>   cmd |= QSPI_WLEN(QSPI_WLEN_MAX_BITS);
>   } else {
>   writeb(*txbuf, qspi->base + 
> QSPI_SPI_DATA_REG);
> - cmd = qspi->cmd | QSPI_WR_SNGL;
>>
>> This is wrong. Deleting this line means QSPI_WR_SNGL is not set and no
>> data is sent out on the wire.
> QSPI_WR_SNGL is already set as soon as we enter the function.
>>

 qspi->cmd always has WLEN field cleared and set to WLEN = 1 byte (see
 ti_qspi_start_transfer_one()). And hence variable 'cmd' will also have
 WLEN set to 1 byte.
>>> Even though WLEN=1 is set in the ti_qspi_transfer_one, if we ask for a
>>> transfer of large data,
>>> say 300 bytes in length, for attaining faster data rate WLEN 128 is
>>> selected for the first two
>>> transactions and remaining 44 bytes will be transmitted with WLEN 1.
>>> During that case,
>>> WLEN will be changed inside qspi_write_msg function itself and the
>>> field should be cleared
>>> first while doing that.
>>
>> In qspi_write_msg(), qspi->cmd will always have WLEN set to
>> QSPI_WLEN(t->bits_per_word) and qspi->cmd is never changed within this
>> function.
>> It is the value of local variable 'cmd' that is changed to appropriate
>> WLEN (128bit or 8bit) as necessary.
>>>
 'cmd' is written back to qspi->cmd for every transaction.
>>>
>>> You mean qspi->cmd = cmd ?
>>> I don't see this happening anywhere in the driver. Can you point me to
>>> that line of code?
>> line 296:
>> ti_qspi_write(qspi, cmd, QSPI_SPI_CMD_REG);
>
> Sorry, I don't understand. QSPI_SPI_CMD_REG and qspi->cmd are different.
> qspi->cmd does not represent the QSPI_SPI_CMD_REG register. qspi->cmd is
> just local driver data for book-keeping.
>
> Please add some prints in driver to see how 'cmd' (and qspi->cmd)
> variable changes in case of 128bit mode and 8bit mode.
I don't have a hardware setup to verify this. Is there any way to verify
this without hardware?
>
> Regards
> Vignesh
>
>
> --
> Regards
> Vignesh



-- 
Regards,
Prahlad.

Re: Resend: Another 4.4 to 4.5 floppy issue

2016-07-11 Thread Mark Hounschell


On 07/11/2016 11:36 AM, Jiri Kosina wrote:

On Tue, 5 Jul 2016, Mark Hounschell wrote:


From: Jiri Kosina 

Commit 09954bad4 ("floppy: refactor open() flags handling"), as a
side-effect, causes open(/dev/fdX, O_ACCMODE) to fail. It turns out that
this is being used by setfdprm userspace for ioctl-only open().

Reintroduce back the original behavior wrt !(FMODE_READ|FMODE_WRITE)
modes, while still keeping the original O_NDELAY bug fixed.

Cc: sta...@vger.kernel.org # v4.5+
Reported-by: Wim Osterholt 
Tested-by: Wim Osterholt 
Signed-off-by: Jiri Kosina 
---

[ ... snip ... ]


But this does not completely fix all the problems induced by the original
changes from 4.4 to 4.5. The following is what we use to open the floppy.

fd = open(device,  O_RDWR | O_NDELAY);

The FMODE_NDELAY check that was removed now prevents one from doing an open of
the device with no media inserted. It also prevents one from doing an open of
the device with media inserted that is not already formatted in a "standard"
format.  I do both of these things a lot. I deal with a few very non-standard
formats and this change prevents me from doing what I've been doing for YEARS.
Could we please get the original behavior back in the floppy driver.


Hi Mark,

thanks for the regression report.

For my better understanding of your issue -- what behavior/semantics
exactly does your userspace think it'll be getting from opening /dev/fd0
with O_NDELAY?

Thanks,



Hi Jiri.

Well, all that was specified in my original post. I can no longer open 
the floppy drive with no floppy media inserted. Worse, I can also no 
longer open a floppy with media inserted that is not a "linux" 
recognized format. A floppy drive is a removable media device and should 
be treated as such. The original implementation of the O_NDELAY flag 
allowed it to be.


Any removable media device should be capable of being opened with no, or 
even unrecognizable media installed. The kernel and its utilities should 
not "assume" too much when it comes to removable media. Consider a SCSI 
tape drive or even a removable media SCSI disk drive. How would you 
explain an open failure to someone trying to open a SCSI tape drive that 
had no tape or even a "non-tar" formatted tape media in it???
Or better yet, trying to open a removable media device that was write 
protected but didn't include O_RDONLY in the open?


The original behavior of the floppy driver was correct. I have no idea 
what BUG these changes were supposed to fix but the "fix" obviously 
broke user land. Was this bug reported by some new ROBOT test or 
something? The kernel floppy driver has been stable for years now so I 
am really confused as to why these changes were induced.


As for the "O_RDONLY | O_WRONLY" thing you decided to change back, which 
I'm happy to see, was wrong. Almost ALL removable media devices have W/R 
protection built into the media. For ever, I understood that it was MY 
responsibility to write protect my removable media. An open of a 
removable device should never even care about that stuff. It is the 
user's responsibility.


We use the FDRAWCMD ioctl API extensively. It is totally borked now for 
us without maintaining our own kernel patch that reverts the changes 
from 4.4 to 4.5.


Regards
Mark

Re: Resend: Another 4.4 to 4.5 floppy issue

2016-07-11 Thread Mark Hounschell


On 07/11/2016 11:36 AM, Jiri Kosina wrote:

On Tue, 5 Jul 2016, Mark Hounschell wrote:


From: Jiri Kosina 

Commit 09954bad4 ("floppy: refactor open() flags handling"), as a
side-effect, causes open(/dev/fdX, O_ACCMODE) to fail. It turns out that
this is being used by setfdprm userspace for ioctl-only open().

Reintroduce back the original behavior wrt !(FMODE_READ|FMODE_WRITE)
modes, while still keeping the original O_NDELAY bug fixed.

Cc: sta...@vger.kernel.org # v4.5+
Reported-by: Wim Osterholt 
Tested-by: Wim Osterholt 
Signed-off-by: Jiri Kosina 
---

[ ... snip ... ]


But this does not completely fix all the problems induced by the original
changes from 4.4 to 4.5. The following is what we use to open the floppy.

fd = open(device,  O_RDWR | O_NDELAY);

The FMODE_NDELAY check that was removed now prevents one from doing an open of
the device with no media inserted. It also prevents one from doing an open of
the device with media inserted that is not already formatted in a "standard"
format.  I do both of these things a lot. I deal with a few very non-standard
formats and this change prevents me from doing what I've been doing for YEARS.
Could we please get the original behavior back in the floppy driver.


Hi Mark,

thanks for the regression report.

For my better understanding of your issue -- what behavior/semantics
exactly does your userspace think it'll be getting from opening /dev/fd0
with O_NDELAY?

Thanks,



Hi Jiri.

Well, all that was specified in my original post. I can no longer open 
the floppy drive with no floppy media inserted. Worse, I can also no 
longer open a floppy with media inserted that is not a "linux" 
recognized format. A floppy drive is a removable media device and should 
be treated as such. The original implementation of the O_NDELAY flag 
allowed it to be.


Any removable media device should be capable of being opened with no, or 
even unrecognizable media installed. The kernel and its utilities should 
not "assume" too much when it comes to removable media. Consider a SCSI 
tape drive or even a removable media SCSI disk drive. How would you 
explain an open failure to someone trying to open a SCSI tape drive that 
had no tape or even a "non-tar" formatted tape media in it???
Or better yet, trying to open a removable media device that was write 
protected but didn't include O_RDONLY in the open?


The original behavior of the floppy driver was correct. I have no idea 
what BUG these changes were supposed to fix but the "fix" obviously 
broke user land. Was this bug reported by some new ROBOT test or 
something? The kernel floppy driver has been stable for years now so I 
am really confused as to why these changes were introduced.


As for the "O_RDONLY | O_WRONLY" thing, which I'm happy to see you decided 
to change back, it was wrong. Almost ALL removable media devices have W/R 
protection built into the media. I have always understood that it was MY 
responsibility to write protect my removable media. An open of a 
removable device should never even care about that stuff. It is the 
user's responsibility.


We use extensively, the FDRAWCMD ioctl API. It is totally borked now for 
us without maintaining our own kernel patch that reverts the changes 
from 4.4 to 4.5.


Regards
Mark

Re: [PATCH 3/3] time: drop local_irq_save/restore from irqtime_account_irq

2016-07-11 Thread Paolo Bonzini



On 11/07/2016 18:53, r...@redhat.com wrote:
> From: Rik van Riel 
> 
> Paolo pointed out that irqs are already blocked when irqtime_account_irq
> is called. That means there is no reason to call local_irq_save/restore
> again.
> 
> Signed-off-by: Rik van Riel 
> Suggested-by: Paolo Bonzini 
> ---
>  kernel/sched/cputime.c | 4 
>  1 file changed, 4 deletions(-)
> 
> diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
> index ca7e33cb0967..7b6fa4d7ad4c 100644
> --- a/kernel/sched/cputime.c
> +++ b/kernel/sched/cputime.c
> @@ -49,15 +49,12 @@ DEFINE_PER_CPU(seqcount_t, irq_time_seq);
>   */
>  void irqtime_account_irq(struct task_struct *curr)
>  {
> - unsigned long flags;
>   s64 delta;
>   int cpu;
>  
>   if (!sched_clock_irqtime)
>   return;
>  
> - local_irq_save(flags);
> -
>   cpu = smp_processor_id();
>   delta = sched_clock_cpu(cpu) - __this_cpu_read(irq_start_time);
>   __this_cpu_add(irq_start_time, delta);
> @@ -75,7 +72,6 @@ void irqtime_account_irq(struct task_struct *curr)
>   __this_cpu_add(cpu_softirq_time, delta);
>  
>   irq_time_write_end();
> - local_irq_restore(flags);
>  }
>  EXPORT_SYMBOL_GPL(irqtime_account_irq);
>  
> 

Reviewed-by: Paolo Bonzini

Re: [PATCH 3/3] time: drop local_irq_save/restore from irqtime_account_irq

2016-07-11 Thread Paolo Bonzini



On 11/07/2016 18:53, r...@redhat.com wrote:
> From: Rik van Riel 
> 
> Paolo pointed out that irqs are already blocked when irqtime_account_irq
> is called. That means there is no reason to call local_irq_save/restore
> again.
> 
> Signed-off-by: Rik van Riel 
> Suggested-by: Paolo Bonzini 
> ---
>  kernel/sched/cputime.c | 4 
>  1 file changed, 4 deletions(-)
> 
> diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
> index ca7e33cb0967..7b6fa4d7ad4c 100644
> --- a/kernel/sched/cputime.c
> +++ b/kernel/sched/cputime.c
> @@ -49,15 +49,12 @@ DEFINE_PER_CPU(seqcount_t, irq_time_seq);
>   */
>  void irqtime_account_irq(struct task_struct *curr)
>  {
> - unsigned long flags;
>   s64 delta;
>   int cpu;
>  
>   if (!sched_clock_irqtime)
>   return;
>  
> - local_irq_save(flags);
> -
>   cpu = smp_processor_id();
>   delta = sched_clock_cpu(cpu) - __this_cpu_read(irq_start_time);
>   __this_cpu_add(irq_start_time, delta);
> @@ -75,7 +72,6 @@ void irqtime_account_irq(struct task_struct *curr)
>   __this_cpu_add(cpu_softirq_time, delta);
>  
>   irq_time_write_end();
> - local_irq_restore(flags);
>  }
>  EXPORT_SYMBOL_GPL(irqtime_account_irq);
>  
> 

Reviewed-by: Paolo Bonzini

Re: [PATCH] Input: /input/mouse/elan_i2c_core.c Fix some Asus touchapod which casue TP no funciton sometimes, the patch detect some specific touchpad and run a special initialize

2016-07-11 Thread 'Dmitry Torokhov'

On Mon, Jul 11, 2016 at 08:40:58PM +0800, 廖崇榮 wrote:
> > +
> > +   error = data->ops->get_sm_version(client, &data->ic_type,
> > + &data->sm_version);
> > +   if (error)
> > +   return false;
> 
> That means we'd be fetching product ID and IC type twice when initializing 
> the device. Can we come up with a way to do it once?
> [KT]:Because the elan_query_device_info() is behind the elan_initialize(). 
> That's why I fetching product ID and IC type in the elan_initialize()
> I will discuss with FW team and then execute elan_query_device_info() 
> first to get product_id and ic_type. 

We might need to split fetching product ID and IC type from the rest of
the device info.

Thanks.

-- 
Dmitry

Re: [PATCH] Input: /input/mouse/elan_i2c_core.c Fix some Asus touchapod which casue TP no funciton sometimes, the patch detect some specific touchpad and run a special initialize

2016-07-11 Thread 'Dmitry Torokhov'

On Mon, Jul 11, 2016 at 08:40:58PM +0800, 廖崇榮 wrote:
> > +
> > +   error = data->ops->get_sm_version(client, &data->ic_type,
> > + &data->sm_version);
> > +   if (error)
> > +   return false;
> 
> That means we'd be fetching product ID and IC type twice when initializing 
> the device. Can we come up with a way to do it once?
> [KT]:Because the elan_query_device_info() is behind the elan_initialize(). 
> That's why I fetching product ID and IC type in the elan_initialize()
> I will discuss with FW team and then execute elan_query_device_info() 
> first to get product_id and ic_type. 

We might need to split fetching product ID and IC type from the rest of
the device info.

Thanks.

-- 
Dmitry

Re: [PATCH 3/7] security,overlayfs: Provide security hook for copy up of xattrs for overlay file

2016-07-11 Thread Vivek Goyal

On Mon, Jul 11, 2016 at 11:31:47AM -0400, Stephen Smalley wrote:

[..]
> > +static inline int security_inode_copy_up_xattr(const char *name)
> > +{
> > +   -EOPNOTSUPP;
> 
> return?

Yes, I have fixed this one in my patches now. kbuild also flagged this.

Vivek

Re: [PATCH 3/7] security,overlayfs: Provide security hook for copy up of xattrs for overlay file

2016-07-11 Thread Vivek Goyal

On Mon, Jul 11, 2016 at 11:31:47AM -0400, Stephen Smalley wrote:

[..]
> > +static inline int security_inode_copy_up_xattr(const char *name)
> > +{
> > +   -EOPNOTSUPP;
> 
> return?

Yes, I have fixed this one in my patches now. kbuild also flagged this.

Vivek

Re: [PATCH RESEND] iwlwifi, Do not implement thermal zone unless ucode is loaded

2016-07-11 Thread Prarit Bhargava



On 07/11/2016 12:07 PM, Coelho, Luciano wrote:
> On Mon, 2016-07-11 at 11:18 -0400, Prarit Bhargava wrote:
>> Didn't get any feedback or review comments on this patch.  Resending
>> ...
>>
>> P.
> 
> Sorry, this got flooded down my inbox.

NP, Luciano -- My worry was that it hadn't been seen or didn't make it out to
the list.

I'm being a bit impatient too ;)

P.

> 
> 
>> ---8<---
>>
>> The iwlwifi driver implements a thermal zone and hwmon device, but
>> returns -EIO on temperature reads if the firmware isn't loaded.  This
>> results in the error
>>
>> iwlwifi-virtual-0
>> Adapter: Virtual device
>> ERROR: Can't get value of subfeature temp1_input: I/O error
>> temp1:N/A
>>
>> being output when using sensors from the lm-sensors package.  Since
>> the temperature cannot be read unless the ucode is loaded there is no
>> reason to add the interface only to have it return an error 100% of
>> the time.
>>
>> This patch moves the firmware check to
>> iwl_mvm_thermal_zone_register() and
>> stops the thermal zone from being created if the ucode hasn't been
>> loaded.
>>
>> Signed-off-by: Prarit Bhargava 
>> Cc: Johannes Berg 
>> Cc: Emmanuel Grumbach 
>> Cc: Luca Coelho 
>> Cc: Intel Linux Wireless 
>> Cc: Kalle Valo 
>> Cc: Chaya Rachel Ivgi 
>> Cc: Sara Sharon 
>> Cc: linux-wirel...@vger.kernel.org
>> Cc: net...@vger.kernel.org
>> ---
> 
> I have now sent it for review on our internal tree.
> 
> --
> Luca.
>

Re: [PATCH RESEND] iwlwifi, Do not implement thermal zone unless ucode is loaded

2016-07-11 Thread Prarit Bhargava



On 07/11/2016 12:07 PM, Coelho, Luciano wrote:
> On Mon, 2016-07-11 at 11:18 -0400, Prarit Bhargava wrote:
>> Didn't get any feedback or review comments on this patch.  Resending
>> ...
>>
>> P.
> 
> Sorry, this got flooded down my inbox.

NP, Luciano -- My worry was that it hadn't been seen or didn't make it out to
the list.

I'm being a bit impatient too ;)

P.

> 
> 
>> ---8<---
>>
>> The iwlwifi driver implements a thermal zone and hwmon device, but
>> returns -EIO on temperature reads if the firmware isn't loaded.  This
>> results in the error
>>
>> iwlwifi-virtual-0
>> Adapter: Virtual device
>> ERROR: Can't get value of subfeature temp1_input: I/O error
>> temp1:N/A
>>
>> being output when using sensors from the lm-sensors package.  Since
>> the temperature cannot be read unless the ucode is loaded there is no
>> reason to add the interface only to have it return an error 100% of
>> the time.
>>
>> This patch moves the firmware check to
>> iwl_mvm_thermal_zone_register() and
>> stops the thermal zone from being created if the ucode hasn't been
>> loaded.
>>
>> Signed-off-by: Prarit Bhargava 
>> Cc: Johannes Berg 
>> Cc: Emmanuel Grumbach 
>> Cc: Luca Coelho 
>> Cc: Intel Linux Wireless 
>> Cc: Kalle Valo 
>> Cc: Chaya Rachel Ivgi 
>> Cc: Sara Sharon 
>> Cc: linux-wirel...@vger.kernel.org
>> Cc: net...@vger.kernel.org
>> ---
> 
> I have now sent it for review on our internal tree.
> 
> --
> Luca.
>

Re: [kernel-hardening] Re: [PATCH v3 06/13] fork: Add generic vmalloced stack support

2016-07-11 Thread Andrey Ryabinin

2016-06-21 21:32 GMT+03:00 Rik van Riel :
> On Tue, 2016-06-21 at 10:13 -0700, Kees Cook wrote:
>> On Tue, Jun 21, 2016 at 9:59 AM, Andy Lutomirski > > wrote:
>> >
>> > I'm tempted to explicitly disallow VM_NO_GUARD in the vmalloc
>> > range.
>> > It has no in-tree users for non-fixed addresses right now.
>> What about the lack of pre-range guard page? That seems like a
>> critical feature for this. :)
>
> If VM_NO_GUARD is disallowed, and every vmalloc area has
> a guard area behind it, then every subsequent vmalloc area
> will have a guard page ahead of it.
>
> I think disallowing VM_NO_GUARD will be all that is required.
>

VM_NO_GUARD is a flag of vm_struct. But some vmalloc areas don't have
vm_struct (see vm_map_ram())
and don't have guard pages either. Once, vm_map_ram() had guard pages,
but they were removed in
248ac0e1943a ("mm/vmalloc: remove guard page from between vmap blocks")
due to exhaustion of vmalloc space on 32-bits. I guess we can
resurrect guard page on 64bits without any problems.

AFAICS per-cpu vmap blocks also don't have guard pages. pcpu vmaps
have vm_struct *without* VM_NO_GUARD, but
don't actually have the guard pages. It seems to be a harmless bug,
because pcpu vmaps use their own alloc/free paths
(pcpu_get_vm_areas()/pcpu_free_vm_areas())
and just don't care about vm->flags content.
Fortunately, pcpu_get_vm_areas() allocates from top of vmalloc, so the
gap between pcpu vmap and regular vmalloc() should be huge.

> The only thing we may want to verify on the architectures that
> we care about is that there is nothing mapped immediately before
> the start of the vmalloc range, otherwise the first vmalloced
> area will not have a guard page below it.
>
> I suspect all the 64 bit architectures are fine in that regard,
> with enormous gaps between kernel memory ranges.
>
> --
> All Rights Reversed.
>

Re: [kernel-hardening] Re: [PATCH v3 06/13] fork: Add generic vmalloced stack support

2016-07-11 Thread Andrey Ryabinin

2016-06-21 21:32 GMT+03:00 Rik van Riel :
> On Tue, 2016-06-21 at 10:13 -0700, Kees Cook wrote:
>> On Tue, Jun 21, 2016 at 9:59 AM, Andy Lutomirski > > wrote:
>> >
>> > I'm tempted to explicitly disallow VM_NO_GUARD in the vmalloc
>> > range.
>> > It has no in-tree users for non-fixed addresses right now.
>> What about the lack of pre-range guard page? That seems like a
>> critical feature for this. :)
>
> If VM_NO_GUARD is disallowed, and every vmalloc area has
> a guard area behind it, then every subsequent vmalloc area
> will have a guard page ahead of it.
>
> I think disallowing VM_NO_GUARD will be all that is required.
>

VM_NO_GUARD is a flag of vm_struct. But some vmalloc areas don't have
vm_struct (see vm_map_ram())
and don't have guard pages either. Once, vm_map_ram() had guard pages,
but they were removed in
248ac0e1943a ("mm/vmalloc: remove guard page from between vmap blocks")
due to exhaustion of vmalloc space on 32-bits. I guess we can
resurrect guard page on 64bits without any problems.

AFAICS per-cpu vmap blocks also don't have guard pages. pcpu vmaps
have vm_struct *without* VM_NO_GUARD, but
don't actually have the guard pages. It seems to be a harmless bug,
because pcpu vmaps use their own alloc/free paths
(pcpu_get_vm_areas()/pcpu_free_vm_areas())
and just don't care about vm->flags content.
Fortunately, pcpu_get_vm_areas() allocates from top of vmalloc, so the
gap between pcpu vmap and regular vmalloc() should be huge.

> The only thing we may want to verify on the architectures that
> we care about is that there is nothing mapped immediately before
> the start of the vmalloc range, otherwise the first vmalloced
> area will not have a guard page below it.
>
> I suspect all the 64 bit architectures are fine in that regard,
> with enormous gaps between kernel memory ranges.
>
> --
> All Rights Reversed.
>

Re: [PATCH 3/7] security,overlayfs: Provide security hook for copy up of xattrs for overlay file

2016-07-11 Thread Vivek Goyal

On Mon, Jul 11, 2016 at 11:31:47AM -0400, Stephen Smalley wrote:
> On 07/08/2016 12:19 PM, Vivek Goyal wrote:
> > Provide a security hook which is called when xattrs of a file are being
> > copied up. This hook is called once for each xattr and LSM can return 0
> > to access the xattr, 1 to reject xattr, -EOPNOTSUPP if none of the lsms
> > claim to know xattr and a negative error code if something went terribly
> > wrong.
> 
> 0 if the security module wants the xattr to be copied up, 1 if the
> security module wants the xattr to be discarded on the copy, -EOPNOTSUPP
> if the security module does not handle/manage the xattr, or a -errno
> upon an error.

Ok, will change the description.

> 
> > 
> > If 0 or -EOPNOTSUPP is returned, xattr will be copied up, if 1 is returned,
> > xattr will not be copied up and if negative error code is returned, copy up
> > will be aborted.
> 
> Not sure I understand the benefit of the 0 vs -EOPNOTSUPP distinction.

I am not sure either. Casey wanted to have four states so I introduced it. 

Thanks
Vivek

Re: [PATCH 3/7] security,overlayfs: Provide security hook for copy up of xattrs for overlay file

2016-07-11 Thread Vivek Goyal

On Mon, Jul 11, 2016 at 11:31:47AM -0400, Stephen Smalley wrote:
> On 07/08/2016 12:19 PM, Vivek Goyal wrote:
> > Provide a security hook which is called when xattrs of a file are being
> > copied up. This hook is called once for each xattr and LSM can return 0
> > to access the xattr, 1 to reject xattr, -EOPNOTSUPP if none of the lsms
> > claim to know xattr and a negative error code if something went terribly
> > wrong.
> 
> 0 if the security module wants the xattr to be copied up, 1 if the
> security module wants the xattr to be discarded on the copy, -EOPNOTSUPP
> if the security module does not handle/manage the xattr, or a -errno
> upon an error.

Ok, will change the description.

> 
> > 
> > If 0 or -EOPNOTSUPP is returned, xattr will be copied up, if 1 is returned,
> > xattr will not be copied up and if negative error code is returned, copy up
> > will be aborted.
> 
> Not sure I understand the benefit of the 0 vs -EOPNOTSUPP distinction.

I am not sure either. Casey wanted to have four states so I introduced it. 

Thanks
Vivek

Re: [PATCH 2/2] trace-cmd: Use tracecmd_peek_next_data() in fgraph_ent_handler

2016-07-11 Thread Steven Rostedt

On Fri,  8 Jul 2016 14:56:12 +0900
Namhyung Kim  wrote:

> When a task was migrated to other cpu in the middle of a function, the
> fgraph_exit record will be in a different cpu than the enter record.
> But currently fgraph_ent_handler() only peeks at the same cpu so it
> could read an incorrect record.
> 
> For example, please see following raw records:
> 
>   bash-10478 [007]73.454273: funcgraph_entry:   func=0x8123bf90 
> depth=0
>   bash-10478 [000]73.454650: funcgraph_exit:func=0x8123bf90 
> calltime=0x111a37483c rettime=0x111a3d0285 overrun=0x0 depth=0
>   bash-10478 [000]74.456383: funcgraph_entry:   func=0x8123bf90 
> depth=0
>   bash-10478 [000]74.456655: funcgraph_exit:func=0x8123bf90 
> calltime=0x1155f24337 rettime=0x1155f66559 overrun=0x0 depth=0
>   bash-10478 [000]75.458517: funcgraph_entry:   func=0x8123bf90 
> depth=0
>   bash-10478 [001]75.458849: funcgraph_exit:func=0x8123bf90 
> calltime=0x1191ad9de0 rettime=0x1191b2a6aa overrun=0x0 depth=0
>   bash-10478 [001]76.460482: funcgraph_entry:   func=0x8123bf90 
> depth=0
>   bash-10478 [000]76.460679: funcgraph_exit:func=0x8123bf90 
> calltime=0x11cd6662b4 rettime=0x11cd695e03 overrun=0x0 depth=0
>   bash-10478 [000]77.462334: funcgraph_entry:   func=0x8123bf90 
> depth=0
>   bash-10478 [004]77.462564: funcgraph_exit:func=0x8123bf90 
> calltime=0x12091d71c4 rettime=0x120920e977 overrun=0x0 depth=0
>   bash-10478 [004]78.464315: funcgraph_entry:   func=0x8123bf90 
> depth=0
>   bash-10478 [001]78.464644: funcgraph_exit:func=0x8123bf90 
> calltime=0x1244d674de rettime=0x1244db7329 overrun=0x0 depth=0
>   bash-10478 [001]79.466018: funcgraph_entry:   func=0x8123bf90 
> depth=0
>   bash-10478 [004]79.466326: funcgraph_exit:func=0x8123bf90 
> calltime=0x12808b3940 rettime=0x12808fe819 overrun=0x0 depth=0
>   bash-10478 [004]80.468005: funcgraph_entry:   func=0x8123bf90 
> depth=0
>   bash-10478 [002]80.468291: funcgraph_exit:func=0x8123bf90 
> calltime=0x12bc44551f rettime=0x12bc48ac9a overrun=0x0 depth=0
>   bash-10478 [002]81.469718: funcgraph_entry:   func=0x8123bf90 
> depth=0
>   bash-10478 [007]81.470088: funcgraph_exit:func=0x8123bf90 
> calltime=0x12f7f945b8 rettime=0x12f7fee028 overrun=0x0 depth=0
> 
> The first entry was a call to the cma_alloc function; it was on cpu 7 but the
> task was migrated to cpu 0 before returning from the function.
> Currently trace-cmd shows output like below:
> 
>   bash-10478 [007]73.454273: funcgraph_entry:  ! 367.216 us |  
> cma_alloc();
>   bash-10478 [000]73.454650: funcgraph_exit:   ! 375.369 us |  }
>   bash-10478 [000]74.456383: funcgraph_entry:  ! 270.882 us |  
> cma_alloc();
>   bash-10478 [000]75.458517: funcgraph_entry:  ! 195.407 us |  
> cma_alloc();
>   bash-10478 [001]75.458849: funcgraph_exit:   ! 329.930 us |  }
>   bash-10478 [001]76.460482: funcgraph_entry:  ! 327.243 us |  
> cma_alloc();
>   bash-10478 [000]77.462334: funcgraph_entry:  ! 293.465 us |  
> cma_alloc();
>   bash-10478 [004]77.462564: funcgraph_exit:   ! 227.251 us |  }
>   bash-10478 [004]78.464315: funcgraph_entry:  ! 306.905 us |  
> cma_alloc();
>   bash-10478 [001]79.466018: funcgraph_entry:  ! 303.196 us |  
> cma_alloc();
>   bash-10478 [004]80.468005: funcgraph_entry:   |  
> cma_alloc() {
>   bash-10478 [002]80.468291: funcgraph_exit:   ! 284.539 us |  }
>   bash-10478 [002]81.469718: funcgraph_entry:  ! 323.215 us |  
> cma_alloc();
> 
> This is because the first funcgraph_entry on cpu 7 matched to the last
> funcgraph_exit on cpu 7.  And second funcgraph_exit on cpu 0 was shown
> alone.  We need to match record from all cpu rather than the same cpu.
> In this case, entry on cpu 7 should be paired with exit on cpu 0.
> 
> With this patch, the output looks like below:
> 
>   bash-10478 [007]73.454273: funcgraph_entry:  ! 375.369 us |  
> cma_alloc();
>   bash-10478 [000]74.456383: funcgraph_entry:  ! 270.882 us |  
> cma_alloc();
>   bash-10478 [000]75.458517: funcgraph_entry:  ! 329.930 us |  
> cma_alloc();
>   bash-10478 [001]76.460482: funcgraph_entry:  ! 195.407 us |  
> cma_alloc();
>   bash-10478 [000]77.462334: funcgraph_entry:  ! 227.251 us |  
> cma_alloc();
>   bash-10478 [004]78.464315: funcgraph_entry:  ! 327.243 us |  
> cma_alloc();
>   bash-10478 [001]79.466018: funcgraph_entry:  ! 306.905 us |  
> cma_alloc();
>   bash-10478 [004]80.468005: funcgraph_entry:  ! 284.539 us |  
> cma_alloc();
>   bash-10478 [002]81.469718: funcgraph_entry:  ! 367.216 us |  
> cma_alloc();
> 
> Maybe we can separate enter and exit if they happened on different
> cpu.  Anyway the

Re: [PATCH 2/2] trace-cmd: Use tracecmd_peek_next_data() in fgraph_ent_handler

2016-07-11 Thread Steven Rostedt

On Fri,  8 Jul 2016 14:56:12 +0900
Namhyung Kim  wrote:

> When a task was migrated to other cpu in the middle of a function, the
> fgraph_exit record will be in a different cpu than the enter record.
> But currently fgraph_ent_handler() only peeks at the same cpu so it
> could read an incorrect record.
> 
> For example, please see following raw records:
> 
>   bash-10478 [007]73.454273: funcgraph_entry:   func=0x8123bf90 
> depth=0
>   bash-10478 [000]73.454650: funcgraph_exit:func=0x8123bf90 
> calltime=0x111a37483c rettime=0x111a3d0285 overrun=0x0 depth=0
>   bash-10478 [000]74.456383: funcgraph_entry:   func=0x8123bf90 
> depth=0
>   bash-10478 [000]74.456655: funcgraph_exit:func=0x8123bf90 
> calltime=0x1155f24337 rettime=0x1155f66559 overrun=0x0 depth=0
>   bash-10478 [000]75.458517: funcgraph_entry:   func=0x8123bf90 
> depth=0
>   bash-10478 [001]75.458849: funcgraph_exit:func=0x8123bf90 
> calltime=0x1191ad9de0 rettime=0x1191b2a6aa overrun=0x0 depth=0
>   bash-10478 [001]76.460482: funcgraph_entry:   func=0x8123bf90 
> depth=0
>   bash-10478 [000]76.460679: funcgraph_exit:func=0x8123bf90 
> calltime=0x11cd6662b4 rettime=0x11cd695e03 overrun=0x0 depth=0
>   bash-10478 [000]77.462334: funcgraph_entry:   func=0x8123bf90 
> depth=0
>   bash-10478 [004]77.462564: funcgraph_exit:func=0x8123bf90 
> calltime=0x12091d71c4 rettime=0x120920e977 overrun=0x0 depth=0
>   bash-10478 [004]78.464315: funcgraph_entry:   func=0x8123bf90 
> depth=0
>   bash-10478 [001]78.464644: funcgraph_exit:func=0x8123bf90 
> calltime=0x1244d674de rettime=0x1244db7329 overrun=0x0 depth=0
>   bash-10478 [001]79.466018: funcgraph_entry:   func=0x8123bf90 
> depth=0
>   bash-10478 [004]79.466326: funcgraph_exit:func=0x8123bf90 
> calltime=0x12808b3940 rettime=0x12808fe819 overrun=0x0 depth=0
>   bash-10478 [004]80.468005: funcgraph_entry:   func=0x8123bf90 
> depth=0
>   bash-10478 [002]80.468291: funcgraph_exit:func=0x8123bf90 
> calltime=0x12bc44551f rettime=0x12bc48ac9a overrun=0x0 depth=0
>   bash-10478 [002]81.469718: funcgraph_entry:   func=0x8123bf90 
> depth=0
>   bash-10478 [007]81.470088: funcgraph_exit:func=0x8123bf90 
> calltime=0x12f7f945b8 rettime=0x12f7fee028 overrun=0x0 depth=0
> 
> The first entry was a call to the cma_alloc function; it was on cpu 7 but the
> task was migrated to cpu 0 before returning from the function.
> Currently trace-cmd shows output like below:
> 
>   bash-10478 [007]73.454273: funcgraph_entry:  ! 367.216 us |  
> cma_alloc();
>   bash-10478 [000]73.454650: funcgraph_exit:   ! 375.369 us |  }
>   bash-10478 [000]74.456383: funcgraph_entry:  ! 270.882 us |  
> cma_alloc();
>   bash-10478 [000]75.458517: funcgraph_entry:  ! 195.407 us |  
> cma_alloc();
>   bash-10478 [001]75.458849: funcgraph_exit:   ! 329.930 us |  }
>   bash-10478 [001]76.460482: funcgraph_entry:  ! 327.243 us |  
> cma_alloc();
>   bash-10478 [000]77.462334: funcgraph_entry:  ! 293.465 us |  
> cma_alloc();
>   bash-10478 [004]77.462564: funcgraph_exit:   ! 227.251 us |  }
>   bash-10478 [004]78.464315: funcgraph_entry:  ! 306.905 us |  
> cma_alloc();
>   bash-10478 [001]79.466018: funcgraph_entry:  ! 303.196 us |  
> cma_alloc();
>   bash-10478 [004]80.468005: funcgraph_entry:   |  
> cma_alloc() {
>   bash-10478 [002]80.468291: funcgraph_exit:   ! 284.539 us |  }
>   bash-10478 [002]81.469718: funcgraph_entry:  ! 323.215 us |  
> cma_alloc();
> 
> This is because the first funcgraph_entry on cpu 7 matched to the last
> funcgraph_exit on cpu 7.  And second funcgraph_exit on cpu 0 was shown
> alone.  We need to match record from all cpu rather than the same cpu.
> In this case, entry on cpu 7 should be paired with exit on cpu 0.
> 
> With this patch, the output looks like below:
> 
>   bash-10478 [007]73.454273: funcgraph_entry:  ! 375.369 us |  
> cma_alloc();
>   bash-10478 [000]74.456383: funcgraph_entry:  ! 270.882 us |  
> cma_alloc();
>   bash-10478 [000]75.458517: funcgraph_entry:  ! 329.930 us |  
> cma_alloc();
>   bash-10478 [001]76.460482: funcgraph_entry:  ! 195.407 us |  
> cma_alloc();
>   bash-10478 [000]77.462334: funcgraph_entry:  ! 227.251 us |  
> cma_alloc();
>   bash-10478 [004]78.464315: funcgraph_entry:  ! 327.243 us |  
> cma_alloc();
>   bash-10478 [001]79.466018: funcgraph_entry:  ! 306.905 us |  
> cma_alloc();
>   bash-10478 [004]80.468005: funcgraph_entry:  ! 284.539 us |  
> cma_alloc();
>   bash-10478 [002]81.469718: funcgraph_entry:  ! 367.216 us |  
> cma_alloc();
> 
> Maybe we can separate enter and exit if they happened on different
> cpu.  Anyway the time duration has

[PATCH 3/3] time: drop local_irq_save/restore from irqtime_account_irq

2016-07-11 Thread riel

From: Rik van Riel 

Paolo pointed out that irqs are already blocked when irqtime_account_irq
is called. That means there is no reason to call local_irq_save/restore
again.

Signed-off-by: Rik van Riel 
Suggested-by: Paolo Bonzini 
---
 kernel/sched/cputime.c | 4 
 1 file changed, 4 deletions(-)

diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index ca7e33cb0967..7b6fa4d7ad4c 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -49,15 +49,12 @@ DEFINE_PER_CPU(seqcount_t, irq_time_seq);
  */
 void irqtime_account_irq(struct task_struct *curr)
 {
-   unsigned long flags;
s64 delta;
int cpu;
 
if (!sched_clock_irqtime)
return;
 
-   local_irq_save(flags);
-
cpu = smp_processor_id();
delta = sched_clock_cpu(cpu) - __this_cpu_read(irq_start_time);
__this_cpu_add(irq_start_time, delta);
@@ -75,7 +72,6 @@ void irqtime_account_irq(struct task_struct *curr)
__this_cpu_add(cpu_softirq_time, delta);
 
irq_time_write_end();
-   local_irq_restore(flags);
 }
 EXPORT_SYMBOL_GPL(irqtime_account_irq);
 
-- 
2.7.4

[PATCH 3/3] time: drop local_irq_save/restore from irqtime_account_irq

2016-07-11 Thread riel

From: Rik van Riel 

Paolo pointed out that irqs are already blocked when irqtime_account_irq
is called. That means there is no reason to call local_irq_save/restore
again.

Signed-off-by: Rik van Riel 
Suggested-by: Paolo Bonzini 
---
 kernel/sched/cputime.c | 4 
 1 file changed, 4 deletions(-)

diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index ca7e33cb0967..7b6fa4d7ad4c 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -49,15 +49,12 @@ DEFINE_PER_CPU(seqcount_t, irq_time_seq);
  */
 void irqtime_account_irq(struct task_struct *curr)
 {
-   unsigned long flags;
s64 delta;
int cpu;
 
if (!sched_clock_irqtime)
return;
 
-   local_irq_save(flags);
-
cpu = smp_processor_id();
delta = sched_clock_cpu(cpu) - __this_cpu_read(irq_start_time);
__this_cpu_add(irq_start_time, delta);
@@ -75,7 +72,6 @@ void irqtime_account_irq(struct task_struct *curr)
__this_cpu_add(cpu_softirq_time, delta);
 
irq_time_write_end();
-   local_irq_restore(flags);
 }
 EXPORT_SYMBOL_GPL(irqtime_account_irq);
 
-- 
2.7.4

[PATCH 2/3] nohz,cputime: replace VTIME_GEN irq time code with IRQ_TIME_ACCOUNTING code

2016-07-11 Thread riel

From: Rik van Riel 

The CONFIG_VIRT_CPU_ACCOUNTING_GEN irq time tracking code does not
appear to currently work right.

On CPUs without nohz_full=, only tick based irq time sampling is
done, which breaks down when dealing with a nohz_idle CPU.

On firewalls and similar systems, no ticks may happen on a CPU for a
while, and the irq time spent may never get accounted properly. This
can cause issues with capacity planning and power saving, which use
the CPU statistics as inputs in decision making.

Remove the VTIME_GEN vtime irq time code, and replace it with the
IRQ_TIME_ACCOUNTING code, when selected as a config option by the user.

Signed-off-by: Rik van Riel 
---
 include/linux/vtime.h  | 32 ++--
 init/Kconfig   |  6 +++---
 kernel/sched/cputime.c | 16 +++-
 3 files changed, 20 insertions(+), 34 deletions(-)

diff --git a/include/linux/vtime.h b/include/linux/vtime.h
index fa2196990f84..d1977d84ebdf 100644
--- a/include/linux/vtime.h
+++ b/include/linux/vtime.h
@@ -14,6 +14,18 @@ struct task_struct;
  */
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 static inline bool vtime_accounting_cpu_enabled(void) { return true; }
+
+#ifdef __ARCH_HAS_VTIME_ACCOUNT
+extern void vtime_account_irq_enter(struct task_struct *tsk);
+#else
+extern void vtime_common_account_irq_enter(struct task_struct *tsk);
+static inline void vtime_account_irq_enter(struct task_struct *tsk)
+{
+   if (vtime_accounting_cpu_enabled())
+   vtime_common_account_irq_enter(tsk);
+}
+#endif /* __ARCH_HAS_VTIME_ACCOUNT */
+
 #endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
 
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
@@ -64,17 +76,6 @@ extern void vtime_account_system(struct task_struct *tsk);
 extern void vtime_account_idle(struct task_struct *tsk);
 extern void vtime_account_user(struct task_struct *tsk);
 
-#ifdef __ARCH_HAS_VTIME_ACCOUNT
-extern void vtime_account_irq_enter(struct task_struct *tsk);
-#else
-extern void vtime_common_account_irq_enter(struct task_struct *tsk);
-static inline void vtime_account_irq_enter(struct task_struct *tsk)
-{
-   if (vtime_accounting_cpu_enabled())
-   vtime_common_account_irq_enter(tsk);
-}
-#endif /* __ARCH_HAS_VTIME_ACCOUNT */
-
 #else /* !CONFIG_VIRT_CPU_ACCOUNTING */
 
 static inline void vtime_task_switch(struct task_struct *prev) { }
@@ -85,13 +86,8 @@ static inline void vtime_account_irq_enter(struct 
task_struct *tsk) { }
 
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
 extern void arch_vtime_task_switch(struct task_struct *tsk);
-extern void vtime_gen_account_irq_exit(struct task_struct *tsk);
-
-static inline void vtime_account_irq_exit(struct task_struct *tsk)
-{
-   if (vtime_accounting_cpu_enabled())
-   vtime_gen_account_irq_exit(tsk);
-}
+static inline void vtime_account_irq_enter(struct task_struct *tsk) { }
+static inline void vtime_account_irq_exit(struct task_struct *tsk) { }
 
 extern void vtime_user_enter(struct task_struct *tsk);
 
diff --git a/init/Kconfig b/init/Kconfig
index 0dfd09d54c65..4c7ee4f136cf 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -375,9 +375,11 @@ config VIRT_CPU_ACCOUNTING_GEN
 
  If unsure, say N.
 
+endchoice
+
 config IRQ_TIME_ACCOUNTING
bool "Fine granularity task level IRQ time accounting"
-   depends on HAVE_IRQ_TIME_ACCOUNTING && !NO_HZ_FULL
+   depends on HAVE_IRQ_TIME_ACCOUNTING && !VIRT_CPU_ACCOUNTING_NATIVE
help
  Select this option to enable fine granularity task irq time
  accounting. This is done by reading a timestamp on each
@@ -386,8 +388,6 @@ config IRQ_TIME_ACCOUNTING
 
  If in doubt, say N here.
 
-endchoice
-
 config BSD_PROCESS_ACCT
bool "BSD Process Accounting"
depends on MULTIUSER
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index db82ae12cf01..ca7e33cb0967 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -711,14 +711,14 @@ static cputime_t vtime_delta(struct task_struct *tsk)
 static cputime_t get_vtime_delta(struct task_struct *tsk)
 {
unsigned long now = READ_ONCE(jiffies);
-   cputime_t delta, steal;
+   cputime_t delta, other;
 
delta = jiffies_to_cputime(now - tsk->vtime_snap);
-   steal = steal_account_process_time(delta);
+   other = account_other_time(delta);
WARN_ON_ONCE(tsk->vtime_snap_whence == VTIME_INACTIVE);
tsk->vtime_snap = now;
 
-   return delta - steal;
+   return delta - other;
 }
 
 static void __vtime_account_system(struct task_struct *tsk)
@@ -738,16 +738,6 @@ void vtime_account_system(struct task_struct *tsk)
write_seqcount_end(>vtime_seqcount);
 }
 
-void vtime_gen_account_irq_exit(struct task_struct *tsk)
-{
-   write_seqcount_begin(>vtime_seqcount);
-   if (vtime_delta(tsk))
-   __vtime_account_system(tsk);
-   if (context_tracking_in_user())
-   tsk->vtime_snap_whence = VTIME_USER;
-

[PATCH 2/3] nohz,cputime: replace VTIME_GEN irq time code with IRQ_TIME_ACCOUNTING code

2016-07-11 Thread riel

From: Rik van Riel 

The CONFIG_VIRT_CPU_ACCOUNTING_GEN irq time tracking code does not
appear to currently work right.

On CPUs without nohz_full=, only tick based irq time sampling is
done, which breaks down when dealing with a nohz_idle CPU.

On firewalls and similar systems, no ticks may happen on a CPU for a
while, and the irq time spent may never get accounted properly. This
can cause issues with capacity planning and power saving, which use
the CPU statistics as inputs in decision making.

Remove the VTIME_GEN vtime irq time code, and replace it with the
IRQ_TIME_ACCOUNTING code, when selected as a config option by the user.

Signed-off-by: Rik van Riel 
---
 include/linux/vtime.h  | 32 ++--
 init/Kconfig   |  6 +++---
 kernel/sched/cputime.c | 16 +++-
 3 files changed, 20 insertions(+), 34 deletions(-)

diff --git a/include/linux/vtime.h b/include/linux/vtime.h
index fa2196990f84..d1977d84ebdf 100644
--- a/include/linux/vtime.h
+++ b/include/linux/vtime.h
@@ -14,6 +14,18 @@ struct task_struct;
  */
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 static inline bool vtime_accounting_cpu_enabled(void) { return true; }
+
+#ifdef __ARCH_HAS_VTIME_ACCOUNT
+extern void vtime_account_irq_enter(struct task_struct *tsk);
+#else
+extern void vtime_common_account_irq_enter(struct task_struct *tsk);
+static inline void vtime_account_irq_enter(struct task_struct *tsk)
+{
+   if (vtime_accounting_cpu_enabled())
+   vtime_common_account_irq_enter(tsk);
+}
+#endif /* __ARCH_HAS_VTIME_ACCOUNT */
+
 #endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
 
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
@@ -64,17 +76,6 @@ extern void vtime_account_system(struct task_struct *tsk);
 extern void vtime_account_idle(struct task_struct *tsk);
 extern void vtime_account_user(struct task_struct *tsk);
 
-#ifdef __ARCH_HAS_VTIME_ACCOUNT
-extern void vtime_account_irq_enter(struct task_struct *tsk);
-#else
-extern void vtime_common_account_irq_enter(struct task_struct *tsk);
-static inline void vtime_account_irq_enter(struct task_struct *tsk)
-{
-   if (vtime_accounting_cpu_enabled())
-   vtime_common_account_irq_enter(tsk);
-}
-#endif /* __ARCH_HAS_VTIME_ACCOUNT */
-
 #else /* !CONFIG_VIRT_CPU_ACCOUNTING */
 
 static inline void vtime_task_switch(struct task_struct *prev) { }
@@ -85,13 +86,8 @@ static inline void vtime_account_irq_enter(struct 
task_struct *tsk) { }
 
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
 extern void arch_vtime_task_switch(struct task_struct *tsk);
-extern void vtime_gen_account_irq_exit(struct task_struct *tsk);
-
-static inline void vtime_account_irq_exit(struct task_struct *tsk)
-{
-   if (vtime_accounting_cpu_enabled())
-   vtime_gen_account_irq_exit(tsk);
-}
+static inline void vtime_account_irq_enter(struct task_struct *tsk) { }
+static inline void vtime_account_irq_exit(struct task_struct *tsk) { }
 
 extern void vtime_user_enter(struct task_struct *tsk);
 
diff --git a/init/Kconfig b/init/Kconfig
index 0dfd09d54c65..4c7ee4f136cf 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -375,9 +375,11 @@ config VIRT_CPU_ACCOUNTING_GEN
 
  If unsure, say N.
 
+endchoice
+
 config IRQ_TIME_ACCOUNTING
bool "Fine granularity task level IRQ time accounting"
-   depends on HAVE_IRQ_TIME_ACCOUNTING && !NO_HZ_FULL
+   depends on HAVE_IRQ_TIME_ACCOUNTING && !VIRT_CPU_ACCOUNTING_NATIVE
help
  Select this option to enable fine granularity task irq time
  accounting. This is done by reading a timestamp on each
@@ -386,8 +388,6 @@ config IRQ_TIME_ACCOUNTING
 
  If in doubt, say N here.
 
-endchoice
-
 config BSD_PROCESS_ACCT
bool "BSD Process Accounting"
depends on MULTIUSER
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index db82ae12cf01..ca7e33cb0967 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -711,14 +711,14 @@ static cputime_t vtime_delta(struct task_struct *tsk)
 static cputime_t get_vtime_delta(struct task_struct *tsk)
 {
unsigned long now = READ_ONCE(jiffies);
-   cputime_t delta, steal;
+   cputime_t delta, other;
 
delta = jiffies_to_cputime(now - tsk->vtime_snap);
-   steal = steal_account_process_time(delta);
+   other = account_other_time(delta);
WARN_ON_ONCE(tsk->vtime_snap_whence == VTIME_INACTIVE);
tsk->vtime_snap = now;
 
-   return delta - steal;
+   return delta - other;
 }
 
 static void __vtime_account_system(struct task_struct *tsk)
@@ -738,16 +738,6 @@ void vtime_account_system(struct task_struct *tsk)
write_seqcount_end(>vtime_seqcount);
 }
 
-void vtime_gen_account_irq_exit(struct task_struct *tsk)
-{
-   write_seqcount_begin(>vtime_seqcount);
-   if (vtime_delta(tsk))
-   __vtime_account_system(tsk);
-   if (context_tracking_in_user())
-   tsk->vtime_snap_whence = VTIME_USER;
-

[PATCH v4 0/3] sched,time: fix irq time accounting with nohz_idle

2016-07-11 Thread riel

Currently irq time accounting only works in these cases:
1) purely tick based accounting
2) nohz_full accounting, but only on housekeeping & nohz_full CPUs
3) architectures with native vtime accounting

On nohz_idle CPUs, which are probably the majority nowadays,
irq time accounting is currently broken. This leads to systems
reporting a dramatically lower amount of irq & softirq time than
is actually spent handling them, with all the time spent while the
system is in the idle task being accounted as idle.

This patch set seems to bring the amount of irq time reported by
top (and /proc/stat) roughly in line with that measured when I do
a "perf record -g -a" run to see what is using all that time.

The amount of irq time used, especially softirq, is shockingly high,
to the point of me thinking this patch set may be wrong, but the
numbers seem to match what perf is giving me...

These patches apply on top of Wanpeng Li's steal time patches.

CONFIG_IRQ_TIME_ACCOUNTING is now a config option that is available
as a separate choice from tick based / nohz_idle / nohz_full mode,
as suggested by Frederic Weisbecker.

Next up: look at the things that are using CPU time on an otherwise
idle system, and see if I can make those a little faster :)

v2: address Peterz's concerns, some more cleanups
v3: rewrite the code along Frederic's suggestions, now cputime_t
is used everywhere
v4: greatly simplify the local_irq_save/restore optimisation, thanks
to Paolo pointing out irqs are already blocked by the callers

[PATCH v4 0/3] sched,time: fix irq time accounting with nohz_idle

2016-07-11 Thread riel

Currently irq time accounting only works in these cases:
1) purely tick based accounting
2) nohz_full accounting, but only on housekeeping & nohz_full CPUs
3) architectures with native vtime accounting

On nohz_idle CPUs, which are probably the majority nowadays,
irq time accounting is currently broken. This leads to systems
reporting a dramatically lower amount of irq & softirq time than
is actually spent handling them, with all the time spent while the
system is in the idle task being accounted as idle.

This patch set seems to bring the amount of irq time reported by
top (and /proc/stat) roughly in line with that measured when I do
a "perf record -g -a" run to see what is using all that time.

The amount of irq time used, especially softirq, is shockingly high,
to the point of me thinking this patch set may be wrong, but the
numbers seem to match what perf is giving me...

These patches apply on top of Wanpeng Li's steal time patches.

CONFIG_IRQ_TIME_ACCOUNTING is now a config option that is available
as a separate choice from tick based / nohz_idle / nohz_full mode,
as suggested by Frederic Weisbecker.

Next up: look at the things that are using CPU time on an otherwise
idle system, and see if I can make those a little faster :)

v2: address Peterz's concerns, some more cleanups
v3: rewrite the code along Frederic's suggestions, now cputime_t
is used everywhere
v4: greatly simplify the local_irq_save/restore optimisation, thanks
to Paolo pointing out irqs are already blocked by the callers

Re: [PATCH 1/7] security, overlayfs: provide copy up security hook for unioned files

2016-07-11 Thread Vivek Goyal

On Mon, Jul 11, 2016 at 11:24:26AM -0400, Stephen Smalley wrote:
> On 07/08/2016 12:19 PM, Vivek Goyal wrote:
> > Provide a security hook to label new file correctly when a file is copied
> > up from lower layer to upper layer of a overlay/union mount.
> > 
> > This hook can prepare a new set of creds which are suitable for new file
> > creation during copy up. Caller will use new creds to create file and then
> > revert back to old creds and release new creds.
> > 
> > Signed-off-by: Vivek Goyal 
> > ---
> >  fs/overlayfs/copy_up.c| 18 ++
> >  include/linux/lsm_hooks.h | 11 +++
> >  include/linux/security.h  |  6 ++
> >  security/security.c   |  8 
> >  4 files changed, 43 insertions(+)
> > 
> > diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
> > index 80aa6f1..8ebea18 100644
> > --- a/fs/overlayfs/copy_up.c
> > +++ b/fs/overlayfs/copy_up.c
> > @@ -246,6 +246,8 @@ static int ovl_copy_up_locked(struct dentry *workdir, 
> > struct dentry *upperdir,
> > struct dentry *upper = NULL;
> > umode_t mode = stat->mode;
> > int err;
> > +   const struct cred *old_creds = NULL;
> > +   struct cred *new_creds = NULL;
> >  
> > newdentry = ovl_lookup_temp(workdir, dentry);
> > err = PTR_ERR(newdentry);
> > @@ -258,10 +260,26 @@ static int ovl_copy_up_locked(struct dentry *workdir, 
> > struct dentry *upperdir,
> > if (IS_ERR(upper))
> > goto out1;
> >  
> > +   err = security_inode_copy_up(dentry, _creds);
> > +   if (err < 0) {
> > +   if (new_creds)
> > +   put_cred(new_creds);
> 
> Why do we need a put_cred() here?

Being paranoid for the case of stacked modules. Say first module allocated
creds but second module returned error, in that case creds will have to
be freed.

I can get rid of it for now and if in future two LSMs implement this hook,
one can change it, if need be.

Thanks
Vivek

Re: [PATCH 1/7] security, overlayfs: provide copy up security hook for unioned files

2016-07-11 Thread Vivek Goyal

On Mon, Jul 11, 2016 at 11:24:26AM -0400, Stephen Smalley wrote:
> On 07/08/2016 12:19 PM, Vivek Goyal wrote:
> > Provide a security hook to label new file correctly when a file is copied
> > up from lower layer to upper layer of a overlay/union mount.
> > 
> > This hook can prepare a new set of creds which are suitable for new file
> > creation during copy up. Caller will use new creds to create file and then
> > revert back to old creds and release new creds.
> > 
> > Signed-off-by: Vivek Goyal 
> > ---
> >  fs/overlayfs/copy_up.c| 18 ++
> >  include/linux/lsm_hooks.h | 11 +++
> >  include/linux/security.h  |  6 ++
> >  security/security.c   |  8 
> >  4 files changed, 43 insertions(+)
> > 
> > diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
> > index 80aa6f1..8ebea18 100644
> > --- a/fs/overlayfs/copy_up.c
> > +++ b/fs/overlayfs/copy_up.c
> > @@ -246,6 +246,8 @@ static int ovl_copy_up_locked(struct dentry *workdir, 
> > struct dentry *upperdir,
> > struct dentry *upper = NULL;
> > umode_t mode = stat->mode;
> > int err;
> > +   const struct cred *old_creds = NULL;
> > +   struct cred *new_creds = NULL;
> >  
> > newdentry = ovl_lookup_temp(workdir, dentry);
> > err = PTR_ERR(newdentry);
> > @@ -258,10 +260,26 @@ static int ovl_copy_up_locked(struct dentry *workdir, 
> > struct dentry *upperdir,
> > if (IS_ERR(upper))
> > goto out1;
> >  
> > +   err = security_inode_copy_up(dentry, _creds);
> > +   if (err < 0) {
> > +   if (new_creds)
> > +   put_cred(new_creds);
> 
> Why do we need a put_cred() here?

Being paranoid for the case of stacked modules. Say first module allocated
creds but second module returned error, in that case creds will have to
be freed.

I can get rid of it for now and if in future two LSMs implement this hook,
one can change it, if need be.

Thanks
Vivek

[PATCH 1/3] sched,time: count actually elapsed irq & softirq time

2016-07-11 Thread riel

From: Rik van Riel 

Currently, if there was any irq or softirq time during 'ticks'
jiffies, the entire period will be accounted as irq or softirq
time.

This is inaccurate if only a subset of the time was actually spent
handling irqs, and could conceivably mis-count all of the ticks during
a period as irq time, when there was some irq and some softirq time.

This can actually happen when irqtime_account_process_tick is called
from account_idle_ticks, which can pass a larger number of ticks down
all at once.

Fix this by changing irqtime_account_hi_update, irqtime_account_si_update,
and steal_account_process_tick to work with cputime_t time units, and
return the amount of time spent in each mode.

Rename steal_account_process_tick to steal_account_process_time, to
reflect that time is now accounted in cputime_t, instead of ticks.

Additionally, have irqtime_account_process_tick take into account how
much time was spent in each of steal, irq, and softirq time.

The latter could help improve the accuracy of cputime
accounting when returning from idle on a NO_HZ_IDLE CPU.

Properly accounting how much time was spent in hardirq and
softirq time will also allow the NO_HZ_FULL code to re-use
these same functions for hardirq and softirq accounting.

Signed-off-by: Rik van Riel 
---
 include/asm-generic/cputime_nsecs.h |   2 +
 kernel/sched/cputime.c  | 124 ++--
 2 files changed, 79 insertions(+), 47 deletions(-)

diff --git a/include/asm-generic/cputime_nsecs.h 
b/include/asm-generic/cputime_nsecs.h
index 0f1c6f315cdc..918ebb01486c 100644
--- a/include/asm-generic/cputime_nsecs.h
+++ b/include/asm-generic/cputime_nsecs.h
@@ -50,6 +50,8 @@ typedef u64 __nocast cputime64_t;
(__force u64)(__ct)
 #define nsecs_to_cputime(__nsecs)  \
(__force cputime_t)(__nsecs)
+#define nsecs_to_cputime64(__nsecs)\
+   (__force cputime_t)(__nsecs)
 
 
 /*
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 3d60e5d76fdb..db82ae12cf01 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -79,40 +79,50 @@ void irqtime_account_irq(struct task_struct *curr)
 }
 EXPORT_SYMBOL_GPL(irqtime_account_irq);
 
-static int irqtime_account_hi_update(void)
+static cputime_t irqtime_account_hi_update(cputime_t maxtime)
 {
u64 *cpustat = kcpustat_this_cpu->cpustat;
unsigned long flags;
-   u64 latest_ns;
-   int ret = 0;
+   cputime_t irq_cputime;
 
local_irq_save(flags);
-   latest_ns = this_cpu_read(cpu_hardirq_time);
-   if (nsecs_to_cputime64(latest_ns) > cpustat[CPUTIME_IRQ])
-   ret = 1;
+   irq_cputime = nsecs_to_cputime64(this_cpu_read(cpu_hardirq_time)) -
+ cpustat[CPUTIME_IRQ];
+   irq_cputime = min(irq_cputime, maxtime);
+   cpustat[CPUTIME_IRQ] += irq_cputime;
local_irq_restore(flags);
-   return ret;
+   return irq_cputime;
 }
 
-static int irqtime_account_si_update(void)
+static cputime_t irqtime_account_si_update(cputime_t maxtime)
 {
u64 *cpustat = kcpustat_this_cpu->cpustat;
unsigned long flags;
-   u64 latest_ns;
-   int ret = 0;
+   cputime_t softirq_cputime;
 
local_irq_save(flags);
-   latest_ns = this_cpu_read(cpu_softirq_time);
-   if (nsecs_to_cputime64(latest_ns) > cpustat[CPUTIME_SOFTIRQ])
-   ret = 1;
+   softirq_cputime = nsecs_to_cputime64(this_cpu_read(cpu_softirq_time)) -
+ cpustat[CPUTIME_SOFTIRQ];
+   softirq_cputime = min(softirq_cputime, maxtime);
+   cpustat[CPUTIME_SOFTIRQ] += softirq_cputime;
local_irq_restore(flags);
-   return ret;
+   return softirq_cputime;
 }
 
 #else /* CONFIG_IRQ_TIME_ACCOUNTING */
 
 #define sched_clock_irqtime(0)
 
+static cputime_t irqtime_account_hi_update(cputime_t dummy)
+{
+   return 0;
+}
+
+static cputime_t irqtime_account_si_update(cputime_t dummy)
+{
+   return 0;
+}
+
 #endif /* !CONFIG_IRQ_TIME_ACCOUNTING */
 
 static inline void task_group_account_field(struct task_struct *p, int index,
@@ -257,32 +267,45 @@ void account_idle_time(cputime_t cputime)
cpustat[CPUTIME_IDLE] += (__force u64) cputime;
 }
 
-static __always_inline unsigned long steal_account_process_tick(unsigned long 
max_jiffies)
+static __always_inline cputime_t steal_account_process_time(cputime_t maxtime)
 {
 #ifdef CONFIG_PARAVIRT
if (static_key_false(_steal_enabled)) {
+   cputime_t steal_cputime;
u64 steal;
-   unsigned long steal_jiffies;
 
steal = paravirt_steal_clock(smp_processor_id());
steal -= this_rq()->prev_steal_time;
 
-   /*
-* steal is in nsecs but our caller is expecting steal
-* time in jiffies. Lets cast the result to jiffies
-* granularity and account the rest on the next rounds.
-

[PATCH 1/3] sched,time: count actually elapsed irq & softirq time

2016-07-11 Thread riel

From: Rik van Riel 

Currently, if there was any irq or softirq time during 'ticks'
jiffies, the entire period will be accounted as irq or softirq
time.

This is inaccurate if only a subset of the time was actually spent
handling irqs, and could conceivably mis-count all of the ticks during
a period as irq time, when there was some irq and some softirq time.

This can actually happen when irqtime_account_process_tick is called
from account_idle_ticks, which can pass a larger number of ticks down
all at once.

Fix this by changing irqtime_account_hi_update, irqtime_account_si_update,
and steal_account_process_tick to work with cputime_t time units, and
return the amount of time spent in each mode.

Rename steal_account_process_tick to steal_account_process_time, to
reflect that time is now accounted in cputime_t, instead of ticks.

Additionally, have irqtime_account_process_tick take into account how
much time was spent in each of steal, irq, and softirq time.

The latter could help improve the accuracy of cputime
accounting when returning from idle on a NO_HZ_IDLE CPU.

Properly accounting how much time was spent in hardirq and
softirq time will also allow the NO_HZ_FULL code to re-use
these same functions for hardirq and softirq accounting.

Signed-off-by: Rik van Riel 
---
 include/asm-generic/cputime_nsecs.h |   2 +
 kernel/sched/cputime.c  | 124 ++--
 2 files changed, 79 insertions(+), 47 deletions(-)

diff --git a/include/asm-generic/cputime_nsecs.h 
b/include/asm-generic/cputime_nsecs.h
index 0f1c6f315cdc..918ebb01486c 100644
--- a/include/asm-generic/cputime_nsecs.h
+++ b/include/asm-generic/cputime_nsecs.h
@@ -50,6 +50,8 @@ typedef u64 __nocast cputime64_t;
(__force u64)(__ct)
 #define nsecs_to_cputime(__nsecs)  \
(__force cputime_t)(__nsecs)
+#define nsecs_to_cputime64(__nsecs)\
+   (__force cputime_t)(__nsecs)
 
 
 /*
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 3d60e5d76fdb..db82ae12cf01 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -79,40 +79,50 @@ void irqtime_account_irq(struct task_struct *curr)
 }
 EXPORT_SYMBOL_GPL(irqtime_account_irq);
 
-static int irqtime_account_hi_update(void)
+static cputime_t irqtime_account_hi_update(cputime_t maxtime)
 {
u64 *cpustat = kcpustat_this_cpu->cpustat;
unsigned long flags;
-   u64 latest_ns;
-   int ret = 0;
+   cputime_t irq_cputime;
 
local_irq_save(flags);
-   latest_ns = this_cpu_read(cpu_hardirq_time);
-   if (nsecs_to_cputime64(latest_ns) > cpustat[CPUTIME_IRQ])
-   ret = 1;
+   irq_cputime = nsecs_to_cputime64(this_cpu_read(cpu_hardirq_time)) -
+ cpustat[CPUTIME_IRQ];
+   irq_cputime = min(irq_cputime, maxtime);
+   cpustat[CPUTIME_IRQ] += irq_cputime;
local_irq_restore(flags);
-   return ret;
+   return irq_cputime;
 }
 
-static int irqtime_account_si_update(void)
+static cputime_t irqtime_account_si_update(cputime_t maxtime)
 {
u64 *cpustat = kcpustat_this_cpu->cpustat;
unsigned long flags;
-   u64 latest_ns;
-   int ret = 0;
+   cputime_t softirq_cputime;
 
local_irq_save(flags);
-   latest_ns = this_cpu_read(cpu_softirq_time);
-   if (nsecs_to_cputime64(latest_ns) > cpustat[CPUTIME_SOFTIRQ])
-   ret = 1;
+   softirq_cputime = nsecs_to_cputime64(this_cpu_read(cpu_softirq_time)) -
+ cpustat[CPUTIME_SOFTIRQ];
+   softirq_cputime = min(softirq_cputime, maxtime);
+   cpustat[CPUTIME_SOFTIRQ] += softirq_cputime;
local_irq_restore(flags);
-   return ret;
+   return softirq_cputime;
 }
 
 #else /* CONFIG_IRQ_TIME_ACCOUNTING */
 
 #define sched_clock_irqtime(0)
 
+static cputime_t irqtime_account_hi_update(cputime_t dummy)
+{
+   return 0;
+}
+
+static cputime_t irqtime_account_si_update(cputime_t dummy)
+{
+   return 0;
+}
+
 #endif /* !CONFIG_IRQ_TIME_ACCOUNTING */
 
 static inline void task_group_account_field(struct task_struct *p, int index,
@@ -257,32 +267,45 @@ void account_idle_time(cputime_t cputime)
cpustat[CPUTIME_IDLE] += (__force u64) cputime;
 }
 
-static __always_inline unsigned long steal_account_process_tick(unsigned long 
max_jiffies)
+static __always_inline cputime_t steal_account_process_time(cputime_t maxtime)
 {
 #ifdef CONFIG_PARAVIRT
if (static_key_false(_steal_enabled)) {
+   cputime_t steal_cputime;
u64 steal;
-   unsigned long steal_jiffies;
 
steal = paravirt_steal_clock(smp_processor_id());
steal -= this_rq()->prev_steal_time;
 
-   /*
-* steal is in nsecs but our caller is expecting steal
-* time in jiffies. Lets cast the result to jiffies
-* granularity and account the rest on the next rounds.
-*/
-

Re: [PATCH v3 12/14] regulator: pwm: Retrieve correct voltage

2016-07-11 Thread Doug Anderson

Hi,

On Mon, Jul 11, 2016 at 12:02 AM, Thierry Reding
 wrote:
> On Sat, Jul 09, 2016 at 11:47:18AM +0200, Mark Brown wrote:
>> On Fri, Jul 08, 2016 at 05:43:02PM +0200, Thierry Reding wrote:
>>
>> > Mark, do you want me to provide a stable branch with the PWM regulator
>> > patches and resolve that conflict in your tree? Or would you rather take
>> > the whole set based on a stable branch from the PWM tree? Or maybe yet
>> > another possibility would be to base the PWM tree on a stable branch
>> > from the regulator tree containing the above commit.
>>
>> Probably easiest to use this signed tag and resolve it in your tree:
>>
>> The following changes since commit 1a695a905c18548062509178b98bc91e67510864:
>>
>>   Linux 4.7-rc1 (2016-05-29 09:29:24 -0700)
>>
>> are available in the git repository at:
>>
>>   git://git.kernel.org/pub/scm/linux/kernel/git/broonie/regulator.git 
>> tags/pwm-modernization
>>
>> for you to fetch changes up to c2588393e6315ab68207323d37d2a73713d6bc81:
>>
>>   regulator: pwm: Fix regulator ramp delay for continuous mode (2016-07-07 
>> 11:45:06 +0200)
>>
>> 
>> regulator: Provide a branch for moderninzation of the PWM code
>>
>> There's a new, improved PWM API which allows a lot of improvements in
>> the PWM regulator driver.  Since the bulk of the changes are in the PWM
>> API this is being managed in the PWM tree, merge pending regulator API
>> changes to allow this to be resolved more easily.
>>
>> 
>> Alexandre Courbot (1):
>>   regulator: pwm: Support for enable GPIO
>>
>> Boris Brezillon (1):
>>   regulator: pwm: Drop unneeded pwm_enable() call
>>
>> Douglas Anderson (1):
>>   regulator: pwm: Fix regulator ramp delay for continuous mode
>>
>>  .../bindings/regulator/pwm-regulator.txt   |  7 +++-
>>  drivers/regulator/pwm-regulator.c  | 40 
>> ++
>>  2 files changed, 39 insertions(+), 8 deletions(-)
>
> Merged into for-4.8/regulator of the PWM tree and rebased Boris'
> pwm-regulator patches on top.
>
> Boris, everything looks right to me, but can you take a quick look to
> see if it all matches up with what you expect?

As I mentioned in the other thread about the linux-next conflict,
pwm_regulator_set_voltage() is wrong.

You have:

  ramp_delay = DIV_ROUND_UP(abs(min_uV - old_uV), ramp_delay);

You should have:

  ramp_delay = DIV_ROUND_UP(abs(req_min_uV - old_uV), ramp_delay);

-Doug

Re: [PATCH v3 12/14] regulator: pwm: Retrieve correct voltage

2016-07-11 Thread Doug Anderson

Hi,

On Mon, Jul 11, 2016 at 12:02 AM, Thierry Reding
 wrote:
> On Sat, Jul 09, 2016 at 11:47:18AM +0200, Mark Brown wrote:
>> On Fri, Jul 08, 2016 at 05:43:02PM +0200, Thierry Reding wrote:
>>
>> > Mark, do you want me to provide a stable branch with the PWM regulator
>> > patches and resolve that conflict in your tree? Or would you rather take
>> > the whole set based on a stable branch from the PWM tree? Or maybe yet
>> > another possibility would be to base the PWM tree on a stable branch
>> > from the regulator tree containing the above commit.
>>
>> Probably easiest to use this signed tag and resolve it in your tree:
>>
>> The following changes since commit 1a695a905c18548062509178b98bc91e67510864:
>>
>>   Linux 4.7-rc1 (2016-05-29 09:29:24 -0700)
>>
>> are available in the git repository at:
>>
>>   git://git.kernel.org/pub/scm/linux/kernel/git/broonie/regulator.git 
>> tags/pwm-modernization
>>
>> for you to fetch changes up to c2588393e6315ab68207323d37d2a73713d6bc81:
>>
>>   regulator: pwm: Fix regulator ramp delay for continuous mode (2016-07-07 
>> 11:45:06 +0200)
>>
>> 
>> regulator: Provide a branch for moderninzation of the PWM code
>>
>> There's a new, improved PWM API which allows a lot of improvements in
>> the PWM regulator driver.  Since the bulk of the changes are in the PWM
>> API this is being managed in the PWM tree, merge pending regulator API
>> changes to allow this to be resolved more easily.
>>
>> 
>> Alexandre Courbot (1):
>>   regulator: pwm: Support for enable GPIO
>>
>> Boris Brezillon (1):
>>   regulator: pwm: Drop unneeded pwm_enable() call
>>
>> Douglas Anderson (1):
>>   regulator: pwm: Fix regulator ramp delay for continuous mode
>>
>>  .../bindings/regulator/pwm-regulator.txt   |  7 +++-
>>  drivers/regulator/pwm-regulator.c  | 40 
>> ++
>>  2 files changed, 39 insertions(+), 8 deletions(-)
>
> Merged into for-4.8/regulator of the PWM tree and rebased Boris'
> pwm-regulator patches on top.
>
> Boris, everything looks right to me, but can you take a quick look to
> see if it all matches up with what you expect?

As I mentioned in the other thread about the linux-next conflict,
pwm_regulator_set_voltage() is wrong.

You have:

  ramp_delay = DIV_ROUND_UP(abs(min_uV - old_uV), ramp_delay);

You should have:

  ramp_delay = DIV_ROUND_UP(abs(req_min_uV - old_uV), ramp_delay);

-Doug

[PATCH -next] bpf: make inode code explicitly non-modular

2016-07-11 Thread Paul Gortmaker

The Kconfig currently controlling compilation of this code is:

init/Kconfig:config BPF_SYSCALL
init/Kconfig:   bool "Enable bpf() system call"

...meaning that it currently is not being built as a module by anyone.

Let's remove the couple traces of modular infrastructure use, so that
when reading the driver there is no doubt it is builtin-only.

Note that MODULE_ALIAS is a no-op for non-modular code.

We replace module.h with init.h since the file does use __init.

Cc: Alexei Starovoitov 
Cc: net...@vger.kernel.org
Signed-off-by: Paul Gortmaker 
---
 kernel/bpf/inode.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c
index 318858edb1cd..5967b870a895 100644
--- a/kernel/bpf/inode.c
+++ b/kernel/bpf/inode.c
@@ -11,7 +11,7 @@
  * version 2 as published by the Free Software Foundation.
  */
 
-#include 
+#include 
 #include 
 #include 
 #include 
@@ -367,8 +367,6 @@ static struct file_system_type bpf_fs_type = {
.kill_sb= kill_litter_super,
 };
 
-MODULE_ALIAS_FS("bpf");
-
 static int __init bpf_init(void)
 {
int ret;
-- 
2.8.4

[PATCH -next] bpf: make inode code explicitly non-modular

2016-07-11 Thread Paul Gortmaker

The Kconfig currently controlling compilation of this code is:

init/Kconfig:config BPF_SYSCALL
init/Kconfig:   bool "Enable bpf() system call"

...meaning that it currently is not being built as a module by anyone.

Let's remove the couple traces of modular infrastructure use, so that
when reading the driver there is no doubt it is builtin-only.

Note that MODULE_ALIAS is a no-op for non-modular code.

We replace module.h with init.h since the file does use __init.

Cc: Alexei Starovoitov 
Cc: net...@vger.kernel.org
Signed-off-by: Paul Gortmaker 
---
 kernel/bpf/inode.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c
index 318858edb1cd..5967b870a895 100644
--- a/kernel/bpf/inode.c
+++ b/kernel/bpf/inode.c
@@ -11,7 +11,7 @@
  * version 2 as published by the Free Software Foundation.
  */
 
-#include 
+#include 
 #include 
 #include 
 #include 
@@ -367,8 +367,6 @@ static struct file_system_type bpf_fs_type = {
.kill_sb= kill_litter_super,
 };
 
-MODULE_ALIAS_FS("bpf");
-
 static int __init bpf_init(void)
 {
int ret;
-- 
2.8.4

Re: [PATCH 7/7] ARM: dts: NSP: Add bgmac entries

2016-07-11 Thread Florian Fainelli

On 07/08/2016 08:56 AM, Jon Mason wrote:
> Add device tree entries for the ethernet devices present on the
> Broadcom Northstar Plus SoCs
> 
> Signed-off-by: Jon Mason 

Applied to devicetree/next, with s/bgmac/AMAC/ in the subject, thanks!
-- 
Florian

Re: [PATCH 7/7] ARM: dts: NSP: Add bgmac entries

2016-07-11 Thread Florian Fainelli

On 07/08/2016 08:56 AM, Jon Mason wrote:
> Add device tree entries for the ethernet devices present on the
> Broadcom Northstar Plus SoCs
> 
> Signed-off-by: Jon Mason 

Applied to devicetree/next, with s/bgmac/AMAC/ in the subject, thanks!
-- 
Florian

Re: [PATCH v2 6/6] dt-bindings: net: bgmac: add bindings documentation for bgmac

2016-07-11 Thread Florian Fainelli

On 07/07/2016 04:08 PM, Jon Mason wrote:
> Signed-off-by: Jon Mason 

Applied to devicetree/next, thanks
-- 
Florian

Re: [PATCH v2 6/6] dt-bindings: net: bgmac: add bindings documentation for bgmac

2016-07-11 Thread Florian Fainelli

On 07/07/2016 04:08 PM, Jon Mason wrote:
> Signed-off-by: Jon Mason 

Applied to devicetree/next, thanks
-- 
Florian

Re: linux-next: manual merge of the pwm tree with the regulator tree

2016-07-11 Thread Doug Anderson

Hi,

On Sun, Jul 10, 2016 at 11:56 PM, Stephen Rothwell  
wrote:
> Hi Thierry,
>
> Today's linux-next merge of the pwm tree got a conflict in:
>
>   drivers/regulator/pwm-regulator.c
>
> between commit:
>
>   830583004e61 ("regulator: pwm: Drop unneeded pwm_enable() call")
>   27bfa8893b15 ("regulator: pwm: Support for enable GPIO")
>   c2588393e631 ("regulator: pwm: Fix regulator ramp delay for continuous 
> mode")
>
> from the regulator tree and commit:
>
>   b0303deaa480 ("regulator: pwm: Adjust PWM config at probe time")
>   8bd57ca236d0 ("regulator: pwm: Switch to the atomic PWM API")
>   25d16595935b ("regulator: pwm: Retrieve correct voltage")
>   53f239af4c14 ("regulator: pwm: Support extra continuous mode cases")
>
> from the pwm tree.
>
> I fixed it up (I think, please check - see below) and can carry the fix
> as necessary. This is now fixed as far as linux-next is concerned, but
> any non trivial conflicts should be mentioned to your upstream maintainer
> when your tree is submitted for merging.  You may also want to consider
> cooperating with the maintainer of the conflicting tree to minimise any
> particularly complex conflicts.
>
> --
> Cheers,
> Stephen Rothwell

[ cut ]

>  -  /* Delay required by PWM regulator to settle to the new voltage */
>  -  usleep_range(ramp_delay, ramp_delay + 1000);
>  +  /* Ramp delay is in uV/uS. Adjust to uS and delay */
>  +  ramp_delay = DIV_ROUND_UP(abs(min_uV - old_uV), ramp_delay);

This was what I was worried about and why I originally sent my patch
based upon Boris's series.  The above should be:

ramp_delay = DIV_ROUND_UP(abs(req_min_uV - old_uV), ramp_delay);

Specifically note the use of "req_min_uV" and not "min_uV".


-Doug

Re: linux-next: manual merge of the pwm tree with the regulator tree

2016-07-11 Thread Doug Anderson

Hi,

On Sun, Jul 10, 2016 at 11:56 PM, Stephen Rothwell  
wrote:
> Hi Thierry,
>
> Today's linux-next merge of the pwm tree got a conflict in:
>
>   drivers/regulator/pwm-regulator.c
>
> between commit:
>
>   830583004e61 ("regulator: pwm: Drop unneeded pwm_enable() call")
>   27bfa8893b15 ("regulator: pwm: Support for enable GPIO")
>   c2588393e631 ("regulator: pwm: Fix regulator ramp delay for continuous 
> mode")
>
> from the regulator tree and commit:
>
>   b0303deaa480 ("regulator: pwm: Adjust PWM config at probe time")
>   8bd57ca236d0 ("regulator: pwm: Switch to the atomic PWM API")
>   25d16595935b ("regulator: pwm: Retrieve correct voltage")
>   53f239af4c14 ("regulator: pwm: Support extra continuous mode cases")
>
> from the pwm tree.
>
> I fixed it up (I think, please check - see below) and can carry the fix
> as necessary. This is now fixed as far as linux-next is concerned, but
> any non trivial conflicts should be mentioned to your upstream maintainer
> when your tree is submitted for merging.  You may also want to consider
> cooperating with the maintainer of the conflicting tree to minimise any
> particularly complex conflicts.
>
> --
> Cheers,
> Stephen Rothwell

[ cut ]

>  -  /* Delay required by PWM regulator to settle to the new voltage */
>  -  usleep_range(ramp_delay, ramp_delay + 1000);
>  +  /* Ramp delay is in uV/uS. Adjust to uS and delay */
>  +  ramp_delay = DIV_ROUND_UP(abs(min_uV - old_uV), ramp_delay);

This was what I was worried about and why I originally sent my patch
based upon Boris's series.  The above should be:

ramp_delay = DIV_ROUND_UP(abs(req_min_uV - old_uV), ramp_delay);

Specifically note the use of "req_min_uV" and not "min_uV".


-Doug

Re: [PATCH v7 3/4] perf: xgene: Add APM X-Gene SoC Performance Monitoring Unit driver

2016-07-11 Thread Tai Tri Nguyen

Hi Mark,

On Mon, Jul 11, 2016 at 4:39 AM, Mark Rutland  wrote:
> Please add some commit message text, e.g.
>
> This patch adds a driver for the SoC-wide (AKA uncore) PMU hardware
> found in APM X-Gene SoCs.
>
> On Wed, Jul 06, 2016 at 05:07:24PM -0700, Tai Nguyen wrote:
>> Signed-off-by: Tai Nguyen 
>
> Modulo that, and Paul's comments:
>
> Reviewed-by: Mark Rutland 

Thanks a lot. I'll fix it.

Regards,
Tai

[...]
>
> Thanks,
> Mark.
>
>> ---
>>  Documentation/perf/xgene-pmu.txt |   48 ++
>>  drivers/perf/Kconfig |7 +
>>  drivers/perf/Makefile|1 +
>>  drivers/perf/xgene_pmu.c | 1398 
>> ++
>>  4 files changed, 1454 insertions(+)
>>  create mode 100644 Documentation/perf/xgene-pmu.txt
>>  create mode 100644 drivers/perf/xgene_pmu.c
>>
>> diff --git a/Documentation/perf/xgene-pmu.txt 
>> b/Documentation/perf/xgene-pmu.txt
>> new file mode 100644
>> index 000..d7cff44
>> --- /dev/null
>> +++ b/Documentation/perf/xgene-pmu.txt
>> @@ -0,0 +1,48 @@
>> +APM X-Gene SoC Performance Monitoring Unit (PMU)
>> +
>> +
>> +X-Gene SoC PMU consists of various independent system device PMUs such as
>> +L3 cache(s), I/O bridge(s), memory controller bridge(s) and memory
>> +controller(s). These PMU devices are loosely architected to follow the
>> +same model as the PMU for ARM cores. The PMUs share the same top level
>> +interrupt and status CSR region.
>> +
>> +PMU (perf) driver
>> +-
>> +
>> +The xgene-pmu driver registers several perf PMU drivers. Each of the perf
>> +driver provides description of its available events and configuration 
>> options
>> +in sysfs, see /sys/devices/<l3cX/iobX/mcbX/mcX>/.
>> +
>> +The "format" directory describes format of the config (event ID),
>> +config1 (agent ID) fields of the perf_event_attr structure. The "events"
>> +directory provides configuration templates for all supported event types 
>> that
>> +can be used with perf tool. For example, "l3c0/bank-fifo-full/" is an
>> +equivalent of "l3c0/config=0x0b/".
>> +
>> +Most of the SoC PMU has a specific list of agent ID used for monitoring
>> +performance of a specific datapath. For example, agents of a L3 cache can be
>> +a specific CPU or an I/O bridge. Each PMU has a set of 2 registers capable 
>> of
>> +masking the agents from which the request come from. If the bit with
>> +the bit number corresponding to the agent is set, the event is counted only 
>> if
>> +it is caused by a request from that agent. Each agent ID bit is inversely 
>> mapped
>> +to a corresponding bit in "config1" field. By default, the event will be
>> +counted for all agent requests (config1 = 0x0). For all the supported 
>> agents of
>> +each PMU, please refer to APM X-Gene User Manual.
>> +
>> +Each perf driver also provides a "cpumask" sysfs attribute, which contains a
>> +single CPU ID of the processor which will be used to handle all the PMU 
>> events.
>> +
>> +Example for perf tool use:
>> +
>> + / # perf list | grep -e l3c -e iob -e mcb -e mc
>> +   l3c0/ackq-full/[Kernel PMU event]
>> + <...>
>> +   mcb1/mcb-csw-stall/[Kernel PMU event]
>> +
>> + / # perf stat -a -e l3c0/read-miss/,mcb1/csw-write-request/ sleep 1
>> +
>> + / # perf stat -a -e l3c0/read-miss,config1=0xfffe/ sleep 1
>> +
>> +The driver does not support sampling, therefore "perf record" will
>> +not work. Per-task (without "-a") perf sessions are not supported.
>> diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig
>> index 04e2653..4d5c5f9 100644
>> --- a/drivers/perf/Kconfig
>> +++ b/drivers/perf/Kconfig
>> @@ -12,4 +12,11 @@ config ARM_PMU
>> Say y if you want to use CPU performance monitors on ARM-based
>> systems.
>>
>> +config XGENE_PMU
>> +depends on PERF_EVENTS && ARCH_XGENE
>> +bool "APM X-Gene SoC PMU"
>> +default n
>> +help
>> +  Say y if you want to use APM X-Gene SoC performance monitors.
>> +
>>  endmenu
>> diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile
>> index acd2397..b116e98 100644
>> --- a/drivers/perf/Makefile
>> +++ b/drivers/perf/Makefile
>> @@ -1 +1,2 @@
>>  obj-$(CONFIG_ARM_PMU) += arm_pmu.o
>> +obj-$(CONFIG_XGENE_PMU) += xgene_pmu.o
>> diff --git a/drivers/perf/xgene_pmu.c b/drivers/perf/xgene_pmu.c
>> new file mode 100644
>> index 000..907a6cc
>> --- /dev/null
>> +++ b/drivers/perf/xgene_pmu.c
>> @@ -0,0 +1,1398 @@
>> +/*
>> + * APM X-Gene SoC PMU (Performance Monitor Unit)
>> + *
>> + * Copyright (c) 2016, Applied Micro Circuits Corporation
>> + * Author: Hoan Tran 
>> + * Tai Nguyen 
>> + *
>> + * This program is free software; you can redistribute  it and/or modify it
>> + * under  the terms of  the GNU General  Public License as published by the
>> + * Free Software Foundation;

Re: [PATCH v7 3/4] perf: xgene: Add APM X-Gene SoC Performance Monitoring Unit driver

2016-07-11 Thread Tai Tri Nguyen

Hi Mark,

On Mon, Jul 11, 2016 at 4:39 AM, Mark Rutland  wrote:
> Please add some commit message text, e.g.
>
> This patch adds a driver for the SoC-wide (AKA uncore) PMU hardware
> found in APM X-Gene SoCs.
>
> On Wed, Jul 06, 2016 at 05:07:24PM -0700, Tai Nguyen wrote:
>> Signed-off-by: Tai Nguyen 
>
> Modulo that, and Paul's comments:
>
> Reviewed-by: Mark Rutland 

Thanks a lot. I'll fix it.

Regards,
Tai

[...]
>
> Thanks,
> Mark.
>
>> ---
>>  Documentation/perf/xgene-pmu.txt |   48 ++
>>  drivers/perf/Kconfig |7 +
>>  drivers/perf/Makefile|1 +
>>  drivers/perf/xgene_pmu.c | 1398 
>> ++
>>  4 files changed, 1454 insertions(+)
>>  create mode 100644 Documentation/perf/xgene-pmu.txt
>>  create mode 100644 drivers/perf/xgene_pmu.c
>>
>> diff --git a/Documentation/perf/xgene-pmu.txt 
>> b/Documentation/perf/xgene-pmu.txt
>> new file mode 100644
>> index 000..d7cff44
>> --- /dev/null
>> +++ b/Documentation/perf/xgene-pmu.txt
>> @@ -0,0 +1,48 @@
>> +APM X-Gene SoC Performance Monitoring Unit (PMU)
>> +
>> +
>> +X-Gene SoC PMU consists of various independent system device PMUs such as
>> +L3 cache(s), I/O bridge(s), memory controller bridge(s) and memory
>> +controller(s). These PMU devices are loosely architected to follow the
>> +same model as the PMU for ARM cores. The PMUs share the same top level
>> +interrupt and status CSR region.
>> +
>> +PMU (perf) driver
>> +-
>> +
>> +The xgene-pmu driver registers several perf PMU drivers. Each of the perf
>> +driver provides description of its available events and configuration 
>> options
>> +in sysfs, see /sys/devices/<l3cX/iobX/mcbX/mcX>/.
>> +
>> +The "format" directory describes format of the config (event ID),
>> +config1 (agent ID) fields of the perf_event_attr structure. The "events"
>> +directory provides configuration templates for all supported event types 
>> that
>> +can be used with perf tool. For example, "l3c0/bank-fifo-full/" is an
>> +equivalent of "l3c0/config=0x0b/".
>> +
>> +Most of the SoC PMU has a specific list of agent ID used for monitoring
>> +performance of a specific datapath. For example, agents of a L3 cache can be
>> +a specific CPU or an I/O bridge. Each PMU has a set of 2 registers capable 
>> of
>> +masking the agents from which the request come from. If the bit with
>> +the bit number corresponding to the agent is set, the event is counted only 
>> if
>> +it is caused by a request from that agent. Each agent ID bit is inversely 
>> mapped
>> +to a corresponding bit in "config1" field. By default, the event will be
>> +counted for all agent requests (config1 = 0x0). For all the supported 
>> agents of
>> +each PMU, please refer to APM X-Gene User Manual.
>> +
>> +Each perf driver also provides a "cpumask" sysfs attribute, which contains a
>> +single CPU ID of the processor which will be used to handle all the PMU 
>> events.
>> +
>> +Example for perf tool use:
>> +
>> + / # perf list | grep -e l3c -e iob -e mcb -e mc
>> +   l3c0/ackq-full/[Kernel PMU event]
>> + <...>
>> +   mcb1/mcb-csw-stall/[Kernel PMU event]
>> +
>> + / # perf stat -a -e l3c0/read-miss/,mcb1/csw-write-request/ sleep 1
>> +
>> + / # perf stat -a -e l3c0/read-miss,config1=0xfffe/ sleep 1
>> +
>> +The driver does not support sampling, therefore "perf record" will
>> +not work. Per-task (without "-a") perf sessions are not supported.
>> diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig
>> index 04e2653..4d5c5f9 100644
>> --- a/drivers/perf/Kconfig
>> +++ b/drivers/perf/Kconfig
>> @@ -12,4 +12,11 @@ config ARM_PMU
>> Say y if you want to use CPU performance monitors on ARM-based
>> systems.
>>
>> +config XGENE_PMU
>> +depends on PERF_EVENTS && ARCH_XGENE
>> +bool "APM X-Gene SoC PMU"
>> +default n
>> +help
>> +  Say y if you want to use APM X-Gene SoC performance monitors.
>> +
>>  endmenu
>> diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile
>> index acd2397..b116e98 100644
>> --- a/drivers/perf/Makefile
>> +++ b/drivers/perf/Makefile
>> @@ -1 +1,2 @@
>>  obj-$(CONFIG_ARM_PMU) += arm_pmu.o
>> +obj-$(CONFIG_XGENE_PMU) += xgene_pmu.o
>> diff --git a/drivers/perf/xgene_pmu.c b/drivers/perf/xgene_pmu.c
>> new file mode 100644
>> index 000..907a6cc
>> --- /dev/null
>> +++ b/drivers/perf/xgene_pmu.c
>> @@ -0,0 +1,1398 @@
>> +/*
>> + * APM X-Gene SoC PMU (Performance Monitor Unit)
>> + *
>> + * Copyright (c) 2016, Applied Micro Circuits Corporation
>> + * Author: Hoan Tran 
>> + * Tai Nguyen 
>> + *
>> + * This program is free software; you can redistribute  it and/or modify it
>> + * under  the terms of  the GNU General  Public License as published by the
>> + * Free Software Foundation;  either version 2 of the  License, or (at your
>> + * option) any later version.
>> + *
>> + * This

Re: Linux 4.6.4

2016-07-11 Thread Greg KH

diff --git a/Makefile b/Makefile
index c62b531d5a85..cd374426114a 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 VERSION = 4
 PATCHLEVEL = 6
-SUBLEVEL = 3
+SUBLEVEL = 4
 EXTRAVERSION =
 NAME = Charred Weasel
 
diff --git a/crypto/crypto_user.c b/crypto/crypto_user.c
index 43fe85f20d57..7097a3395b25 100644
--- a/crypto/crypto_user.c
+++ b/crypto/crypto_user.c
@@ -455,6 +455,7 @@ static const int crypto_msg_min[CRYPTO_NR_MSGTYPES] = {
[CRYPTO_MSG_NEWALG  - CRYPTO_MSG_BASE] = MSGSIZE(crypto_user_alg),
[CRYPTO_MSG_DELALG  - CRYPTO_MSG_BASE] = MSGSIZE(crypto_user_alg),
[CRYPTO_MSG_UPDATEALG   - CRYPTO_MSG_BASE] = MSGSIZE(crypto_user_alg),
+   [CRYPTO_MSG_GETALG  - CRYPTO_MSG_BASE] = MSGSIZE(crypto_user_alg),
[CRYPTO_MSG_DELRNG  - CRYPTO_MSG_BASE] = 0,
 };
 
diff --git a/drivers/crypto/ux500/hash/hash_core.c 
b/drivers/crypto/ux500/hash/hash_core.c
index 574e87c7f2b8..9acccad26928 100644
--- a/drivers/crypto/ux500/hash/hash_core.c
+++ b/drivers/crypto/ux500/hash/hash_core.c
@@ -781,7 +781,7 @@ static int hash_process_data(struct hash_device_data 
*device_data,
&device_data->state);
memmove(req_ctx->state.buffer,
device_data->state.buffer,
-   HASH_BLOCK_SIZE / sizeof(u32));
+   HASH_BLOCK_SIZE);
if (ret) {
dev_err(device_data->dev,
"%s: hash_resume_state() 
failed!\n",
@@ -832,7 +832,7 @@ static int hash_process_data(struct hash_device_data 
*device_data,
 
memmove(device_data->state.buffer,
req_ctx->state.buffer,
-   HASH_BLOCK_SIZE / sizeof(u32));
+   HASH_BLOCK_SIZE);
if (ret) {
dev_err(device_data->dev, "%s: 
hash_save_state() failed!\n",
__func__);
diff --git a/drivers/crypto/vmx/aes_cbc.c b/drivers/crypto/vmx/aes_cbc.c
index 495577b6d31b..94ad5c0adbcb 100644
--- a/drivers/crypto/vmx/aes_cbc.c
+++ b/drivers/crypto/vmx/aes_cbc.c
@@ -182,7 +182,7 @@ struct crypto_alg p8_aes_cbc_alg = {
.cra_name = "cbc(aes)",
.cra_driver_name = "p8_aes_cbc",
.cra_module = THIS_MODULE,
-   .cra_priority = 1000,
+   .cra_priority = 2000,
.cra_type = &crypto_blkcipher_type,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | CRYPTO_ALG_NEED_FALLBACK,
.cra_alignmask = 0,
diff --git a/drivers/crypto/vmx/aes_ctr.c b/drivers/crypto/vmx/aes_ctr.c
index 0a3c1b04cf3c..38ed10d761d0 100644
--- a/drivers/crypto/vmx/aes_ctr.c
+++ b/drivers/crypto/vmx/aes_ctr.c
@@ -166,7 +166,7 @@ struct crypto_alg p8_aes_ctr_alg = {
.cra_name = "ctr(aes)",
.cra_driver_name = "p8_aes_ctr",
.cra_module = THIS_MODULE,
-   .cra_priority = 1000,
+   .cra_priority = 2000,
.cra_type = &crypto_blkcipher_type,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | CRYPTO_ALG_NEED_FALLBACK,
.cra_alignmask = 0,
diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c
index 6dc810bce295..944a6dca0fcb 100644
--- a/drivers/usb/core/quirks.c
+++ b/drivers/usb/core/quirks.c
@@ -44,6 +44,9 @@ static const struct usb_device_id usb_quirk_list[] = {
/* Creative SB Audigy 2 NX */
{ USB_DEVICE(0x041e, 0x3020), .driver_info = USB_QUIRK_RESET_RESUME },
 
+   /* USB3503 */
+   { USB_DEVICE(0x0424, 0x3503), .driver_info = USB_QUIRK_RESET_RESUME },
+
/* Microsoft Wireless Laser Mouse 6000 Receiver */
{ USB_DEVICE(0x045e, 0x00e1), .driver_info = USB_QUIRK_RESET_RESUME },
 
@@ -173,6 +176,10 @@ static const struct usb_device_id usb_quirk_list[] = {
/* MAYA44USB sound device */
{ USB_DEVICE(0x0a92, 0x0091), .driver_info = USB_QUIRK_RESET_RESUME },
 
+   /* ASUS Base Station(T100) */
+   { USB_DEVICE(0x0b05, 0x17e0), .driver_info =
+   USB_QUIRK_IGNORE_REMOTE_WAKEUP },
+
/* Action Semiconductor flash disk */
{ USB_DEVICE(0x10d6, 0x2200), .driver_info =
USB_QUIRK_STRING_FETCH_255 },
@@ -188,26 +195,22 @@ static const struct usb_device_id usb_quirk_list[] = {
{ USB_DEVICE(0x1908, 0x1315), .driver_info =
USB_QUIRK_HONOR_BNUMINTERFACES },
 
-   /* INTEL VALUE SSD */
-   { USB_DEVICE(0x8086, 0xf1a5), .driver_info = USB_QUIRK_RESET_RESUME },
-
-   /* USB3503 */
-   { USB_DEVICE(0x0424, 0x3503), .driver_info = USB_QUIRK_RESET_RESUME },
-
-   /* ASUS Base Station(T100) */
-   { USB_DEVICE(0x0b05, 0x17e0), .driver_info =
-   USB_QUIRK_IGNORE_REMOTE_WAKEUP },
-
/* Protocol and OTG Electrical Test Device */
{ USB_DEVICE(0x1a0a, 0x0200),

Re: Linux 4.6.4

2016-07-11 Thread Greg KH

diff --git a/Makefile b/Makefile
index c62b531d5a85..cd374426114a 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 VERSION = 4
 PATCHLEVEL = 6
-SUBLEVEL = 3
+SUBLEVEL = 4
 EXTRAVERSION =
 NAME = Charred Weasel
 
diff --git a/crypto/crypto_user.c b/crypto/crypto_user.c
index 43fe85f20d57..7097a3395b25 100644
--- a/crypto/crypto_user.c
+++ b/crypto/crypto_user.c
@@ -455,6 +455,7 @@ static const int crypto_msg_min[CRYPTO_NR_MSGTYPES] = {
[CRYPTO_MSG_NEWALG  - CRYPTO_MSG_BASE] = MSGSIZE(crypto_user_alg),
[CRYPTO_MSG_DELALG  - CRYPTO_MSG_BASE] = MSGSIZE(crypto_user_alg),
[CRYPTO_MSG_UPDATEALG   - CRYPTO_MSG_BASE] = MSGSIZE(crypto_user_alg),
+   [CRYPTO_MSG_GETALG  - CRYPTO_MSG_BASE] = MSGSIZE(crypto_user_alg),
[CRYPTO_MSG_DELRNG  - CRYPTO_MSG_BASE] = 0,
 };
 
diff --git a/drivers/crypto/ux500/hash/hash_core.c 
b/drivers/crypto/ux500/hash/hash_core.c
index 574e87c7f2b8..9acccad26928 100644
--- a/drivers/crypto/ux500/hash/hash_core.c
+++ b/drivers/crypto/ux500/hash/hash_core.c
@@ -781,7 +781,7 @@ static int hash_process_data(struct hash_device_data 
*device_data,
&device_data->state);
memmove(req_ctx->state.buffer,
device_data->state.buffer,
-   HASH_BLOCK_SIZE / sizeof(u32));
+   HASH_BLOCK_SIZE);
if (ret) {
dev_err(device_data->dev,
"%s: hash_resume_state() 
failed!\n",
@@ -832,7 +832,7 @@ static int hash_process_data(struct hash_device_data 
*device_data,
 
memmove(device_data->state.buffer,
req_ctx->state.buffer,
-   HASH_BLOCK_SIZE / sizeof(u32));
+   HASH_BLOCK_SIZE);
if (ret) {
dev_err(device_data->dev, "%s: 
hash_save_state() failed!\n",
__func__);
diff --git a/drivers/crypto/vmx/aes_cbc.c b/drivers/crypto/vmx/aes_cbc.c
index 495577b6d31b..94ad5c0adbcb 100644
--- a/drivers/crypto/vmx/aes_cbc.c
+++ b/drivers/crypto/vmx/aes_cbc.c
@@ -182,7 +182,7 @@ struct crypto_alg p8_aes_cbc_alg = {
.cra_name = "cbc(aes)",
.cra_driver_name = "p8_aes_cbc",
.cra_module = THIS_MODULE,
-   .cra_priority = 1000,
+   .cra_priority = 2000,
.cra_type = &crypto_blkcipher_type,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | CRYPTO_ALG_NEED_FALLBACK,
.cra_alignmask = 0,
diff --git a/drivers/crypto/vmx/aes_ctr.c b/drivers/crypto/vmx/aes_ctr.c
index 0a3c1b04cf3c..38ed10d761d0 100644
--- a/drivers/crypto/vmx/aes_ctr.c
+++ b/drivers/crypto/vmx/aes_ctr.c
@@ -166,7 +166,7 @@ struct crypto_alg p8_aes_ctr_alg = {
.cra_name = "ctr(aes)",
.cra_driver_name = "p8_aes_ctr",
.cra_module = THIS_MODULE,
-   .cra_priority = 1000,
+   .cra_priority = 2000,
.cra_type = &crypto_blkcipher_type,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | CRYPTO_ALG_NEED_FALLBACK,
.cra_alignmask = 0,
diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c
index 6dc810bce295..944a6dca0fcb 100644
--- a/drivers/usb/core/quirks.c
+++ b/drivers/usb/core/quirks.c
@@ -44,6 +44,9 @@ static const struct usb_device_id usb_quirk_list[] = {
/* Creative SB Audigy 2 NX */
{ USB_DEVICE(0x041e, 0x3020), .driver_info = USB_QUIRK_RESET_RESUME },
 
+   /* USB3503 */
+   { USB_DEVICE(0x0424, 0x3503), .driver_info = USB_QUIRK_RESET_RESUME },
+
/* Microsoft Wireless Laser Mouse 6000 Receiver */
{ USB_DEVICE(0x045e, 0x00e1), .driver_info = USB_QUIRK_RESET_RESUME },
 
@@ -173,6 +176,10 @@ static const struct usb_device_id usb_quirk_list[] = {
/* MAYA44USB sound device */
{ USB_DEVICE(0x0a92, 0x0091), .driver_info = USB_QUIRK_RESET_RESUME },
 
+   /* ASUS Base Station(T100) */
+   { USB_DEVICE(0x0b05, 0x17e0), .driver_info =
+   USB_QUIRK_IGNORE_REMOTE_WAKEUP },
+
/* Action Semiconductor flash disk */
{ USB_DEVICE(0x10d6, 0x2200), .driver_info =
USB_QUIRK_STRING_FETCH_255 },
@@ -188,26 +195,22 @@ static const struct usb_device_id usb_quirk_list[] = {
{ USB_DEVICE(0x1908, 0x1315), .driver_info =
USB_QUIRK_HONOR_BNUMINTERFACES },
 
-   /* INTEL VALUE SSD */
-   { USB_DEVICE(0x8086, 0xf1a5), .driver_info = USB_QUIRK_RESET_RESUME },
-
-   /* USB3503 */
-   { USB_DEVICE(0x0424, 0x3503), .driver_info = USB_QUIRK_RESET_RESUME },
-
-   /* ASUS Base Station(T100) */
-   { USB_DEVICE(0x0b05, 0x17e0), .driver_info =
-   USB_QUIRK_IGNORE_REMOTE_WAKEUP },
-
/* Protocol and OTG Electrical Test Device */
{ USB_DEVICE(0x1a0a, 0x0200),

Re: Linux 4.4.15

2016-07-11 Thread Greg KH

diff --git a/Makefile b/Makefile
index fadbb9d73c6d..979088079338 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 VERSION = 4
 PATCHLEVEL = 4
-SUBLEVEL = 14
+SUBLEVEL = 15
 EXTRAVERSION =
 NAME = Blurry Fish Butt
 
diff --git a/crypto/crypto_user.c b/crypto/crypto_user.c
index 43fe85f20d57..7097a3395b25 100644
--- a/crypto/crypto_user.c
+++ b/crypto/crypto_user.c
@@ -455,6 +455,7 @@ static const int crypto_msg_min[CRYPTO_NR_MSGTYPES] = {
[CRYPTO_MSG_NEWALG  - CRYPTO_MSG_BASE] = MSGSIZE(crypto_user_alg),
[CRYPTO_MSG_DELALG  - CRYPTO_MSG_BASE] = MSGSIZE(crypto_user_alg),
[CRYPTO_MSG_UPDATEALG   - CRYPTO_MSG_BASE] = MSGSIZE(crypto_user_alg),
+   [CRYPTO_MSG_GETALG  - CRYPTO_MSG_BASE] = MSGSIZE(crypto_user_alg),
[CRYPTO_MSG_DELRNG  - CRYPTO_MSG_BASE] = 0,
 };
 
diff --git a/drivers/crypto/ux500/hash/hash_core.c 
b/drivers/crypto/ux500/hash/hash_core.c
index 66b1c3313e2e..cd4398498495 100644
--- a/drivers/crypto/ux500/hash/hash_core.c
+++ b/drivers/crypto/ux500/hash/hash_core.c
@@ -797,7 +797,7 @@ static int hash_process_data(struct hash_device_data 
*device_data,
&device_data->state);
memmove(req_ctx->state.buffer,
device_data->state.buffer,
-   HASH_BLOCK_SIZE / sizeof(u32));
+   HASH_BLOCK_SIZE);
if (ret) {
dev_err(device_data->dev,
"%s: hash_resume_state() 
failed!\n",
@@ -848,7 +848,7 @@ static int hash_process_data(struct hash_device_data 
*device_data,
 
memmove(device_data->state.buffer,
req_ctx->state.buffer,
-   HASH_BLOCK_SIZE / sizeof(u32));
+   HASH_BLOCK_SIZE);
if (ret) {
dev_err(device_data->dev, "%s: 
hash_save_state() failed!\n",
__func__);
diff --git a/drivers/crypto/vmx/aes_cbc.c b/drivers/crypto/vmx/aes_cbc.c
index 0b8fe2ec5315..f3801b983f42 100644
--- a/drivers/crypto/vmx/aes_cbc.c
+++ b/drivers/crypto/vmx/aes_cbc.c
@@ -182,7 +182,7 @@ struct crypto_alg p8_aes_cbc_alg = {
.cra_name = "cbc(aes)",
.cra_driver_name = "p8_aes_cbc",
.cra_module = THIS_MODULE,
-   .cra_priority = 1000,
+   .cra_priority = 2000,
.cra_type = &crypto_blkcipher_type,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | CRYPTO_ALG_NEED_FALLBACK,
.cra_alignmask = 0,
diff --git a/drivers/crypto/vmx/aes_ctr.c b/drivers/crypto/vmx/aes_ctr.c
index ee1306cd8f59..404a1b69a3ab 100644
--- a/drivers/crypto/vmx/aes_ctr.c
+++ b/drivers/crypto/vmx/aes_ctr.c
@@ -166,7 +166,7 @@ struct crypto_alg p8_aes_ctr_alg = {
.cra_name = "ctr(aes)",
.cra_driver_name = "p8_aes_ctr",
.cra_module = THIS_MODULE,
-   .cra_priority = 1000,
+   .cra_priority = 2000,
.cra_type = &crypto_blkcipher_type,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | CRYPTO_ALG_NEED_FALLBACK,
.cra_alignmask = 0,
diff --git a/drivers/net/ethernet/atheros/alx/main.c 
b/drivers/net/ethernet/atheros/alx/main.c
index bd377a6b067d..df54475d163b 100644
--- a/drivers/net/ethernet/atheros/alx/main.c
+++ b/drivers/net/ethernet/atheros/alx/main.c
@@ -86,9 +86,14 @@ static int alx_refill_rx_ring(struct alx_priv *alx, gfp_t 
gfp)
while (!cur_buf->skb && next != rxq->read_idx) {
struct alx_rfd *rfd = &rxq->rfd[cur];
 
-   skb = __netdev_alloc_skb(alx->dev, alx->rxbuf_size, gfp);
+   skb = __netdev_alloc_skb(alx->dev, alx->rxbuf_size + 64, gfp);
if (!skb)
break;
+
+   /* Workround for the HW RX DMA overflow issue */
+   if (((unsigned long)skb->data & 0xfff) == 0xfc0)
+   skb_reserve(skb, 64);
+
dma = dma_map_single(&alx->hw.pdev->dev,
 skb->data, alx->rxbuf_size,
 DMA_FROM_DEVICE);
diff --git a/drivers/net/ethernet/cadence/macb.c 
b/drivers/net/ethernet/cadence/macb.c
index 169059c92f80..8d54e7b41bbf 100644
--- a/drivers/net/ethernet/cadence/macb.c
+++ b/drivers/net/ethernet/cadence/macb.c
@@ -2405,9 +2405,9 @@ static int macb_init(struct platform_device *pdev)
if (bp->phy_interface == PHY_INTERFACE_MODE_RGMII)
val = GEM_BIT(RGMII);
else if (bp->phy_interface == PHY_INTERFACE_MODE_RMII &&
-(bp->caps & MACB_CAPS_USRIO_DEFAULT_IS_MII))
+(bp->caps & MACB_CAPS_USRIO_DEFAULT_IS_MII_GMII))
val = MACB_BIT(RMII);
-   else if (!(bp->caps & MACB_CAPS_USRIO_DEFAULT_IS_MII))
+   else if (!(bp->caps & MACB_CAPS_USRIO_DEFAULT_IS_MII_GMII))
val =

Linux 4.6.4

2016-07-11 Thread Greg KH

I'm announcing the release of the 4.6.4 kernel.

All users of the 4.6 kernel series must upgrade.

The updated 4.6.y git tree can be found at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git 
linux-4.6.y
and can be browsed at the normal kernel.org git web browser:

http://git.kernel.org/?p=linux/kernel/git/stable/linux-stable.git;a=summary

thanks,

greg k-h



 Makefile  |2 -
 crypto/crypto_user.c  |1 
 drivers/crypto/ux500/hash/hash_core.c |4 +-
 drivers/crypto/vmx/aes_cbc.c  |2 -
 drivers/crypto/vmx/aes_ctr.c  |2 -
 drivers/usb/core/quirks.c |   23 ---
 drivers/usb/dwc3/dwc3-exynos.c|   19 +++-
 drivers/usb/gadget/legacy/inode.c |   17 ---
 drivers/usb/host/ehci-tegra.c |2 -
 drivers/usb/host/xhci-pci.c   |5 +++
 drivers/usb/host/xhci-plat.c  |3 +
 drivers/usb/host/xhci-ring.c  |   30 +++
 drivers/usb/host/xhci.c   |   27 +
 drivers/usb/musb/musb_core.c  |3 +
 drivers/usb/musb/musb_host.c  |   23 +--
 drivers/usb/serial/mos7720.c  |1 
 drivers/usb/storage/uas.c |1 
 include/linux/bpf.h   |4 ++
 include/linux/net.h   |3 +
 include/linux/sock_diag.h |6 +++
 kernel/events/core.c  |2 -
 net/ax25/af_ax25.c|3 +
 net/ax25/ax25_ds_timer.c  |5 ++-
 net/ax25/ax25_std_timer.c |5 ++-
 net/ax25/ax25_subr.c  |3 +
 net/bridge/br_multicast.c |4 ++
 net/bridge/br_private.h   |   23 ---
 net/core/neighbour.c  |6 +++
 net/ipv4/esp4.c   |   52 --
 net/ipv4/ipmr.c   |4 +-
 net/ipv6/ip6mr.c  |1 
 net/ipv6/sit.c|4 +-
 net/kcm/kcmproc.c |1 
 net/sched/act_ipt.c   |7 +++-
 net/sched/sch_fifo.c  |4 ++
 net/sched/sch_netem.c |   12 +++
 36 files changed, 216 insertions(+), 98 deletions(-)

Andrew Goodbody (2):
  usb: musb: Stop bulk endpoint while queue is rotated
  usb: musb: Ensure rx reinit occurs for shared_fifo endpoints

Anton Blanchard (1):
  crypto: vmx - Increase priority of aes-cbc cipher

Basil Gunn (1):
  AX.25: Close socket connection on session completion

Bin Liu (3):
  usb: musb: only restore devctl when session was set in backup
  usb: musb: host: correct cppi dma channel for isoch transfer
  usb: gadget: fix spinlock dead lock in gadgetfs

Daniel Borkmann (1):
  bpf, perf: delay release of BPF prog after grace period

David Barroso (1):
  neigh: Explicitly declare RCU-bh read side critical section in 
neigh_xmit()

Eric Dumazet (2):
  net_sched: fix pfifo_head_drop behavior vs backlog
  netem: fix a use after free

Gabriel Krisman Bertazi (1):
  xhci: Cleanup only when releasing primary hcd

Greg Kroah-Hartman (1):
  Linux 4.6.4

Hans de Goede (4):
  USB: uas: Fix slave queue_depth not being set
  usb: quirks: Fix sorting
  usb: quirks: Add no-lpm quirk for Acer C120 LED Projector
  USB: xhci: Add broken streams quirk for Frescologic device id 1009

Herbert Xu (1):
  esp: Fix ESN generation under UDP encapsulation

Jason A. Donenfeld (1):
  net: Don't forget pr_fmt on net_dbg_ratelimited for CONFIG_DYNAMIC_DEBUG

Jiri Slaby (1):
  kcm: fix /proc memory leak

Linus Walleij (1):
  crypto: ux500 - memmove the right size

Mathias Krause (1):
  crypto: user - re-add size check for CRYPTO_MSG_GETALG

Mathias Nyman (1):
  xhci: Fix handling timeouted commands on hosts in weird states.

Simon Horman (1):
  sit: correct IP protocol used in ipip6_err

Steinar H. Gunderson (1):
  usb: dwc3: exynos: Fix deferred probing storm.

Sudip Mukherjee (1):
  USB: mos7720: delete parport

Thierry Reding (1):
  usb: host: ehci-tegra: Grab the correct UTMI pads reset

Thomas Petazzoni (1):
  usb: xhci-plat: properly handle probe deferral for devm_clk_get()

Tom Goff (1):
  ipmr/ip6mr: Initialize the last assert time of mfc entries.

WANG Cong (1):
  act_ipt: fix a bind refcnt leak

Willem de Bruijn (1):
  sock_diag: do not broadcast raw socket destruction

daniel (1):
  Bridge: Fix ipv6 mc snooping if bridge has no ipv6 address



signature.asc
Description: PGP signature

Linux 4.4.15

2016-07-11 Thread Greg KH

I'm announcing the release of the 4.4.15 kernel.

All users of the 4.4 kernel series must upgrade.

The updated 4.4.y git tree can be found at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git 
linux-4.4.y
and can be browsed at the normal kernel.org git web browser:

http://git.kernel.org/?p=linux/kernel/git/stable/linux-stable.git;a=summary

thanks,

greg k-h



 Makefile|2 -
 crypto/crypto_user.c|1 
 drivers/crypto/ux500/hash/hash_core.c   |4 +-
 drivers/crypto/vmx/aes_cbc.c|2 -
 drivers/crypto/vmx/aes_ctr.c|2 -
 drivers/net/ethernet/atheros/alx/main.c |7 +++-
 drivers/net/ethernet/cadence/macb.c |   13 
 drivers/net/ethernet/cadence/macb.h |2 -
 drivers/usb/core/quirks.c   |   23 --
 drivers/usb/dwc3/dwc3-exynos.c  |   19 ++-
 drivers/usb/gadget/legacy/inode.c   |   17 --
 drivers/usb/host/ehci-tegra.c   |2 -
 drivers/usb/host/xhci-pci.c |5 +++
 drivers/usb/host/xhci-plat.c|3 +
 drivers/usb/host/xhci-ring.c|   30 ++
 drivers/usb/host/xhci.c |   27 +---
 drivers/usb/musb/musb_core.c|3 +
 drivers/usb/musb/musb_host.c|   23 --
 drivers/usb/serial/mos7720.c|1 
 drivers/usb/storage/uas.c   |1 
 include/linux/bpf.h |4 ++
 include/linux/net.h |3 +
 include/linux/skbuff.h  |7 
 include/linux/sock_diag.h   |6 +++
 kernel/events/core.c|2 -
 net/ax25/af_ax25.c  |3 +
 net/ax25/ax25_ds_timer.c|5 ++-
 net/ax25/ax25_std_timer.c   |5 ++-
 net/ax25/ax25_subr.c|3 +
 net/bridge/br_multicast.c   |4 ++
 net/bridge/br_private.h |   23 +++---
 net/core/filter.c   |   18 ++-
 net/core/neighbour.c|6 +++
 net/ipv4/esp4.c |   52 +++-
 net/ipv4/ipmr.c |4 +-
 net/ipv6/ip6mr.c|1 
 net/ipv6/sit.c  |4 +-
 net/sched/act_csum.c|8 +---
 net/sched/act_nat.c |   18 +++
 net/sched/sch_fifo.c|4 ++
 net/sched/sch_netem.c   |   12 +++
 41 files changed, 248 insertions(+), 131 deletions(-)

Andrew Goodbody (2):
  usb: musb: Stop bulk endpoint while queue is rotated
  usb: musb: Ensure rx reinit occurs for shared_fifo endpoints

Anton Blanchard (1):
  crypto: vmx - Increase priority of aes-cbc cipher

Basil Gunn (1):
  AX.25: Close socket connection on session completion

Bin Liu (3):
  usb: musb: only restore devctl when session was set in backup
  usb: musb: host: correct cppi dma channel for isoch transfer
  usb: gadget: fix spinlock dead lock in gadgetfs

Daniel Borkmann (2):
  bpf, perf: delay release of BPF prog after grace period
  bpf: try harder on clones when writing into skb

David Barroso (1):
  neigh: Explicitly declare RCU-bh read side critical section in 
neigh_xmit()

Eric Dumazet (2):
  net_sched: fix pfifo_head_drop behavior vs backlog
  netem: fix a use after free

Feng Tang (1):
  net: alx: Work around the DMA RX overflow issue

Gabriel Krisman Bertazi (1):
  xhci: Cleanup only when releasing primary hcd

Greg Kroah-Hartman (1):
  Linux 4.4.15

Hans de Goede (4):
  USB: uas: Fix slave queue_depth not being set
  usb: quirks: Fix sorting
  usb: quirks: Add no-lpm quirk for Acer C120 LED Projector
  USB: xhci: Add broken streams quirk for Frescologic device id 1009

Herbert Xu (1):
  esp: Fix ESN generation under UDP encapsulation

Jason A. Donenfeld (1):
  net: Don't forget pr_fmt on net_dbg_ratelimited for CONFIG_DYNAMIC_DEBUG

Linus Walleij (1):
  crypto: ux500 - memmove the right size

Mathias Krause (1):
  crypto: user - re-add size check for CRYPTO_MSG_GETALG

Mathias Nyman (1):
  xhci: Fix handling timeouted commands on hosts in weird states.

Nicolas Ferre (1):
  net: macb: fix default configuration for GMAC on AT91

Simon Horman (1):
  sit: correct IP protocol used in ipip6_err

Steinar H. Gunderson (1):
  usb: dwc3: exynos: Fix deferred probing storm.

Sudip Mukherjee (1):
  USB: mos7720: delete parport

Thierry Reding (1):
  usb: host: ehci-tegra: Grab the correct UTMI pads reset

Thomas Petazzoni (1):
  usb: xhci-plat: properly handle probe deferral for devm_clk_get()

Tom Goff (1):
  ipmr/ip6mr: Initialize the last assert time of mfc entries.

Willem de Bruijn (1):
  sock_diag: do not broadcast raw socket destruction

Linux 4.6.4

2016-07-11 Thread Greg KH

I'm announcing the release of the 4.6.4 kernel.

All users of the 4.6 kernel series must upgrade.

The updated 4.6.y git tree can be found at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git 
linux-4.6.y
and can be browsed at the normal kernel.org git web browser:

http://git.kernel.org/?p=linux/kernel/git/stable/linux-stable.git;a=summary

thanks,

greg k-h



 Makefile  |2 -
 crypto/crypto_user.c  |1 
 drivers/crypto/ux500/hash/hash_core.c |4 +-
 drivers/crypto/vmx/aes_cbc.c  |2 -
 drivers/crypto/vmx/aes_ctr.c  |2 -
 drivers/usb/core/quirks.c |   23 ---
 drivers/usb/dwc3/dwc3-exynos.c|   19 +++-
 drivers/usb/gadget/legacy/inode.c |   17 ---
 drivers/usb/host/ehci-tegra.c |2 -
 drivers/usb/host/xhci-pci.c   |5 +++
 drivers/usb/host/xhci-plat.c  |3 +
 drivers/usb/host/xhci-ring.c  |   30 +++
 drivers/usb/host/xhci.c   |   27 +
 drivers/usb/musb/musb_core.c  |3 +
 drivers/usb/musb/musb_host.c  |   23 +--
 drivers/usb/serial/mos7720.c  |1 
 drivers/usb/storage/uas.c |1 
 include/linux/bpf.h   |4 ++
 include/linux/net.h   |3 +
 include/linux/sock_diag.h |6 +++
 kernel/events/core.c  |2 -
 net/ax25/af_ax25.c|3 +
 net/ax25/ax25_ds_timer.c  |5 ++-
 net/ax25/ax25_std_timer.c |5 ++-
 net/ax25/ax25_subr.c  |3 +
 net/bridge/br_multicast.c |4 ++
 net/bridge/br_private.h   |   23 ---
 net/core/neighbour.c  |6 +++
 net/ipv4/esp4.c   |   52 --
 net/ipv4/ipmr.c   |4 +-
 net/ipv6/ip6mr.c  |1 
 net/ipv6/sit.c|4 +-
 net/kcm/kcmproc.c |1 
 net/sched/act_ipt.c   |7 +++-
 net/sched/sch_fifo.c  |4 ++
 net/sched/sch_netem.c |   12 +++
 36 files changed, 216 insertions(+), 98 deletions(-)

Andrew Goodbody (2):
  usb: musb: Stop bulk endpoint while queue is rotated
  usb: musb: Ensure rx reinit occurs for shared_fifo endpoints

Anton Blanchard (1):
  crypto: vmx - Increase priority of aes-cbc cipher

Basil Gunn (1):
  AX.25: Close socket connection on session completion

Bin Liu (3):
  usb: musb: only restore devctl when session was set in backup
  usb: musb: host: correct cppi dma channel for isoch transfer
  usb: gadget: fix spinlock dead lock in gadgetfs

Daniel Borkmann (1):
  bpf, perf: delay release of BPF prog after grace period

David Barroso (1):
  neigh: Explicitly declare RCU-bh read side critical section in 
neigh_xmit()

Eric Dumazet (2):
  net_sched: fix pfifo_head_drop behavior vs backlog
  netem: fix a use after free

Gabriel Krisman Bertazi (1):
  xhci: Cleanup only when releasing primary hcd

Greg Kroah-Hartman (1):
  Linux 4.6.4

Hans de Goede (4):
  USB: uas: Fix slave queue_depth not being set
  usb: quirks: Fix sorting
  usb: quirks: Add no-lpm quirk for Acer C120 LED Projector
  USB: xhci: Add broken streams quirk for Frescologic device id 1009

Herbert Xu (1):
  esp: Fix ESN generation under UDP encapsulation

Jason A. Donenfeld (1):
  net: Don't forget pr_fmt on net_dbg_ratelimited for CONFIG_DYNAMIC_DEBUG

Jiri Slaby (1):
  kcm: fix /proc memory leak

Linus Walleij (1):
  crypto: ux500 - memmove the right size

Mathias Krause (1):
  crypto: user - re-add size check for CRYPTO_MSG_GETALG

Mathias Nyman (1):
  xhci: Fix handling timeouted commands on hosts in weird states.

Simon Horman (1):
  sit: correct IP protocol used in ipip6_err

Steinar H. Gunderson (1):
  usb: dwc3: exynos: Fix deferred probing storm.

Sudip Mukherjee (1):
  USB: mos7720: delete parport

Thierry Reding (1):
  usb: host: ehci-tegra: Grab the correct UTMI pads reset

Thomas Petazzoni (1):
  usb: xhci-plat: properly handle probe deferral for devm_clk_get()

Tom Goff (1):
  ipmr/ip6mr: Initialize the last assert time of mfc entries.

WANG Cong (1):
  act_ipt: fix a bind refcnt leak

Willem de Bruijn (1):
  sock_diag: do not broadcast raw socket destruction

daniel (1):
  Bridge: Fix ipv6 mc snooping if bridge has no ipv6 address



signature.asc
Description: PGP signature

Linux 4.4.15

2016-07-11 Thread Greg KH

I'm announcing the release of the 4.4.15 kernel.

All users of the 4.4 kernel series must upgrade.

The updated 4.4.y git tree can be found at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git 
linux-4.4.y
and can be browsed at the normal kernel.org git web browser:

http://git.kernel.org/?p=linux/kernel/git/stable/linux-stable.git;a=summary

thanks,

greg k-h



 Makefile|2 -
 crypto/crypto_user.c|1 
 drivers/crypto/ux500/hash/hash_core.c   |4 +-
 drivers/crypto/vmx/aes_cbc.c|2 -
 drivers/crypto/vmx/aes_ctr.c|2 -
 drivers/net/ethernet/atheros/alx/main.c |7 +++-
 drivers/net/ethernet/cadence/macb.c |   13 
 drivers/net/ethernet/cadence/macb.h |2 -
 drivers/usb/core/quirks.c   |   23 --
 drivers/usb/dwc3/dwc3-exynos.c  |   19 ++-
 drivers/usb/gadget/legacy/inode.c   |   17 --
 drivers/usb/host/ehci-tegra.c   |2 -
 drivers/usb/host/xhci-pci.c |5 +++
 drivers/usb/host/xhci-plat.c|3 +
 drivers/usb/host/xhci-ring.c|   30 ++
 drivers/usb/host/xhci.c |   27 +---
 drivers/usb/musb/musb_core.c|3 +
 drivers/usb/musb/musb_host.c|   23 --
 drivers/usb/serial/mos7720.c|1 
 drivers/usb/storage/uas.c   |1 
 include/linux/bpf.h |4 ++
 include/linux/net.h |3 +
 include/linux/skbuff.h  |7 
 include/linux/sock_diag.h   |6 +++
 kernel/events/core.c|2 -
 net/ax25/af_ax25.c  |3 +
 net/ax25/ax25_ds_timer.c|5 ++-
 net/ax25/ax25_std_timer.c   |5 ++-
 net/ax25/ax25_subr.c|3 +
 net/bridge/br_multicast.c   |4 ++
 net/bridge/br_private.h |   23 +++---
 net/core/filter.c   |   18 ++-
 net/core/neighbour.c|6 +++
 net/ipv4/esp4.c |   52 +++-
 net/ipv4/ipmr.c |4 +-
 net/ipv6/ip6mr.c|1 
 net/ipv6/sit.c  |4 +-
 net/sched/act_csum.c|8 +---
 net/sched/act_nat.c |   18 +++
 net/sched/sch_fifo.c|4 ++
 net/sched/sch_netem.c   |   12 +++
 41 files changed, 248 insertions(+), 131 deletions(-)

Andrew Goodbody (2):
  usb: musb: Stop bulk endpoint while queue is rotated
  usb: musb: Ensure rx reinit occurs for shared_fifo endpoints

Anton Blanchard (1):
  crypto: vmx - Increase priority of aes-cbc cipher

Basil Gunn (1):
  AX.25: Close socket connection on session completion

Bin Liu (3):
  usb: musb: only restore devctl when session was set in backup
  usb: musb: host: correct cppi dma channel for isoch transfer
  usb: gadget: fix spinlock dead lock in gadgetfs

Daniel Borkmann (2):
  bpf, perf: delay release of BPF prog after grace period
  bpf: try harder on clones when writing into skb

David Barroso (1):
  neigh: Explicitly declare RCU-bh read side critical section in 
neigh_xmit()

Eric Dumazet (2):
  net_sched: fix pfifo_head_drop behavior vs backlog
  netem: fix a use after free

Feng Tang (1):
  net: alx: Work around the DMA RX overflow issue

Gabriel Krisman Bertazi (1):
  xhci: Cleanup only when releasing primary hcd

Greg Kroah-Hartman (1):
  Linux 4.4.15

Hans de Goede (4):
  USB: uas: Fix slave queue_depth not being set
  usb: quirks: Fix sorting
  usb: quirks: Add no-lpm quirk for Acer C120 LED Projector
  USB: xhci: Add broken streams quirk for Frescologic device id 1009

Herbert Xu (1):
  esp: Fix ESN generation under UDP encapsulation

Jason A. Donenfeld (1):
  net: Don't forget pr_fmt on net_dbg_ratelimited for CONFIG_DYNAMIC_DEBUG

Linus Walleij (1):
  crypto: ux500 - memmove the right size

Mathias Krause (1):
  crypto: user - re-add size check for CRYPTO_MSG_GETALG

Mathias Nyman (1):
  xhci: Fix handling timeouted commands on hosts in weird states.

Nicolas Ferre (1):
  net: macb: fix default configuration for GMAC on AT91

Simon Horman (1):
  sit: correct IP protocol used in ipip6_err

Steinar H. Gunderson (1):
  usb: dwc3: exynos: Fix deferred probing storm.

Sudip Mukherjee (1):
  USB: mos7720: delete parport

Thierry Reding (1):
  usb: host: ehci-tegra: Grab the correct UTMI pads reset

Thomas Petazzoni (1):
  usb: xhci-plat: properly handle probe deferral for devm_clk_get()

Tom Goff (1):
  ipmr/ip6mr: Initialize the last assert time of mfc entries.

Willem de Bruijn (1):
  sock_diag: do not broadcast raw socket destruction

Re: Linux 4.4.15

2016-07-11 Thread Greg KH

diff --git a/Makefile b/Makefile
index fadbb9d73c6d..979088079338 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 VERSION = 4
 PATCHLEVEL = 4
-SUBLEVEL = 14
+SUBLEVEL = 15
 EXTRAVERSION =
 NAME = Blurry Fish Butt
 
diff --git a/crypto/crypto_user.c b/crypto/crypto_user.c
index 43fe85f20d57..7097a3395b25 100644
--- a/crypto/crypto_user.c
+++ b/crypto/crypto_user.c
@@ -455,6 +455,7 @@ static const int crypto_msg_min[CRYPTO_NR_MSGTYPES] = {
[CRYPTO_MSG_NEWALG  - CRYPTO_MSG_BASE] = MSGSIZE(crypto_user_alg),
[CRYPTO_MSG_DELALG  - CRYPTO_MSG_BASE] = MSGSIZE(crypto_user_alg),
[CRYPTO_MSG_UPDATEALG   - CRYPTO_MSG_BASE] = MSGSIZE(crypto_user_alg),
+   [CRYPTO_MSG_GETALG  - CRYPTO_MSG_BASE] = MSGSIZE(crypto_user_alg),
[CRYPTO_MSG_DELRNG  - CRYPTO_MSG_BASE] = 0,
 };
 
diff --git a/drivers/crypto/ux500/hash/hash_core.c 
b/drivers/crypto/ux500/hash/hash_core.c
index 66b1c3313e2e..cd4398498495 100644
--- a/drivers/crypto/ux500/hash/hash_core.c
+++ b/drivers/crypto/ux500/hash/hash_core.c
@@ -797,7 +797,7 @@ static int hash_process_data(struct hash_device_data 
*device_data,
&device_data->state);
memmove(req_ctx->state.buffer,
device_data->state.buffer,
-   HASH_BLOCK_SIZE / sizeof(u32));
+   HASH_BLOCK_SIZE);
if (ret) {
dev_err(device_data->dev,
"%s: hash_resume_state() 
failed!\n",
@@ -848,7 +848,7 @@ static int hash_process_data(struct hash_device_data 
*device_data,
 
memmove(device_data->state.buffer,
req_ctx->state.buffer,
-   HASH_BLOCK_SIZE / sizeof(u32));
+   HASH_BLOCK_SIZE);
if (ret) {
dev_err(device_data->dev, "%s: 
hash_save_state() failed!\n",
__func__);
diff --git a/drivers/crypto/vmx/aes_cbc.c b/drivers/crypto/vmx/aes_cbc.c
index 0b8fe2ec5315..f3801b983f42 100644
--- a/drivers/crypto/vmx/aes_cbc.c
+++ b/drivers/crypto/vmx/aes_cbc.c
@@ -182,7 +182,7 @@ struct crypto_alg p8_aes_cbc_alg = {
.cra_name = "cbc(aes)",
.cra_driver_name = "p8_aes_cbc",
.cra_module = THIS_MODULE,
-   .cra_priority = 1000,
+   .cra_priority = 2000,
.cra_type = &crypto_blkcipher_type,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | CRYPTO_ALG_NEED_FALLBACK,
.cra_alignmask = 0,
diff --git a/drivers/crypto/vmx/aes_ctr.c b/drivers/crypto/vmx/aes_ctr.c
index ee1306cd8f59..404a1b69a3ab 100644
--- a/drivers/crypto/vmx/aes_ctr.c
+++ b/drivers/crypto/vmx/aes_ctr.c
@@ -166,7 +166,7 @@ struct crypto_alg p8_aes_ctr_alg = {
.cra_name = "ctr(aes)",
.cra_driver_name = "p8_aes_ctr",
.cra_module = THIS_MODULE,
-   .cra_priority = 1000,
+   .cra_priority = 2000,
.cra_type = &crypto_blkcipher_type,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | CRYPTO_ALG_NEED_FALLBACK,
.cra_alignmask = 0,
diff --git a/drivers/net/ethernet/atheros/alx/main.c 
b/drivers/net/ethernet/atheros/alx/main.c
index bd377a6b067d..df54475d163b 100644
--- a/drivers/net/ethernet/atheros/alx/main.c
+++ b/drivers/net/ethernet/atheros/alx/main.c
@@ -86,9 +86,14 @@ static int alx_refill_rx_ring(struct alx_priv *alx, gfp_t 
gfp)
while (!cur_buf->skb && next != rxq->read_idx) {
struct alx_rfd *rfd = &rxq->rfd[cur];
 
-   skb = __netdev_alloc_skb(alx->dev, alx->rxbuf_size, gfp);
+   skb = __netdev_alloc_skb(alx->dev, alx->rxbuf_size + 64, gfp);
if (!skb)
break;
+
+   /* Workround for the HW RX DMA overflow issue */
+   if (((unsigned long)skb->data & 0xfff) == 0xfc0)
+   skb_reserve(skb, 64);
+
dma = dma_map_single(&alx->hw.pdev->dev,
 skb->data, alx->rxbuf_size,
 DMA_FROM_DEVICE);
diff --git a/drivers/net/ethernet/cadence/macb.c 
b/drivers/net/ethernet/cadence/macb.c
index 169059c92f80..8d54e7b41bbf 100644
--- a/drivers/net/ethernet/cadence/macb.c
+++ b/drivers/net/ethernet/cadence/macb.c
@@ -2405,9 +2405,9 @@ static int macb_init(struct platform_device *pdev)
if (bp->phy_interface == PHY_INTERFACE_MODE_RGMII)
val = GEM_BIT(RGMII);
else if (bp->phy_interface == PHY_INTERFACE_MODE_RMII &&
-(bp->caps & MACB_CAPS_USRIO_DEFAULT_IS_MII))
+(bp->caps & MACB_CAPS_USRIO_DEFAULT_IS_MII_GMII))
val = MACB_BIT(RMII);
-   else if (!(bp->caps & MACB_CAPS_USRIO_DEFAULT_IS_MII))
+   else if (!(bp->caps & MACB_CAPS_USRIO_DEFAULT_IS_MII_GMII))
val =

Re: [kernel-hardening] [PATCH v4 26/29] sched: Allow putting thread_info into task_struct

2016-07-11 Thread Linus Torvalds

On Mon, Jul 11, 2016 at 9:31 AM, Mark Rutland  wrote:
>>
>> So until you do the write that actually disables preemption you can
>> schedule away as much as you want, and after that write you no longer
>> will.
>
> I was assuming a percpu pointer to current (or preempt count).

So for the same reason that is ok *iff* you have

 - some kind of dedicated percpu register (or other base pointer - x86
has the segment thing) that gets updated when you schedule.

 - an instruction that can load 'current' directly off that register atomically.

But yes, percpu data in general is obviously not safe to access
without preemption.

 Linus

Re: [kernel-hardening] [PATCH v4 26/29] sched: Allow putting thread_info into task_struct

2016-07-11 Thread Linus Torvalds

On Mon, Jul 11, 2016 at 9:31 AM, Mark Rutland  wrote:
>>
>> So until you do the write that actually disables preemption you can
>> schedule away as much as you want, and after that write you no longer
>> will.
>
> I was assuming a percpu pointer to current (or preempt count).

So for the same reason that is ok *iff* you have

 - some kind of dedicated percpu register (or other base pointer - x86
has the segment thing) that gets updated when you schedule.

 - an instruction that can load 'current' directly off that register atomically.

But yes, percpu data in general is obviously not safe to access
without preemption.

 Linus

[GIT PULL 4/4] arm64: defconfig: Stuff for exynos for v4.8, last round

2016-07-11 Thread Krzysztof Kozlowski

Hi,

Last round of commits for v4.8.

Best regards,
Krzysztof


The following changes since commit 1a695a905c18548062509178b98bc91e67510864:

  Linux 4.7-rc1 (2016-05-29 09:29:24 -0700)

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/krzk/linux.git 
tags/samsung-defconfig64-4.8

for you to fetch changes up to 426f754be0bc258c269524bce162ae0ca1cb8927:

  arm64: defconfig: Enable more IP blocks for Exynos7 and Exynos5433 
(2016-07-11 08:06:54 +0200)


Samsung defconfig updates for ARM64 - enable drivers for
Exynos7 and Exynos5433 based boards:
1. S2MPS clock driver,
2. SoC: RTC, SPI, watchdog, EHCI, OHCI, DWC3, ADC and PWM,
3. Enable Samsung SoC sound.


Alim Akhtar (1):
  arm64: defconfig: Enable S2MPS11 clock and S3C RTC driver

Krzysztof Kozlowski (1):
  arm64: defconfig: Enable more IP blocks for Exynos7 and Exynos5433

 arch/arm64/configs/defconfig | 12 
 1 file changed, 12 insertions(+)

[GIT PULL 2/4] ARM: exynos: Stuff for v4.8, last round

2016-07-11 Thread Krzysztof Kozlowski

Hi,

Last round of commits for v4.8. On top of previous tag.

Best regards,
Krzysztof


The following changes since commit 1c03274d68f4744afe582fcff1c2e5b1c5c34b5b:

  MAINTAINERS: Extend Samsung SoC entry with S3C/S5P drivers (2016-06-23 
08:12:08 +0200)

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/krzk/linux.git 
tags/samsung-soc-4.8-3

for you to fetch changes up to 3981b11fda14ea0b459043d97c68db0a614ec9f8:

  ARM: s3c64xx: smartq: Avoid sparse warnings (2016-07-11 17:44:11 +0200)


Samsung mach/soc update for v4.8, part 3:
Just cleanup - fix Sparse warning and constify passed iomem address.


Krzysztof Kozlowski (1):
  ARM: SAMSUNG: Constify iomem address passed to s5p_init_cpu

Thierry Reding (1):
  ARM: s3c64xx: smartq: Avoid sparse warnings

 arch/arm/mach-s3c64xx/mach-smartq.c  | 1 +
 arch/arm/plat-samsung/cpu.c  | 2 +-
 arch/arm/plat-samsung/include/plat/cpu.h | 2 +-
 3 files changed, 3 insertions(+), 2 deletions(-)

[GIT PULL 1/4] ARM: exynos: Drivers for v4.8, last round

2016-07-11 Thread Krzysztof Kozlowski

Hi,

Last round of commits for v4.8. On top of previous tag.

Best regards,
Krzysztof


The following changes since commit 187364b6fcabb9f4bfefcb62fab4fcda019b5810:

  cpufreq: s5pv210: use relaxed IO accesors (2016-06-22 14:00:21 +0200)

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/krzk/linux.git 
tags/samsung-drivers-4.8-3

for you to fetch changes up to aec6341e2ac76ea8703642e83535f216b8866162:

  soc: samsung: pmu: Constify arrays with PMU data (2016-07-06 10:35:45 +0200)


Samsung drivers/soc update for v4.8, part 3
1. Fix size of allocation for Exynos SROM registers (too much was allocated).
2. Constify fix.


Krzysztof Kozlowski (1):
  soc: samsung: pmu: Constify arrays with PMU data

Seung-Woo Kim (1):
  memory: samsung: exynos-srom: Fix wrong count of registers

 drivers/memory/samsung/exynos-srom.c | 2 +-
 drivers/soc/samsung/exynos3250-pmu.c | 2 +-
 drivers/soc/samsung/exynos5420-pmu.c | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

[GIT PULL 4/4] arm64: defconfig: Stuff for exynos for v4.8, last round

2016-07-11 Thread Krzysztof Kozlowski

Hi,

Last round of commits for v4.8.

Best regards,
Krzysztof


The following changes since commit 1a695a905c18548062509178b98bc91e67510864:

  Linux 4.7-rc1 (2016-05-29 09:29:24 -0700)

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/krzk/linux.git 
tags/samsung-defconfig64-4.8

for you to fetch changes up to 426f754be0bc258c269524bce162ae0ca1cb8927:

  arm64: defconfig: Enable more IP blocks for Exynos7 and Exynos5433 
(2016-07-11 08:06:54 +0200)


Samsung defconfig updates for ARM64 - enable drivers for
Exynos7 and Exynos5433 based boards:
1. S2MPS clock driver,
2. SoC: RTC, SPI, watchdog, EHCI, OHCI, DWC3, ADC and PWM,
3. Enable Samsung SoC sound.


Alim Akhtar (1):
  arm64: defconfig: Enable S2MPS11 clock and S3C RTC driver

Krzysztof Kozlowski (1):
  arm64: defconfig: Enable more IP blocks for Exynos7 and Exynos5433

 arch/arm64/configs/defconfig | 12 
 1 file changed, 12 insertions(+)

[GIT PULL 2/4] ARM: exynos: Stuff for v4.8, last round

2016-07-11 Thread Krzysztof Kozlowski

Hi,

Last round of commits for v4.8. On top of previous tag.

Best regards,
Krzysztof


The following changes since commit 1c03274d68f4744afe582fcff1c2e5b1c5c34b5b:

  MAINTAINERS: Extend Samsung SoC entry with S3C/S5P drivers (2016-06-23 
08:12:08 +0200)

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/krzk/linux.git 
tags/samsung-soc-4.8-3

for you to fetch changes up to 3981b11fda14ea0b459043d97c68db0a614ec9f8:

  ARM: s3c64xx: smartq: Avoid sparse warnings (2016-07-11 17:44:11 +0200)


Samsung mach/soc update for v4.8, part 3:
Just cleanup - fix Sparse warning and constify passed iomem address.


Krzysztof Kozlowski (1):
  ARM: SAMSUNG: Constify iomem address passed to s5p_init_cpu

Thierry Reding (1):
  ARM: s3c64xx: smartq: Avoid sparse warnings

 arch/arm/mach-s3c64xx/mach-smartq.c  | 1 +
 arch/arm/plat-samsung/cpu.c  | 2 +-
 arch/arm/plat-samsung/include/plat/cpu.h | 2 +-
 3 files changed, 3 insertions(+), 2 deletions(-)

[GIT PULL 1/4] ARM: exynos: Drivers for v4.8, last round

2016-07-11 Thread Krzysztof Kozlowski

Hi,

Last round of commits for v4.8. On top of previous tag.

Best regards,
Krzysztof


The following changes since commit 187364b6fcabb9f4bfefcb62fab4fcda019b5810:

  cpufreq: s5pv210: use relaxed IO accesors (2016-06-22 14:00:21 +0200)

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/krzk/linux.git 
tags/samsung-drivers-4.8-3

for you to fetch changes up to aec6341e2ac76ea8703642e83535f216b8866162:

  soc: samsung: pmu: Constify arrays with PMU data (2016-07-06 10:35:45 +0200)


Samsung drivers/soc update for v4.8, part 3
1. Fix size of allocation for Exynos SROM registers (too much was allocated).
2. Constify fix.


Krzysztof Kozlowski (1):
  soc: samsung: pmu: Constify arrays with PMU data

Seung-Woo Kim (1):
  memory: samsung: exynos-srom: Fix wrong count of registers

 drivers/memory/samsung/exynos-srom.c | 2 +-
 drivers/soc/samsung/exynos3250-pmu.c | 2 +-
 drivers/soc/samsung/exynos5420-pmu.c | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

[GIT PULL 3/4] arm64: dts: exynos: Minor fix for v4.8, last round

2016-07-11 Thread Krzysztof Kozlowski

Hi,

Last round of commits for v4.8.

Best regards,
Krzysztof


The following changes since commit 1a695a905c18548062509178b98bc91e67510864:

  Linux 4.7-rc1 (2016-05-29 09:29:24 -0700)

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/krzk/linux.git 
tags/samsung-dt64-4.8-2

for you to fetch changes up to a1924466b784fbb64f10eeb213d335e3d1728b8b:

  arm64: dts: exynos: Modify the voltage range for BUCK2 for exynos7 
(2016-07-06 09:43:42 +0200)


Samsung DeviceTree changes for ARM64 for v4.8:
1. Adjust the voltage of CPU buck regulator so scaling could work.


Abhilash Kesavan (1):
  arm64: dts: exynos: Modify the voltage range for BUCK2 for exynos7

 arch/arm64/boot/dts/exynos/exynos7-espresso.dts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

[GIT PULL 3/4] arm64: dts: exynos: Minor fix for v4.8, last round

2016-07-11 Thread Krzysztof Kozlowski

Hi,

Last round of commits for v4.8.

Best regards,
Krzysztof


The following changes since commit 1a695a905c18548062509178b98bc91e67510864:

  Linux 4.7-rc1 (2016-05-29 09:29:24 -0700)

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/krzk/linux.git 
tags/samsung-dt64-4.8-2

for you to fetch changes up to a1924466b784fbb64f10eeb213d335e3d1728b8b:

  arm64: dts: exynos: Modify the voltage range for BUCK2 for exynos7 
(2016-07-06 09:43:42 +0200)


Samsung DeviceTree changes for ARM64 for v4.8:
1. Adjust the voltage of CPU buck regulator so scaling could work.


Abhilash Kesavan (1):
  arm64: dts: exynos: Modify the voltage range for BUCK2 for exynos7

 arch/arm64/boot/dts/exynos/exynos7-espresso.dts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

Re: [PATCH v4] [media] pci: Add tw5864 driver

2016-07-11 Thread Joe Perches

On Mon, 2016-07-11 at 18:17 +0300, Andrey Utkin wrote:
[]
> diff --git a/drivers/media/pci/tw5864/tw5864-core.c 
> b/drivers/media/pci/tw5864/tw5864-core.c
[]
> +static const char * const artifacts_warning =
> +"BEWARE OF KNOWN ISSUES WITH VIDEO QUALITY\n"
> +"\n"
> +"This driver was developed by Bluecherry LLC by deducing behaviour of\n"
> +"original manufacturer's driver, from both source code and execution 
> traces.\n"
> +"It is known that there are some artifacts on output video with this 
> driver:\n"
> +" - on all known hardware samples: random pixels of wrong color (mostly\n"
> +"   white, red or blue) appearing and disappearing on sequences of 
> P-frames;\n"
> +" - on some hardware samples (known with H.264 core version e006:2800):\n"
> +"   total madness on P-frames: blocks of wrong luminance; blocks of wrong\n"
> +"   colors \"creeping\" across the picture.\n"
> +"There is a workaround for both issues: avoid P-frames by setting GOP size\n"
> +"to 1. To do that, run this command on device files created by this 
> driver:\n"
> +"\n"
> +"v4l2-ctl --device /dev/videoX --set-ctrl=video_gop_size=1\n"
> +"\n";
> +
> +static char *artifacts_warning_continued =
> +"These issues are not decoding errors; all produced H.264 streams are 
> decoded\n"
> +"properly. Streams without P-frames don't have these artifacts so it's not\n"
> +"analog-to-digital conversion issues nor internal memory errors; we 
> conclude\n"
> +"it's internal H.264 encoder issues.\n"
> +"We cannot even check the original driver's behaviour because it has never\n"
> +"worked properly at all in our development environment. So these issues 
> may\n"
> +"be actually related to firmware or hardware. However it may be that 
> there's\n"
> +"just some more register settings missing in the driver which would please\n"
> +"the hardware.\n"
> +"Manufacturer didn't help much on our inquiries, but feel free to disturb\n"
> +"again the support of Intersil (owner of former Techwell).\n"
> +"\n";
[]
> +static int tw5864_initdev(struct pci_dev *pci_dev,
> +   const struct pci_device_id *pci_id)
> +{
[]
> + dev_warn(&pci_dev->dev, "%s", artifacts_warning);
> + dev_warn(&pci_dev->dev, "%s", artifacts_warning_continued);

Is all that verbosity useful?

And trivially:

Each of these blocks will start with the dev_ prefix
and the subsequent lines will not have the same prefix

Perhaps it'd be better to write this something like:

static const char * const artifacts_warning[] = {
"BEWARE OF KNOWN ISSUES WITH VIDEO QUALITY",
"",
"This driver was developed by Bluecherry LLC by deducing behaviour of",
"original manufacturer's driver, from both source code and execution 
traces.",
"It is known that there are some artifacts on output video with this 
driver:",
" - on all known hardware samples: random pixels of wrong color 
(mostly",
"   white, red or blue) appearing and disappearing on sequences of 
P-frames;",
" - on some hardware samples (known with H.264 core version 
e006:2800):",
"   total madness on P-frames: blocks of wrong luminance; blocks of 
wrong",
"   colors \"creeping\" across the picture.",
"There is a workaround for both issues: avoid P-frames by setting GOP 
size",
"to 1. To do that, run this command on device files created by this 
driver:",
"",
"v4l2-ctl --device /dev/videoX --set-ctrl=video_gop_size=1",
"",
"These issues are not decoding errors; all produced H.264 streams are 
decoded",
"properly. Streams without P-frames don't have these artifacts so it's 
not",
"analog-to-digital conversion issues nor internal memory errors; we 
conclude",
"it's internal H.264 encoder issues.",
"We cannot even check the original driver's behaviour because it has 
never",
"worked properly at all in our development environment. So these issues 
may",
"be actually related to firmware or hardware. However it may be that 
there's",
"just some more register settings missing in the driver which would 
please",
"the hardware.",
"Manufacturer didn't help much on our inquiries, but feel free to 
disturb",
"again the support of Intersil (owner of former Techwell).\n"
};

and use

for (i = 0; i < ARRAY_SIZE(artifacts_warning); i++)
	dev_warn(&pci_dev->dev, "%s\n", artifacts_warning[i]);

so that each line is prefixed.

It also might be better to issue something like a single
line dev_warn referring to the driver code and just leave
this comment in the driver sources.

Something like:

dev_warn(&pci_dev->dev,
"This driver has known defects in video quality\n");

Re: [PATCH v4] [media] pci: Add tw5864 driver

2016-07-11 Thread Joe Perches

On Mon, 2016-07-11 at 18:17 +0300, Andrey Utkin wrote:
[]
> diff --git a/drivers/media/pci/tw5864/tw5864-core.c 
> b/drivers/media/pci/tw5864/tw5864-core.c
[]
> +static const char * const artifacts_warning =
> +"BEWARE OF KNOWN ISSUES WITH VIDEO QUALITY\n"
> +"\n"
> +"This driver was developed by Bluecherry LLC by deducing behaviour of\n"
> +"original manufacturer's driver, from both source code and execution 
> traces.\n"
> +"It is known that there are some artifacts on output video with this 
> driver:\n"
> +" - on all known hardware samples: random pixels of wrong color (mostly\n"
> +"   white, red or blue) appearing and disappearing on sequences of 
> P-frames;\n"
> +" - on some hardware samples (known with H.264 core version e006:2800):\n"
> +"   total madness on P-frames: blocks of wrong luminance; blocks of wrong\n"
> +"   colors \"creeping\" across the picture.\n"
> +"There is a workaround for both issues: avoid P-frames by setting GOP size\n"
> +"to 1. To do that, run this command on device files created by this 
> driver:\n"
> +"\n"
> +"v4l2-ctl --device /dev/videoX --set-ctrl=video_gop_size=1\n"
> +"\n";
> +
> +static char *artifacts_warning_continued =
> +"These issues are not decoding errors; all produced H.264 streams are 
> decoded\n"
> +"properly. Streams without P-frames don't have these artifacts so it's not\n"
> +"analog-to-digital conversion issues nor internal memory errors; we 
> conclude\n"
> +"it's internal H.264 encoder issues.\n"
> +"We cannot even check the original driver's behaviour because it has never\n"
> +"worked properly at all in our development environment. So these issues 
> may\n"
> +"be actually related to firmware or hardware. However it may be that 
> there's\n"
> +"just some more register settings missing in the driver which would please\n"
> +"the hardware.\n"
> +"Manufacturer didn't help much on our inquiries, but feel free to disturb\n"
> +"again the support of Intersil (owner of former Techwell).\n"
> +"\n";
[]
> +static int tw5864_initdev(struct pci_dev *pci_dev,
> +   const struct pci_device_id *pci_id)
> +{
[]
> + dev_warn(&pci_dev->dev, "%s", artifacts_warning);
> + dev_warn(&pci_dev->dev, "%s", artifacts_warning_continued);

Is all that verbosity useful?

And trivially:

Each of these blocks will start with the dev_ prefix
and the subsequent lines will not have the same prefix

Perhaps it'd be better to write this something like:

static const char * const artifacts_warning[] = {
"BEWARE OF KNOWN ISSUES WITH VIDEO QUALITY",
"",
"This driver was developed by Bluecherry LLC by deducing behaviour of",
"original manufacturer's driver, from both source code and execution 
traces.",
"It is known that there are some artifacts on output video with this 
driver:",
" - on all known hardware samples: random pixels of wrong color 
(mostly",
"   white, red or blue) appearing and disappearing on sequences of 
P-frames;",
" - on some hardware samples (known with H.264 core version 
e006:2800):",
"   total madness on P-frames: blocks of wrong luminance; blocks of 
wrong",
"   colors \"creeping\" across the picture.",
"There is a workaround for both issues: avoid P-frames by setting GOP 
size",
"to 1. To do that, run this command on device files created by this 
driver:",
"",
"v4l2-ctl --device /dev/videoX --set-ctrl=video_gop_size=1",
"",
"These issues are not decoding errors; all produced H.264 streams are 
decoded",
"properly. Streams without P-frames don't have these artifacts so it's 
not",
"analog-to-digital conversion issues nor internal memory errors; we 
conclude",
"it's internal H.264 encoder issues.",
"We cannot even check the original driver's behaviour because it has 
never",
"worked properly at all in our development environment. So these issues 
may",
"be actually related to firmware or hardware. However it may be that 
there's",
"just some more register settings missing in the driver which would 
please",
"the hardware.",
"Manufacturer didn't help much on our inquiries, but feel free to 
disturb",
"again the support of Intersil (owner of former Techwell).\n"
};

and use

for (i = 0; i < ARRAY_SIZE(artifacts_warning); i++)
	dev_warn(&pci_dev->dev, "%s\n", artifacts_warning[i]);

so that each line is prefixed.

It also might be better to issue something like a single
line dev_warn referring to the driver code and just leave
this comment in the driver sources.

Something like:

dev_warn(&pci_dev->dev,
"This driver has known defects in video quality\n");

Re: [PATCH 2/2] soc: samsung: Add support for Exynos7 PMU

2016-07-11 Thread Sylwester Nawrocki

On 07/11/2016 04:44 PM, Abhilash Kesavan wrote:
>>> +   /*
>>> >> +* Set clock freeze cycle count to 0 before and after arm clamp 
>>> >> or
>>> >> +* reset signal transition
>>> >> +*/
>>> >> +   node = of_find_compatible_node(NULL, NULL,
>>> >> +   "samsung,exynos7-clock-atlas");
>>> >> +   if (node) {
>>> >> +   atlas_cmu_base = of_iomap(node, 0);
>>> >> +   if (!atlas_cmu_base)
>>> >> +   return;
>>> >> +
>>> >> +   __raw_writel(0x0,
>>> >> +   atlas_cmu_base + 
>>> >> EXYNOS7_CORE_ARMCLK_STOPCTRL);
>>> >> +   iounmap(atlas_cmu_base);
>> >
>> > Missing:
>> > of_node_put(node);
>> >
>> > ...but I think this creates unnecessary dependency on different
>> > compatible. I understand that disabling the EXTENDED_CLKSTOP is needed
>> > after configuring the PMU so this code belongs here. However
>> > everything you need is just a mapping of CMU address. The PMU driver
>> > should receive in bindings everything it needs to do its work. Either
>> > it is a phandle to something or an address for iomap. In this case the
>> > PMU should probably get two addresses: PMU and optionally CMU (part of
>> > CMU for example). Of course bindings would have to be updated.
>
> I will add an optional CMU phandle to the PMU bindings.

We could additionally split the CMU_ATLAS region into 2 regions in DT
(derived from exynos7420 documentation):

reg = <0x11800000 0xF08>, // offsets 0x0000...0x0F04
  <0x11801000 0x8C>,  // offsets 0x1000...0x1088

so that the first can be mapped by the clk driver and the second by 
the PMU driver? It seems the first region is strictly clock functionality
related, while the second contains power control related and other 
registers.

However I'm not sure it is a good idea, for consistency this would need 
to be done also for CMU_APOLLO, CMU_MIF{0...3}.  All these CMUs don't have 
DT bindings defined yet though and there are no corresponding dts entries.

-- 
Thanks,
Sylwester

Re: [PATCH 1/2] mfd: cros_ec: Add MKBP event support

2016-07-11 Thread Lee Jones

On Mon, 11 Jul 2016, Lee Jones wrote:

> On Tue, 05 Jul 2016, Olof Johansson wrote:
> 
> > On Tue, Jul 5, 2016 at 2:00 PM, Dmitry Torokhov
> >  wrote:
> > > On July 5, 2016 1:55:44 PM PDT, Olof Johansson  wrote:
> > >>On Tue, Jul 5, 2016 at 1:54 PM, Olof Johansson  wrote:
> > >>> On Fri, Jul 1, 2016 at 2:07 AM, Enric Balletbo i Serra
> > >>>  wrote:
> >  From: Vic Yang 
> > 
> >  Newer revisions of the ChromeOS EC add more events besides the
> > >>keyboard
> >  ones. So handle interrupts in the MFD driver and let consumers
> > >>register
> >  for notifications for the events they might care.
> > 
> >  To keep backward compatibility, if the EC doesn't support MKBP
> > >>event, we
> >  fall back to the old MKBP key matrix host command.
> > 
> >  Signed-off-by: Vic Yang 
> >  Signed-off-by: Tomeu Vizoso 
> >  Tested-by: Enric Balletbo i Serra 
> >  Cc: Randall Spangler 
> >  Cc: Vincent Palatin 
> >  Cc: Benson Leung 
> > >>>
> > >>> Probably easiest to merge this through the MFD tree due to the
> > >>> overlaps, so for that purpose:
> > >>>
> > >>> Acked-by: Olof Johansson 
> > >>
> > >>Argh, I just noticed that the second patch is an input patch, not
> > >>another MFD patch. Either way, I'm OK with this going through the
> > >>input tree if that's easiest. If so, you should probably wait for an
> > >>ack from Lee as well.
> > >
> > > Hmm, I thought I already acked input portion to go through MFD tree... or 
> > > am I confusing this with some other patch?
> > 
> > Oh, then we're all set. That patch didn't thread with this one in my
> > mailbox so I didn't see the comment thread on it.
>  
> Yes, same for me.
> 
> > Lee, all yours.
> 
> Err, nice, ta!  =;-)
> 
> Enric,
> 
> Please resubmit this set 'threaded' so I might take proper care of
> it.

Wait!  Ignore that.  I got mixed up with what Olof said and the way
things looked in my inbox.  The two patches are in fact threaded.
I'll see to them later in the week.  Please bear with me.

-- 
Lee Jones
Linaro STMicroelectronics Landing Team Lead
Linaro.org │ Open source software for ARM SoCs
Follow Linaro: Facebook | Twitter | Blog

Re: [PATCH 2/2] soc: samsung: Add support for Exynos7 PMU

2016-07-11 Thread Sylwester Nawrocki

On 07/11/2016 04:44 PM, Abhilash Kesavan wrote:
>>> +   /*
>>> >> +* Set clock freeze cycle count to 0 before and after arm clamp 
>>> >> or
>>> >> +* reset signal transition
>>> >> +*/
>>> >> +   node = of_find_compatible_node(NULL, NULL,
>>> >> +   "samsung,exynos7-clock-atlas");
>>> >> +   if (node) {
>>> >> +   atlas_cmu_base = of_iomap(node, 0);
>>> >> +   if (!atlas_cmu_base)
>>> >> +   return;
>>> >> +
>>> >> +   __raw_writel(0x0,
>>> >> +   atlas_cmu_base + 
>>> >> EXYNOS7_CORE_ARMCLK_STOPCTRL);
>>> >> +   iounmap(atlas_cmu_base);
>> >
>> > Missing:
>> > of_node_put(node);
>> >
>> > ...but I think this creates unnecessary dependency on different
>> > compatible. I understand that disabling the EXTENDED_CLKSTOP is needed
>> > after configuring the PMU so this code belongs here. However
>> > everything you need is just a mapping of CMU address. The PMU driver
>> > should receive in bindings everything it needs to do its work. Either
>> > it is a phandle to something or an address for iomap. In this case the
>> > PMU should probably get two addresses: PMU and optionally CMU (part of
>> > CMU for example). Of course bindings would have to be updated.
>
> I will add an optional CMU phandle to the PMU bindings.

We could additionally split the CMU_ATLAS region into 2 regions in DT
(derived from exynos7420 documentation):

reg = <0x11800000 0xF08>, // offsets 0x0000...0x0F04
  <0x11801000 0x8C>,  // offsets 0x1000...0x1088

so that the first can be mapped by the clk driver and the second by 
the PMU driver? It seems the first region is strictly clock functionality
related, while the second contains power control related and other 
registers.

However I'm not sure it is a good idea, for consistency this would need 
to be done also for CMU_APOLLO, CMU_MIF{0...3}.  All these CMUs don't have 
DT bindings defined yet though and there are no corresponding dts entries.

-- 
Thanks,
Sylwester

Re: [PATCH 1/2] mfd: cros_ec: Add MKBP event support

2016-07-11 Thread Lee Jones

On Mon, 11 Jul 2016, Lee Jones wrote:

> On Tue, 05 Jul 2016, Olof Johansson wrote:
> 
> > On Tue, Jul 5, 2016 at 2:00 PM, Dmitry Torokhov
> >  wrote:
> > > On July 5, 2016 1:55:44 PM PDT, Olof Johansson  wrote:
> > >>On Tue, Jul 5, 2016 at 1:54 PM, Olof Johansson  wrote:
> > >>> On Fri, Jul 1, 2016 at 2:07 AM, Enric Balletbo i Serra
> > >>>  wrote:
> >  From: Vic Yang 
> > 
> >  Newer revisions of the ChromeOS EC add more events besides the
> > >>keyboard
> >  ones. So handle interrupts in the MFD driver and let consumers
> > >>register
> >  for notifications for the events they might care.
> > 
> >  To keep backward compatibility, if the EC doesn't support MKBP
> > >>event, we
> >  fall back to the old MKBP key matrix host command.
> > 
> >  Signed-off-by: Vic Yang 
> >  Signed-off-by: Tomeu Vizoso 
> >  Tested-by: Enric Balletbo i Serra 
> >  Cc: Randall Spangler 
> >  Cc: Vincent Palatin 
> >  Cc: Benson Leung 
> > >>>
> > >>> Probably easiest to merge this through the MFD tree due to the
> > >>> overlaps, so for that purpose:
> > >>>
> > >>> Acked-by: Olof Johansson 
> > >>
> > >>Argh, I just noticed that the second patch is an input patch, not
> > >>another MFD patch. Either way, I'm OK with this going through the
> > >>input tree if that's easiest. If so, you should probably wait for an
> > >>ack from Lee as well.
> > >
> > > Hmm, I thought I already acked input portion to go through MFD tree... or 
> > > am I confusing this with some other patch?
> > 
> > Oh, then we're all set. That patch didn't thread with this one in my
> > mailbox so I didn't see the comment thread on it.
>  
> Yes, same for me.
> 
> > Lee, all yours.
> 
> Err, nice, ta!  =;-)
> 
> Enric,
> 
> Please resubmit this set 'threaded' so I might take proper care of
> it.

Wait!  Ignore that.  I got mixed up with what Olof said and the way
things looked in my inbox.  The two patches are in fact threaded.
I'll see to them later in the week.  Please bear with me.

-- 
Lee Jones
Linaro STMicroelectronics Landing Team Lead
Linaro.org │ Open source software for ARM SoCs
Follow Linaro: Facebook | Twitter | Blog

Re: [PATCH 1/2] mfd: cros_ec: Add MKBP event support

2016-07-11 Thread Lee Jones

On Tue, 05 Jul 2016, Olof Johansson wrote:

> On Tue, Jul 5, 2016 at 2:00 PM, Dmitry Torokhov
>  wrote:
> > On July 5, 2016 1:55:44 PM PDT, Olof Johansson  wrote:
> >>On Tue, Jul 5, 2016 at 1:54 PM, Olof Johansson  wrote:
> >>> On Fri, Jul 1, 2016 at 2:07 AM, Enric Balletbo i Serra
> >>>  wrote:
>  From: Vic Yang 
> 
>  Newer revisions of the ChromeOS EC add more events besides the
> >>keyboard
>  ones. So handle interrupts in the MFD driver and let consumers
> >>register
>  for notifications for the events they might care.
> 
>  To keep backward compatibility, if the EC doesn't support MKBP
> >>event, we
>  fall back to the old MKBP key matrix host command.
> 
>  Signed-off-by: Vic Yang 
>  Signed-off-by: Tomeu Vizoso 
>  Tested-by: Enric Balletbo i Serra 
>  Cc: Randall Spangler 
>  Cc: Vincent Palatin 
>  Cc: Benson Leung 
> >>>
> >>> Probably easiest to merge this through the MFD tree due to the
> >>> overlaps, so for that purpose:
> >>>
> >>> Acked-by: Olof Johansson 
> >>
> >>Argh, I just noticed that the second patch is an input patch, not
> >>another MFD patch. Either way, I'm OK with this going through the
> >>input tree if that's easiest. If so, you should probably wait for an
> >>ack from Lee as well.
> >
> > Hmm, I thought I already acked input portion to go through MFD tree... or 
> > am I confusing this with some other patch?
> 
> Oh, then we're all set. That patch didn't thread with this one in my
> mailbox so I didn't see the comment thread on it.
 
Yes, same for me.

> Lee, all yours.

Err, nice, ta!  =;-)

Enric,

Please resubmit this set 'threaded' so I might take proper care of
it.

-- 
Lee Jones
Linaro STMicroelectronics Landing Team Lead
Linaro.org │ Open source software for ARM SoCs
Follow Linaro: Facebook | Twitter | Blog

Re: [PATCH 1/2] mfd: cros_ec: Add MKBP event support

2016-07-11 Thread Lee Jones

On Tue, 05 Jul 2016, Olof Johansson wrote:

> On Tue, Jul 5, 2016 at 2:00 PM, Dmitry Torokhov
>  wrote:
> > On July 5, 2016 1:55:44 PM PDT, Olof Johansson  wrote:
> >>On Tue, Jul 5, 2016 at 1:54 PM, Olof Johansson  wrote:
> >>> On Fri, Jul 1, 2016 at 2:07 AM, Enric Balletbo i Serra
> >>>  wrote:
>  From: Vic Yang 
> 
>  Newer revisions of the ChromeOS EC add more events besides the
> >>keyboard
>  ones. So handle interrupts in the MFD driver and let consumers
> >>register
>  for notifications for the events they might care.
> 
>  To keep backward compatibility, if the EC doesn't support MKBP
> >>event, we
>  fall back to the old MKBP key matrix host command.
> 
>  Signed-off-by: Vic Yang 
>  Signed-off-by: Tomeu Vizoso 
>  Tested-by: Enric Balletbo i Serra 
>  Cc: Randall Spangler 
>  Cc: Vincent Palatin 
>  Cc: Benson Leung 
> >>>
> >>> Probably easiest to merge this through the MFD tree due to the
> >>> overlaps, so for that purpose:
> >>>
> >>> Acked-by: Olof Johansson 
> >>
> >>Argh, I just noticed that the second patch is an input patch, not
> >>another MFD patch. Either way, I'm OK with this going through the
> >>input tree if that's easiest. If so, you should probably wait for an
> >>ack from Lee as well.
> >
> > Hmm, I thought I already acked input portion to go through MFD tree... or 
> > am I confusing this with some other patch?
> 
> Oh, then we're all set. That patch didn't thread with this one in my
> mailbox so I didn't see the comment thread on it.
 
Yes, same for me.

> Lee, all yours.

Err, nice, ta!  =;-)

Enric,

Please resubmit this set 'threaded' so I might take proper care of
it.

-- 
Lee Jones
Linaro STMicroelectronics Landing Team Lead
Linaro.org │ Open source software for ARM SoCs
Follow Linaro: Facebook | Twitter | Blog

Re: [kernel-hardening] [PATCH v4 26/29] sched: Allow putting thread_info into task_struct

2016-07-11 Thread Mark Rutland

On Mon, Jul 11, 2016 at 09:06:58AM -0700, Linus Torvalds wrote:
> On Jul 11, 2016 7:55 AM, "Andy Lutomirski" <[1]l...@amacapital.net> wrote:
> >
> > How do you intend to find 'current' to get to the preempt count
> > without first disabling preemption?
>
> Actually, that is the classic case of "not a problem".
>
> The thing is, it doesn't matter if you schedule away while looking up
> current or the preempt count - because both values are idempotent wrt
> scheduling.
>
> So until you do the write that actually disables preemption you can
> schedule away as much as you want, and after that write you no longer
> will.

I was assuming a percpu pointer to current (or preempt count).

The percpu offset might be stale at the point you try to dereference
that, even though current itself hasn't changed, and you may access the
wrong CPU's value.

> This is different wrt a per-cpu area - which is clearly not idempotent wrt
> scheduling.
>
> The reason per-cpu works on x86 is that we have an atomic rmw operation
> that is *also* atomic wrt the CPU lookup (thanks to the segment base)

Sure, understood.

Mark.

Re: [kernel-hardening] [PATCH v4 26/29] sched: Allow putting thread_info into task_struct

2016-07-11 Thread Mark Rutland

On Mon, Jul 11, 2016 at 09:06:58AM -0700, Linus Torvalds wrote:
> On Jul 11, 2016 7:55 AM, "Andy Lutomirski" <[1]l...@amacapital.net> wrote:
> >
> > How do you intend to find 'current' to get to the preempt count
> > without first disabling preemption?
>
> Actually, that is the classic case of "not a problem".
>
> The thing is, it doesn't matter if you schedule away while looking up
> current or the preempt count - because both values are idempotent wrt
> scheduling.
>
> So until you do the write that actually disables preemption you can
> schedule away as much as you want, and after that write you no longer
> will.

I was assuming a percpu pointer to current (or preempt count).

The percpu offset might be stale at the point you try to dereference
that, even though current itself hasn't changed, and you may access the
wrong CPU's value.

> This is different wrt a per-cpu area - which is clearly not idempotent wrt
> scheduling.
>
> The reason per-cpu works on x86 is that we have an atomic rmw operation
> that is *also* atomic wrt the CPU lookup (thanks to the segment base)

Sure, understood.

Mark.

Re: [PATCH v2 06/13] sched: Store maximum per-cpu capacity in root domain

2016-07-11 Thread Dietmar Eggemann

On 11/07/16 11:18, Peter Zijlstra wrote:
> On Wed, Jun 22, 2016 at 06:03:17PM +0100, Morten Rasmussen wrote:
>> @@ -6905,11 +6906,19 @@ static int build_sched_domains(const struct cpumask 
>> *cpu_map,
>>  /* Attach the domains */
>>  rcu_read_lock();
>>  for_each_cpu(i, cpu_map) {
>> +rq = cpu_rq(i);
>>  sd = *per_cpu_ptr(d.sd, i);
>>  cpu_attach_domain(sd, d.rd, i);
>> +
>> +if (rq->cpu_capacity_orig > rq->rd->max_cpu_capacity)
>> +rq->rd->max_cpu_capacity = rq->cpu_capacity_orig;
>>  }
> 
> Should you not set that _before_ cpu_attach_domain(), such that the
> state is up-to-date when it's published?

yes, much better.

> Also, since it's lockless, should we not use {READ,WRITE}_ONCE() with it?

You mean for rq->rd->max_cpu_capacity ? IMHO, there is a data dependency
between the read and the write and the code only runs on one cpu.

I assume here that this is related to item 2 'Overlapping loads and
stores within a particular CPU ...' in GUARANTEES of
Documentation/memory-barriers.txt.

Am I missing something?

>>  rcu_read_unlock();
>>  
>> +if (rq)
>> +pr_info("span: %*pbl (max cpu_capacity = %lu)\n",
>> +cpumask_pr_args(cpu_map), rq->rd->max_cpu_capacity);
>> +
> 
> While a single statement, it is multi line, please add brackets.

OK.

> 
>>  ret = 0;
>>  error:

Re: [PATCH v2 06/13] sched: Store maximum per-cpu capacity in root domain

2016-07-11 Thread Dietmar Eggemann

On 11/07/16 11:18, Peter Zijlstra wrote:
> On Wed, Jun 22, 2016 at 06:03:17PM +0100, Morten Rasmussen wrote:
>> @@ -6905,11 +6906,19 @@ static int build_sched_domains(const struct cpumask 
>> *cpu_map,
>>  /* Attach the domains */
>>  rcu_read_lock();
>>  for_each_cpu(i, cpu_map) {
>> +rq = cpu_rq(i);
>>  sd = *per_cpu_ptr(d.sd, i);
>>  cpu_attach_domain(sd, d.rd, i);
>> +
>> +if (rq->cpu_capacity_orig > rq->rd->max_cpu_capacity)
>> +rq->rd->max_cpu_capacity = rq->cpu_capacity_orig;
>>  }
> 
> Should you not set that _before_ cpu_attach_domain(), such that the
> state is up-to-date when it's published?

yes, much better.

> Also, since it's lockless, should we not use {READ,WRITE}_ONCE() with it?

You mean for rq->rd->max_cpu_capacity ? IMHO, there is a data dependency
between the read and the write and the code only runs on one cpu.

I assume here that this is related to item 2 'Overlapping loads and
stores within a particular CPU ...' in GUARANTEES of
Documentation/memory-barriers.txt.

Am I missing something?

>>  rcu_read_unlock();
>>  
>> +if (rq)
>> +pr_info("span: %*pbl (max cpu_capacity = %lu)\n",
>> +cpumask_pr_args(cpu_map), rq->rd->max_cpu_capacity);
>> +
> 
> While a single statement, it is multi line, please add brackets.

OK.

> 
>>  ret = 0;
>>  error:

[tip:x86/fpu] x86/fpu/xstate: Re-enable XSAVES

2016-07-11 Thread tip-bot for Yu-cheng Yu

Commit-ID:  b8be15d588060a03569ac85dc4a0247460988f5b
Gitweb: http://git.kernel.org/tip/b8be15d588060a03569ac85dc4a0247460988f5b
Author: Yu-cheng Yu 
AuthorDate: Mon, 11 Jul 2016 09:18:57 -0700
Committer:  Ingo Molnar 
CommitDate: Mon, 11 Jul 2016 16:44:01 +0200

x86/fpu/xstate: Re-enable XSAVES

We did not handle XSAVES instructions correctly. There were issues in
converting between standard and compacted format when interfacing with
user-space. These issues have been corrected.

Add a WARN_ONCE() to make it clear that XSAVES supervisor states are not
yet implemented.

Signed-off-by: Yu-cheng Yu 
Signed-off-by: Fenghua Yu 
Reviewed-by: Dave Hansen 
Cc: H. Peter Anvin 
Cc: Linus Torvalds 
Cc: Peter Zijlstra 
Cc: Ravi V Shankar 
Cc: Thomas Gleixner 
Link: 
http://lkml.kernel.org/r/1468253937-40008-5-git-send-email-fenghua...@intel.com
Signed-off-by: Ingo Molnar 
---
 arch/x86/kernel/fpu/init.c   | 15 ---
 arch/x86/kernel/fpu/xstate.c |  9 +
 2 files changed, 9 insertions(+), 15 deletions(-)

diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index 60f3839..93982ae 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -230,21 +230,6 @@ static void __init 
fpu__init_system_xstate_size_legacy(void)
}
 
fpu_user_xstate_size = fpu_kernel_xstate_size;
-
-   /*
-* Quirk: we don't yet handle the XSAVES* instructions
-* correctly, as we don't correctly convert between
-* standard and compacted format when interfacing
-* with user-space - so disable it for now.
-*
-* The difference is small: with recent CPUs the
-* compacted format is only marginally smaller than
-* the standard FPU state format.
-*
-* ( This is easy to backport while we are fixing
-*   XSAVES* support. )
-*/
-   setup_clear_cpu_cap(X86_FEATURE_XSAVES);
 }
 
 /*
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 4fb8dd7..3169bca 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -221,6 +221,15 @@ void fpu__init_cpu_xstate(void)
 {
if (!boot_cpu_has(X86_FEATURE_XSAVE) || !xfeatures_mask)
return;
+   /*
+* Make it clear that XSAVES supervisor states are not yet
+* implemented should anyone expect it to work by changing
+* bits in XFEATURE_MASK_* macros and XCR0.
+*/
+   WARN_ONCE((xfeatures_mask & XFEATURE_MASK_SUPERVISOR),
+   "x86/fpu: XSAVES supervisor states are not yet implemented.\n");
+
+   xfeatures_mask &= ~XFEATURE_MASK_SUPERVISOR;
 
cr4_set_bits(X86_CR4_OSXSAVE);
xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures_mask);

[tip:x86/fpu] x86/fpu/xstate: Re-enable XSAVES

2016-07-11 Thread tip-bot for Yu-cheng Yu

Commit-ID:  b8be15d588060a03569ac85dc4a0247460988f5b
Gitweb: http://git.kernel.org/tip/b8be15d588060a03569ac85dc4a0247460988f5b
Author: Yu-cheng Yu 
AuthorDate: Mon, 11 Jul 2016 09:18:57 -0700
Committer:  Ingo Molnar 
CommitDate: Mon, 11 Jul 2016 16:44:01 +0200

x86/fpu/xstate: Re-enable XSAVES

We did not handle XSAVES instructions correctly. There were issues in
converting between standard and compacted format when interfacing with
user-space. These issues have been corrected.

Add a WARN_ONCE() to make it clear that XSAVES supervisor states are not
yet implemented.

Signed-off-by: Yu-cheng Yu 
Signed-off-by: Fenghua Yu 
Reviewed-by: Dave Hansen 
Cc: H. Peter Anvin 
Cc: Linus Torvalds 
Cc: Peter Zijlstra 
Cc: Ravi V Shankar 
Cc: Thomas Gleixner 
Link: 
http://lkml.kernel.org/r/1468253937-40008-5-git-send-email-fenghua...@intel.com
Signed-off-by: Ingo Molnar 
---
 arch/x86/kernel/fpu/init.c   | 15 ---
 arch/x86/kernel/fpu/xstate.c |  9 +
 2 files changed, 9 insertions(+), 15 deletions(-)

diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index 60f3839..93982ae 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -230,21 +230,6 @@ static void __init 
fpu__init_system_xstate_size_legacy(void)
}
 
fpu_user_xstate_size = fpu_kernel_xstate_size;
-
-   /*
-* Quirk: we don't yet handle the XSAVES* instructions
-* correctly, as we don't correctly convert between
-* standard and compacted format when interfacing
-* with user-space - so disable it for now.
-*
-* The difference is small: with recent CPUs the
-* compacted format is only marginally smaller than
-* the standard FPU state format.
-*
-* ( This is easy to backport while we are fixing
-*   XSAVES* support. )
-*/
-   setup_clear_cpu_cap(X86_FEATURE_XSAVES);
 }
 
 /*
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 4fb8dd7..3169bca 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -221,6 +221,15 @@ void fpu__init_cpu_xstate(void)
 {
if (!boot_cpu_has(X86_FEATURE_XSAVE) || !xfeatures_mask)
return;
+   /*
+* Make it clear that XSAVES supervisor states are not yet
+* implemented should anyone expect it to work by changing
+* bits in XFEATURE_MASK_* macros and XCR0.
+*/
+   WARN_ONCE((xfeatures_mask & XFEATURE_MASK_SUPERVISOR),
+   "x86/fpu: XSAVES supervisor states are not yet implemented.\n");
+
+   xfeatures_mask &= ~XFEATURE_MASK_SUPERVISOR;
 
cr4_set_bits(X86_CR4_OSXSAVE);
xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures_mask);

[tip:x86/fpu] x86/fpu/xstate: Return NULL for disabled xstate component address

2016-07-11 Thread tip-bot for Yu-cheng Yu

Commit-ID:  5060b91513b866f774da15dfd82157864c4b1683
Gitweb: http://git.kernel.org/tip/5060b91513b866f774da15dfd82157864c4b1683
Author: Yu-cheng Yu 
AuthorDate: Mon, 11 Jul 2016 09:18:55 -0700
Committer:  Ingo Molnar 
CommitDate: Mon, 11 Jul 2016 16:44:00 +0200

x86/fpu/xstate: Return NULL for disabled xstate component address

It is an error to request a disabled XSAVE/XSAVES component address.
For that case, make __raw_xsave_addr() return a NULL and issue a
warning.

Signed-off-by: Yu-cheng Yu 
Signed-off-by: Fenghua Yu 
Reviewed-by: Dave Hansen 
Cc: H. Peter Anvin 
Cc: Linus Torvalds 
Cc: Peter Zijlstra 
Cc: Ravi V Shankar 
Cc: Thomas Gleixner 
Link: 
http://lkml.kernel.org/r/1468253937-40008-3-git-send-email-fenghua...@intel.com
Signed-off-by: Ingo Molnar 
---
 arch/x86/kernel/fpu/xstate.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index f8d1aff..4fb8dd7 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -760,6 +760,11 @@ void *__raw_xsave_addr(struct xregs_state *xsave, int 
xstate_feature_mask)
 {
int feature_nr = fls64(xstate_feature_mask) - 1;
 
+   if (!xfeature_enabled(feature_nr)) {
+   WARN_ON_FPU(1);
+   return NULL;
+   }
+
return (void *)xsave + xstate_comp_offsets[feature_nr];
 }
 /*

[tip:x86/fpu] x86/fpu/xstate: Fix fpstate_init() for XRSTORS

2016-07-11 Thread tip-bot for Yu-cheng Yu

Commit-ID:  35ac2d7ba787eb4b7418a5a6f5919c25e10a780a
Gitweb: http://git.kernel.org/tip/35ac2d7ba787eb4b7418a5a6f5919c25e10a780a
Author: Yu-cheng Yu 
AuthorDate: Mon, 11 Jul 2016 09:18:56 -0700
Committer:  Ingo Molnar 
CommitDate: Mon, 11 Jul 2016 16:44:00 +0200

x86/fpu/xstate: Fix fpstate_init() for XRSTORS

In XSAVES mode if fpstate_init() is used to initialize a
task's extended state area, xsave.header.xcomp_bv[63] must
be set. Otherwise, when the task is scheduled, a warning is
triggered from copy_kernel_to_xregs().

One such test case is: setting an invalid extended state
through PTRACE. xstateregs_set() then rejects the syscall
and re-initializes the task's extended state area, which triggers
the warning mentioned above.

Signed-off-by: Yu-cheng Yu 
Signed-off-by: Fenghua Yu 
Reviewed-by: Dave Hansen 
Cc: H. Peter Anvin 
Cc: Linus Torvalds 
Cc: Peter Zijlstra 
Cc: Ravi V Shankar 
Cc: Thomas Gleixner 
Link: 
http://lkml.kernel.org/r/1468253937-40008-4-git-send-email-fenghua...@intel.com
Signed-off-by: Ingo Molnar 
---
 arch/x86/include/asm/fpu/types.h | 6 ++
 arch/x86/kernel/fpu/core.c   | 8 
 2 files changed, 14 insertions(+)

diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h
index 12dd648..48df486 100644
--- a/arch/x86/include/asm/fpu/types.h
+++ b/arch/x86/include/asm/fpu/types.h
@@ -232,6 +232,12 @@ struct xstate_header {
 } __attribute__((packed));
 
 /*
+ * xstate_header.xcomp_bv[63] indicates that the extended_state_area
+ * is in compacted format.
+ */
+#define XCOMP_BV_COMPACTED_FORMAT ((u64)1 << 63)
+
+/*
  * This is our most modern FPU state format, as saved by the XSAVE
  * and restored by the XRSTOR instructions.
  *
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index c759bd0..3fc03a0 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -8,6 +8,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 #include 
@@ -229,6 +230,13 @@ void fpstate_init(union fpregs_state *state)
 
memset(state, 0, fpu_kernel_xstate_size);
 
+   /*
+* XRSTORS requires that this bit is set in xcomp_bv, or
+* it will #GP. Make sure it is replaced after the memset().
+*/
+   if (static_cpu_has(X86_FEATURE_XSAVES))
+   state->xsave.header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT;
+
if (static_cpu_has(X86_FEATURE_FXSR))
fpstate_init_fxstate(&state->fxsave);
else

[tip:x86/fpu] x86/fpu/xstate: Return NULL for disabled xstate component address

2016-07-11 Thread tip-bot for Yu-cheng Yu

Commit-ID:  5060b91513b866f774da15dfd82157864c4b1683
Gitweb: http://git.kernel.org/tip/5060b91513b866f774da15dfd82157864c4b1683
Author: Yu-cheng Yu 
AuthorDate: Mon, 11 Jul 2016 09:18:55 -0700
Committer:  Ingo Molnar 
CommitDate: Mon, 11 Jul 2016 16:44:00 +0200

x86/fpu/xstate: Return NULL for disabled xstate component address

It is an error to request a disabled XSAVE/XSAVES component address.
For that case, make __raw_xsave_addr() return a NULL and issue a
warning.

Signed-off-by: Yu-cheng Yu 
Signed-off-by: Fenghua Yu 
Reviewed-by: Dave Hansen 
Cc: H. Peter Anvin 
Cc: Linus Torvalds 
Cc: Peter Zijlstra 
Cc: Ravi V Shankar 
Cc: Thomas Gleixner 
Link: 
http://lkml.kernel.org/r/1468253937-40008-3-git-send-email-fenghua...@intel.com
Signed-off-by: Ingo Molnar 
---
 arch/x86/kernel/fpu/xstate.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index f8d1aff..4fb8dd7 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -760,6 +760,11 @@ void *__raw_xsave_addr(struct xregs_state *xsave, int 
xstate_feature_mask)
 {
int feature_nr = fls64(xstate_feature_mask) - 1;
 
+   if (!xfeature_enabled(feature_nr)) {
+   WARN_ON_FPU(1);
+   return NULL;
+   }
+
return (void *)xsave + xstate_comp_offsets[feature_nr];
 }
 /*

[tip:x86/fpu] x86/fpu/xstate: Fix fpstate_init() for XRSTORS

2016-07-11 Thread tip-bot for Yu-cheng Yu

Commit-ID:  35ac2d7ba787eb4b7418a5a6f5919c25e10a780a
Gitweb: http://git.kernel.org/tip/35ac2d7ba787eb4b7418a5a6f5919c25e10a780a
Author: Yu-cheng Yu 
AuthorDate: Mon, 11 Jul 2016 09:18:56 -0700
Committer:  Ingo Molnar 
CommitDate: Mon, 11 Jul 2016 16:44:00 +0200

x86/fpu/xstate: Fix fpstate_init() for XRSTORS

In XSAVES mode if fpstate_init() is used to initialize a
task's extended state area, xsave.header.xcomp_bv[63] must
be set. Otherwise, when the task is scheduled, a warning is
triggered from copy_kernel_to_xregs().

One such test case is: setting an invalid extended state
through PTRACE. xstateregs_set() then rejects the syscall
and re-initializes the task's extended state area, which triggers
the warning mentioned above.

Signed-off-by: Yu-cheng Yu 
Signed-off-by: Fenghua Yu 
Reviewed-by: Dave Hansen 
Cc: H. Peter Anvin 
Cc: Linus Torvalds 
Cc: Peter Zijlstra 
Cc: Ravi V Shankar 
Cc: Thomas Gleixner 
Link: 
http://lkml.kernel.org/r/1468253937-40008-4-git-send-email-fenghua...@intel.com
Signed-off-by: Ingo Molnar 
---
 arch/x86/include/asm/fpu/types.h | 6 ++
 arch/x86/kernel/fpu/core.c   | 8 
 2 files changed, 14 insertions(+)

diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h
index 12dd648..48df486 100644
--- a/arch/x86/include/asm/fpu/types.h
+++ b/arch/x86/include/asm/fpu/types.h
@@ -232,6 +232,12 @@ struct xstate_header {
 } __attribute__((packed));
 
 /*
+ * xstate_header.xcomp_bv[63] indicates that the extended_state_area
+ * is in compacted format.
+ */
+#define XCOMP_BV_COMPACTED_FORMAT ((u64)1 << 63)
+
+/*
  * This is our most modern FPU state format, as saved by the XSAVE
  * and restored by the XRSTOR instructions.
  *
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index c759bd0..3fc03a0 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -8,6 +8,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 #include 
@@ -229,6 +230,13 @@ void fpstate_init(union fpregs_state *state)
 
memset(state, 0, fpu_kernel_xstate_size);
 
+   /*
+* XRSTORS requires that this bit is set in xcomp_bv, or
+* it will #GP. Make sure it is replaced after the memset().
+*/
+   if (static_cpu_has(X86_FEATURE_XSAVES))
+   state->xsave.header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT;
+
if (static_cpu_has(X86_FEATURE_FXSR))
fpstate_init_fxstate(&state->fxsave);
else

Re: [PATCH v2 1/6] dt-bindings: clock: add DT binding for the Xtal clock on Armada 3700

2016-07-11 Thread Gregory CLEMENT

Hi Thomas,
 
 On ven., juil. 08 2016, Thomas Petazzoni  
wrote:

> Hello,
>
> On Fri,  8 Jul 2016 00:37:46 +0200, Gregory CLEMENT wrote:
>
>> +gpio1: gpio@13800 {
>> +compatible = "marvell,mvebu-gpio-3700", "syscon", "simple-mfd";
>
> I find this compatible string not very consistent with what we do for
> other drivers, it should have been:
>
>   marvell,armada-3700-gpio

Thanks for pointing this out. We missed it during the last review. I agree
that using marvell,armada-3700-gpio is more appropriate, especially
because the gpio controller on Armada 37xx seems to be different from
the ones used on the other mvebu SoCs.

Gregory

>
> or something like that.
>
>
>> +xtalclk: xtal-clk {
>> +compatible = "marvell,armada-3700-xtal-clock";
>
> See here for example.
>
> Thomas
> -- 
> Thomas Petazzoni, CTO, Free Electrons
> Embedded Linux, Kernel and Android engineering
> http://free-electrons.com

-- 
Gregory Clement, Free Electrons
Kernel, drivers, real-time and embedded Linux
development, consulting, training and support.
http://free-electrons.com

Re: [PATCH v2 1/6] dt-bindings: clock: add DT binding for the Xtal clock on Armada 3700

2016-07-11 Thread Gregory CLEMENT

Hi Thomas,
 
 On ven., juil. 08 2016, Thomas Petazzoni  
wrote:

> Hello,
>
> On Fri,  8 Jul 2016 00:37:46 +0200, Gregory CLEMENT wrote:
>
>> +gpio1: gpio@13800 {
>> +compatible = "marvell,mvebu-gpio-3700", "syscon", "simple-mfd";
>
> I find this compatible string not very consistent with what we do for
> other drivers, it should have been:
>
>   marvell,armada-3700-gpio

Thanks for pointing this out. We missed it during the last review. I agree
that using marvell,armada-3700-gpio is more appropriate, especially
because the gpio controller on Armada 37xx seems to be different from
the ones used on the other mvebu SoCs.

Gregory

>
> or something like that.
>
>
>> +xtalclk: xtal-clk {
>> +compatible = "marvell,armada-3700-xtal-clock";
>
> See here for example.
>
> Thomas
> -- 
> Thomas Petazzoni, CTO, Free Electrons
> Embedded Linux, Kernel and Android engineering
> http://free-electrons.com

-- 
Gregory Clement, Free Electrons
Kernel, drivers, real-time and embedded Linux
development, consulting, training and support.
http://free-electrons.com

[tip:x86/fpu] x86/fpu/xstate: Fix __fpu_restore_sig() for XSAVES

2016-07-11 Thread tip-bot for Yu-cheng Yu

Commit-ID:  1fc2b67b43d5001b92b3a002b94ad0137e99
Gitweb: http://git.kernel.org/tip/1fc2b67b43d5001b92b3a002b94ad0137e99
Author: Yu-cheng Yu 
AuthorDate: Mon, 11 Jul 2016 09:18:54 -0700
Committer:  Ingo Molnar 
CommitDate: Mon, 11 Jul 2016 16:43:59 +0200

x86/fpu/xstate: Fix __fpu_restore_sig() for XSAVES

When the kernel is using XSAVES compacted format, we cannot do
__copy_from_user() from a signal frame, which has standard-format data.
Fix it by using copyin_to_xsaves(), which converts between formats and
filters out all supervisor states that we do not allow userspace to
write.

Signed-off-by: Yu-cheng Yu 
Signed-off-by: Fenghua Yu 
Reviewed-by: Dave Hansen 
Cc: H. Peter Anvin 
Cc: Linus Torvalds 
Cc: Peter Zijlstra 
Cc: Ravi V Shankar 
Cc: Thomas Gleixner 
Link: 
http://lkml.kernel.org/r/1468253937-40008-2-git-send-email-fenghua...@intel.com
Signed-off-by: Ingo Molnar 
---
 arch/x86/kernel/fpu/signal.c | 11 +--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index 8aa96cb..9e231d8 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -323,8 +323,15 @@ static int __fpu__restore_sig(void __user *buf, void 
__user *buf_fx, int size)
 */
fpu__drop(fpu);
 
-   if (__copy_from_user(&fpu->state.xsave, buf_fx, state_size) ||
-   __copy_from_user(&env, buf, sizeof(env))) {
+   if (using_compacted_format()) {
+   err = copyin_to_xsaves(NULL, buf_fx,
+  &fpu->state.xsave);
+   } else {
+   err = __copy_from_user(&fpu->state.xsave,
+  buf_fx, state_size);
+   }
+
+   if (err || __copy_from_user(&env, buf, sizeof(env))) {
fpstate_init(&fpu->state);
trace_x86_fpu_init_state(fpu);
err = -1;

[tip:x86/fpu] x86/fpu/xstate: Fix __fpu_restore_sig() for XSAVES

2016-07-11 Thread tip-bot for Yu-cheng Yu

Commit-ID:  1fc2b67b43d5001b92b3a002b94ad0137e99
Gitweb: http://git.kernel.org/tip/1fc2b67b43d5001b92b3a002b94ad0137e99
Author: Yu-cheng Yu 
AuthorDate: Mon, 11 Jul 2016 09:18:54 -0700
Committer:  Ingo Molnar 
CommitDate: Mon, 11 Jul 2016 16:43:59 +0200

x86/fpu/xstate: Fix __fpu_restore_sig() for XSAVES

When the kernel is using XSAVES compacted format, we cannot do
__copy_from_user() from a signal frame, which has standard-format data.
Fix it by using copyin_to_xsaves(), which converts between formats and
filters out all supervisor states that we do not allow userspace to
write.

Signed-off-by: Yu-cheng Yu 
Signed-off-by: Fenghua Yu 
Reviewed-by: Dave Hansen 
Cc: H. Peter Anvin 
Cc: Linus Torvalds 
Cc: Peter Zijlstra 
Cc: Ravi V Shankar 
Cc: Thomas Gleixner 
Link: 
http://lkml.kernel.org/r/1468253937-40008-2-git-send-email-fenghua...@intel.com
Signed-off-by: Ingo Molnar 
---
 arch/x86/kernel/fpu/signal.c | 11 +--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index 8aa96cb..9e231d8 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -323,8 +323,15 @@ static int __fpu__restore_sig(void __user *buf, void 
__user *buf_fx, int size)
 */
fpu__drop(fpu);
 
-   if (__copy_from_user(&fpu->state.xsave, buf_fx, state_size) ||
-   __copy_from_user(&env, buf, sizeof(env))) {
+   if (using_compacted_format()) {
+   err = copyin_to_xsaves(NULL, buf_fx,
+  &fpu->state.xsave);
+   } else {
+   err = __copy_from_user(&fpu->state.xsave,
+  buf_fx, state_size);
+   }
+
+   if (err || __copy_from_user(&env, buf, sizeof(env))) {
fpstate_init(&fpu->state);
trace_x86_fpu_init_state(fpu);
err = -1;

< 2 3 4 5 6 7 8 9 10 11 >

601 - 700 of 1694 matches

Mail list logo