[PATCH v3 02/13] futex: Rename futex_pi_state to futex_state

2016-09-30 Thread Waiman Long
The futex_pi_state structure will be overloaded in later patches to
store state information about non-PI futexes. So the structure name
itself is no longer a good description of its purpose. So its name
is changed to futex_state, a more generic name.

Some of the functions that process the futex states are also renamed.

Signed-off-by: Waiman Long 
---
 include/linux/sched.h |4 +-
 kernel/futex.c|  107 +
 2 files changed, 56 insertions(+), 55 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 62c68e5..fefd7f7 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -126,7 +126,7 @@ struct sched_attr {
u64 sched_period;
 };
 
-struct futex_pi_state;
+struct futex_state;
 struct robust_list_head;
 struct bio_list;
 struct fs_struct;
@@ -1772,7 +1772,7 @@ struct task_struct {
struct compat_robust_list_head __user *compat_robust_list;
 #endif
struct list_head pi_state_list;
-   struct futex_pi_state *pi_state_cache;
+   struct futex_state *pi_state_cache;
 #endif
 #ifdef CONFIG_PERF_EVENTS
struct perf_event_context *perf_event_ctxp[perf_nr_task_contexts];
diff --git a/kernel/futex.c b/kernel/futex.c
index 91724ec..f4e20ad 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -192,11 +192,12 @@ int __read_mostly futex_cmpxchg_enabled;
 #define FLAGS_HAS_TIMEOUT  0x04
 
 /*
- * Priority Inheritance state:
+ * Futex state object:
+ *  - Priority Inheritance state
  */
-struct futex_pi_state {
+struct futex_state {
/*
-* list of 'owned' pi_state instances - these have to be
+* list of 'owned' state instances - these have to be
 * cleaned up in do_exit() if the task exits prematurely:
 */
struct list_head list;
@@ -240,7 +241,7 @@ struct futex_q {
struct task_struct *task;
spinlock_t *lock_ptr;
union futex_key key;
-   struct futex_pi_state *pi_state;
+   struct futex_state *pi_state;
struct rt_mutex_waiter *rt_waiter;
union futex_key *requeue_pi_key;
u32 bitset;
@@ -797,76 +798,76 @@ static int get_futex_value_locked(u32 *dest, u32 __user 
*from)
 /*
  * PI code:
  */
-static int refill_pi_state_cache(void)
+static int refill_futex_state_cache(void)
 {
-   struct futex_pi_state *pi_state;
+   struct futex_state *state;
 
if (likely(current->pi_state_cache))
return 0;
 
-   pi_state = kzalloc(sizeof(*pi_state), GFP_KERNEL);
+   state = kzalloc(sizeof(*state), GFP_KERNEL);
 
-   if (!pi_state)
+   if (!state)
return -ENOMEM;
 
-   INIT_LIST_HEAD(_state->list);
+   INIT_LIST_HEAD(>list);
/* pi_mutex gets initialized later */
-   pi_state->owner = NULL;
-   atomic_set(_state->refcount, 1);
-   pi_state->key = FUTEX_KEY_INIT;
+   state->owner = NULL;
+   atomic_set(>refcount, 1);
+   state->key = FUTEX_KEY_INIT;
 
-   current->pi_state_cache = pi_state;
+   current->pi_state_cache = state;
 
return 0;
 }
 
-static struct futex_pi_state * alloc_pi_state(void)
+static struct futex_state *alloc_futex_state(void)
 {
-   struct futex_pi_state *pi_state = current->pi_state_cache;
+   struct futex_state *state = current->pi_state_cache;
 
-   WARN_ON(!pi_state);
+   WARN_ON(!state);
current->pi_state_cache = NULL;
 
-   return pi_state;
+   return state;
 }
 
 /*
- * Drops a reference to the pi_state object and frees or caches it
+ * Drops a reference to the futex state object and frees or caches it
  * when the last reference is gone.
  *
  * Must be called with the hb lock held.
  */
-static void put_pi_state(struct futex_pi_state *pi_state)
+static void put_futex_state(struct futex_state *state)
 {
-   if (!pi_state)
+   if (!state)
return;
 
-   if (!atomic_dec_and_test(_state->refcount))
+   if (!atomic_dec_and_test(>refcount))
return;
 
/*
-* If pi_state->owner is NULL, the owner is most probably dying
-* and has cleaned up the pi_state already
+* If state->owner is NULL, the owner is most probably dying
+* and has cleaned up the futex state already
 */
-   if (pi_state->owner) {
-   raw_spin_lock_irq(_state->owner->pi_lock);
-   list_del_init(_state->list);
-   raw_spin_unlock_irq(_state->owner->pi_lock);
+   if (state->owner) {
+   raw_spin_lock_irq(>owner->pi_lock);
+   list_del_init(>list);
+   raw_spin_unlock_irq(>owner->pi_lock);
 
-   rt_mutex_proxy_unlock(_state->pi_mutex, pi_state->owner);
+   rt_mutex_proxy_unlock(>pi_mutex, state->owner);
}
 
if (current->pi_state_cache)
-   kfree(pi_state);
+   kfree(state);
else {
/*
-* 

[PATCH v3 05/13] futex: Add a new futex type field into futex_state

2016-09-30 Thread Waiman Long
As the futex_state structure will be overloaded in later patches
to be used by non-PI futexes, it is necessary to add a type field to
distinguish among different types of futexes.

Signed-off-by: Waiman Long 
---
 kernel/futex.c |   14 ++
 1 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/kernel/futex.c b/kernel/futex.c
index 0d1b1be..a496c01 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -191,6 +191,10 @@ int __read_mostly futex_cmpxchg_enabled;
 #define FLAGS_CLOCKRT  0x02
 #define FLAGS_HAS_TIMEOUT  0x04
 
+enum futex_type {
+   TYPE_PI = 0,
+};
+
 /*
  * Futex state object:
  *  - Priority Inheritance state
@@ -210,6 +214,7 @@ struct futex_state {
struct task_struct *owner;
atomic_t refcount;
 
+   enum futex_type type;
union futex_key key;
 };
 
@@ -897,10 +902,10 @@ static void put_futex_state(struct futex_state *state)
return;
 
/*
-* If state->owner is NULL, the owner is most probably dying
-* and has cleaned up the futex state already
+* If state->owner is NULL and the type is TYPE_PI, the owner is
+* most probably dying and has cleaned up the futex state already
 */
-   if (state->owner) {
+   if (state->owner && (state->type == TYPE_PI)) {
task_pi_list_del(state->owner, state, false);
 
rt_mutex_proxy_unlock(>pi_mutex, state->owner);
@@ -1056,7 +1061,7 @@ static int attach_to_pi_state(u32 uval, struct 
futex_state *pi_state,
/*
 * Userspace might have messed up non-PI and PI futexes [3]
 */
-   if (unlikely(!pi_state))
+   if (unlikely(!pi_state || (pi_state->type != TYPE_PI)))
return -EINVAL;
 
WARN_ON(!atomic_read(_state->refcount));
@@ -1174,6 +1179,7 @@ static int attach_to_pi_owner(u32 uval, union futex_key 
*key,
 
/* Store the key for possible exit cleanups: */
pi_state->key = *key;
+   pi_state->type = TYPE_PI;
 
WARN_ON(!list_empty(_state->list));
list_add(_state->list, >pi_state_list);
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v3 09/13] futex: Implement lock handoff for TP futexes to prevent lock starvation

2016-09-30 Thread Waiman Long
The current TP futexes has no guarantee that the top waiter
(serialization mutex owner) can get the lock within a finite time.
As a result, lock starvation can happen.

A lock handoff mechanism is added to the TP futexes to prevent lock
starvation from happening. The idea is that the top waiter can set a
special handoff_pid variable with its own pid. The futex owner will
then check this variable at unlock time to see if it should free the
futex or change its value to the designated pid to transfer the lock
directly to the top waiter.

This change does have the effect of limiting the amount of unfairness
and hence may adversely affect performance depending on the workloads.

Using a futex locking microbenchmark on a 4-socket 72-core 144-thread
Haswell-EX system running 4.8-rc7 based kernel, the performance data
for the TP futexes before and after the patch were as follows:

  CS length Before patchAfter patch% change
  - ---
  5  54,622,973  59,455,380  +8.8%
 10  42,577,283  41,161,387  -3.3%
 20  29,424,333  37,718,989 +28.2%
 30  33,664,095  29,474,986 -12.4%
 40  27,998,446  24,774,389 -11.5%
 50  25,793,991  22,718,432 -11.9%
  1us sleep 179,067 177,662  -0.8%

In fact, when the critical section is a 1us sleep, the wait-wake
futexes perform a bit better than the TP futexes as the former futexes
are more unfair in this case. So lock starvation can happen with
wait-wake futexes.

WW futexTP futex

Total locking ops   175,824 177,662
Per-thread avg/sec  122 123
Per-thread min/sec2 106
Per-thread max/sec  498 199
% Stddev  6.26%   0.97%

Signed-off-by: Waiman Long 
---
 kernel/futex.c |   55 ---
 1 files changed, 48 insertions(+), 7 deletions(-)

diff --git a/kernel/futex.c b/kernel/futex.c
index 5fa9a10..d260410 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -228,6 +228,8 @@ struct futex_state {
struct task_struct *owner;
atomic_t refcount;
 
+   u32 handoff_pid;/* For TP futexes only */
+
enum futex_type type;
union futex_key key;
 };
@@ -898,6 +900,7 @@ static int refill_futex_state_cache(void)
 
/* pi_mutex gets initialized later */
state->owner = NULL;
+   state->handoff_pid = 0;
atomic_set(>refcount, 1);
state->key = FUTEX_KEY_INIT;
 
@@ -3303,8 +3306,9 @@ void exit_robust_list(struct task_struct *curr)
  *
  * Within the kernel, trylocks are done ignoring the FUTEX_WAITERS bit. The
  * purpose of this FUTEX_WAITERS bit is to make the unlocker wake up the
- * serialization mutex owner. Not having the FUTEX_WAITERS bit set doesn't
- * mean there is no waiter in the kernel.
+ * serialization mutex owner or the hand off the lock directly to the top
+ * waiter. Not having the FUTEX_WAITERS bit set doesn't mean there is no
+ * waiter in the kernel.
  *
  * Like PI futexes, TP futexes are orthogonal to robust futexes can be
  * used together.
@@ -3320,6 +3324,19 @@ void exit_robust_list(struct task_struct *curr)
  * the ownership of the futex.
  */
 
+/*
+ * Spinning and sleeping thresholds for enabling lock handoff and prevent
+ * lock starvation.
+ *
+ * The current threshold values setting is kind of arbitrary. Setting them
+ * too low will impact the amount of lock stealing possible thus reducing
+ * performance. Setting them too high may make the top waiter wait too long
+ * before getting the lock. The current set of values are a bit on the
+ * high side to ensure adequate performance.
+ */
+#define TP_SPIN_THRESHOLD   (1 << 14)
+#define TP_SLEEP_DECREMENT  (TP_SPIN_THRESHOLD/64)
+
 /**
  * lookup_futex_state - Looking up the futex state structure.
  * @hb: hash bucket
@@ -3398,6 +3415,7 @@ static inline int put_futex_state_unlocked(struct 
futex_state *state)
  * If waiter is true
  * then
  *   don't preserve the flag bits;
+ *   check for handoff (futex word == own pid)
  * else
  *   preserve the flag bits
  * endif
@@ -3416,6 +3434,9 @@ static inline int futex_trylock(u32 __user *uaddr, u32 
vpid, u32 *puval,
 
*puval = uval;
 
+   if (waiter && (uval & FUTEX_TID_MASK) == vpid)
+   return 1;
+
if (uval & FUTEX_TID_MASK)
return 0;   /* Trylock fails */
 
@@ -3478,7 +3499,7 @@ static inline int futex_set_waiters_bit(u32 __user 
*uaddr, u32 *puval)
 static int futex_spin_on_owner(u32 __user *uaddr, u32 vpid,
   struct futex_state *state)
 {
-   int ret;
+   int ret, loop = TP_SPIN_THRESHOLD;
u32 uval;
u32 owner_pid = 0;
struct 

[PATCH v3 08/13] futex: Enable robust handling of TP futexes

2016-09-30 Thread Waiman Long
The TP futexes don't have code to handle the death of futex
owners. There are 2 different cases that need to be considered.

As top waiter gets a reference to the task structure of the futex
owner, the task structure will never go away even if the owner dies.
When the futex owner died while the top waiter is spinning, the task
structure will be marked dead or the pid won't have a matching task
structure if the task died before a reference is taken. Alternatively,
if robust futex attribute is enabled, the FUTEX_OWNER_DIED bit of
the futex word may be set. In all those cases, what the top waiter
need to do is to grab the futex directly. An informational message
will also printed to highlight this event.

If the futex owner died while the top waiter is sleeping, we need to
make the exit processing code to wake up the top waiter. This is done
by chaining the futex state object into the pi_state_list of the futex
owner before the top waiter sleeps so that if exit_pi_state_list()
is called, the wakeup will happen. The top waiter needs to remove
its futex state object from the pi_state_list of the old owner if
the ownership changes hand.

Signed-off-by: Waiman Long 
---
 kernel/futex.c |   82 +++
 1 files changed, 76 insertions(+), 6 deletions(-)

diff --git a/kernel/futex.c b/kernel/futex.c
index bfe17a9..5fa9a10 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -973,7 +973,7 @@ static struct task_struct * futex_find_get_task(pid_t pid)
 }
 
 /*
- * This task is holding PI mutexes at exit time => bad.
+ * This task is holding PI or TP mutexes at exit time => bad.
  * Kernel cleans up PI-state, but userspace is likely hosed.
  * (Robust-futex cleanup is separate and might save the day for userspace.)
  */
@@ -990,12 +990,29 @@ void exit_pi_state_list(struct task_struct *curr)
 * We are a ZOMBIE and nobody can enqueue itself on
 * pi_state_list anymore, but we have to be careful
 * versus waiters unqueueing themselves:
+*
+* For TP futexes, the only purpose of showing up in the
+* pi_state_list is for this function to wake up the serialization
+* mutex owner (state->owner). We don't actually need to take the
+* HB lock. The futex state and task struct won't go away as long
+* as we hold the pi_lock.
 */
raw_spin_lock_irq(>pi_lock);
while (!list_empty(head)) {
 
next = head->next;
pi_state = list_entry(next, struct futex_state, list);
+
+   if (pi_state->type == TYPE_TP) {
+   struct task_struct *owner = READ_ONCE(pi_state->owner);
+
+   WARN_ON(list_empty(_state->list));
+   list_del_init(_state->list);
+   if (owner)
+   wake_up_process(owner);
+   continue;
+   }
+
key = pi_state->key;
hb = hash_futex();
raw_spin_unlock_irq(>pi_lock);
@@ -3152,8 +3169,8 @@ retry:
goto retry;
 
/*
-* Wake robust non-PI futexes here. The wakeup of
-* PI futexes happens in exit_pi_state():
+* Wake robust wait-wake futexes here. The wakeup of
+* PI and TP futexes happens in exit_pi_state():
 */
if (!pi && (uval & FUTEX_WAITERS))
futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY);
@@ -3295,6 +3312,12 @@ void exit_robust_list(struct task_struct *curr)
  * Unlike the other futexes, the futex_q structures aren't used. Instead,
  * they will queue up in the serialization mutex of the futex state container
  * queued in the hash bucket.
+ *
+ * To handle the exceptional case that the futex owner died, the robust
+ * futexes list mechanism is used to for waking up sleeping top waiter.
+ * Checks are also made in the futex_spin_on_owner() loop for dead task
+ * structure or invalid pid. In both cases, the top waiter will take over
+ * the ownership of the futex.
  */
 
 /**
@@ -3459,6 +3482,7 @@ static int futex_spin_on_owner(u32 __user *uaddr, u32 
vpid,
u32 uval;
u32 owner_pid = 0;
struct task_struct *owner_task = NULL;
+   bool on_owner_pi_list = false;
 
WRITE_ONCE(state->owner, current);
preempt_disable();
@@ -3468,13 +3492,47 @@ static int futex_spin_on_owner(u32 __user *uaddr, u32 
vpid,
break;
 
if ((uval & FUTEX_TID_MASK) != owner_pid) {
-   if (owner_task)
+   if (owner_task) {
+   /*
+* task_pi_list_del() should always be
+* done before put_task_struct(). The futex
+* state may have been dequeued if the task
+

[PATCH v3 04/13] futex: Consolidate pure pi_state_list add & delete codes to helpers

2016-09-30 Thread Waiman Long
Two new helper functions (task_pi_list_add & task_pi_list_del)
are created to consolidate all the pure pi_state_list addition and
insertion codes. The set_owner argument in task_pi_list_add() will
be needed in a later patch.

Signed-off-by: Waiman Long 
---
 kernel/futex.c |   67 +++-
 1 files changed, 42 insertions(+), 25 deletions(-)

diff --git a/kernel/futex.c b/kernel/futex.c
index ebe59fa..0d1b1be 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -813,6 +813,42 @@ static inline int get_futex_value(u32 *dest, u32 __user 
*from)
 /*
  * PI code:
  */
+
+/**
+ * task_pi_list_add - add futex state object to a task's pi_state_list
+ * @task : task structure
+ * @state: futex state object
+ * @set_owner: set futex state owner to the given task if true
+ */
+static inline void task_pi_list_add(struct task_struct *task,
+   struct futex_state *state,
+   const bool set_owner)
+{
+   raw_spin_lock_irq(>pi_lock);
+   WARN_ON(!list_empty(>list));
+   list_add(>list, >pi_state_list);
+   if (set_owner)
+   state->owner = task;
+   raw_spin_unlock_irq(>pi_lock);
+}
+
+/**
+ * task_pi_list_del - delete futex state object from a task's pi_state_list
+ * @task : task structure
+ * @state: futex state object
+ * @warn : warn if list is empty when set
+ */
+static inline void task_pi_list_del(struct task_struct *task,
+   struct futex_state *state,
+   const bool warn)
+{
+   raw_spin_lock_irq(>pi_lock);
+   if (warn)
+   WARN_ON(list_empty(>list));
+   list_del_init(>list);
+   raw_spin_unlock_irq(>pi_lock);
+}
+
 static int refill_futex_state_cache(void)
 {
struct futex_state *state;
@@ -865,9 +901,7 @@ static void put_futex_state(struct futex_state *state)
 * and has cleaned up the futex state already
 */
if (state->owner) {
-   raw_spin_lock_irq(>owner->pi_lock);
-   list_del_init(>list);
-   raw_spin_unlock_irq(>owner->pi_lock);
+   task_pi_list_del(state->owner, state, false);
 
rt_mutex_proxy_unlock(>pi_mutex, state->owner);
}
@@ -1388,16 +1422,8 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, 
struct futex_q *this,
return ret;
}
 
-   raw_spin_lock(_state->owner->pi_lock);
-   WARN_ON(list_empty(_state->list));
-   list_del_init(_state->list);
-   raw_spin_unlock(_state->owner->pi_lock);
-
-   raw_spin_lock(_owner->pi_lock);
-   WARN_ON(!list_empty(_state->list));
-   list_add(_state->list, _owner->pi_state_list);
-   pi_state->owner = new_owner;
-   raw_spin_unlock(_owner->pi_lock);
+   task_pi_list_del(pi_state->owner, pi_state, true);
+   task_pi_list_add(new_owner, pi_state, true);
 
raw_spin_unlock_irq(_state->pi_mutex.wait_lock);
 
@@ -2202,19 +2228,10 @@ retry:
 * We fixed up user space. Now we need to fix the pi_state
 * itself.
 */
-   if (pi_state->owner != NULL) {
-   raw_spin_lock_irq(_state->owner->pi_lock);
-   WARN_ON(list_empty(_state->list));
-   list_del_init(_state->list);
-   raw_spin_unlock_irq(_state->owner->pi_lock);
-   }
+   if (pi_state->owner != NULL)
+   task_pi_list_del(pi_state->owner, pi_state, true);
 
-   pi_state->owner = newowner;
-
-   raw_spin_lock_irq(>pi_lock);
-   WARN_ON(!list_empty(_state->list));
-   list_add(_state->list, >pi_state_list);
-   raw_spin_unlock_irq(>pi_lock);
+   task_pi_list_add(newowner, pi_state, true);
return 0;
 
/*
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v3 03/13] futex: Add helpers to get & cmpxchg futex value without lock

2016-09-30 Thread Waiman Long
Two new helper functions cmpxchg_futex_value() and get_futex_value()
are added to access and change the futex value without the hash
bucket lock.  As a result, page fault is enabled and the page will
be faulted in if not present yet.

Signed-off-by: Waiman Long 
---
 kernel/futex.c |   15 +++
 1 files changed, 15 insertions(+), 0 deletions(-)

diff --git a/kernel/futex.c b/kernel/futex.c
index f4e20ad..ebe59fa 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -794,6 +794,21 @@ static int get_futex_value_locked(u32 *dest, u32 __user 
*from)
return ret ? -EFAULT : 0;
 }
 
+/*
+ * The equivalents of the above cmpxchg_futex_value_locked() and
+ * get_futex_value_locked which are called without the hash bucket lock
+ * and so can have page fault enabled.
+ */
+static inline int cmpxchg_futex_value(u32 *curval, u32 __user *uaddr,
+ u32 uval, u32 newval)
+{
+   return futex_atomic_cmpxchg_inatomic(curval, uaddr, uval, newval);
+}
+
+static inline int get_futex_value(u32 *dest, u32 __user *from)
+{
+   return __get_user(*dest, from);
+}
 
 /*
  * PI code:
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v3 10/13] futex: Inform FUTEX_LOCK callers on how the lock is acquired

2016-09-30 Thread Waiman Long
As there are three different ways for a TP futex waiter in the kernel
to acquire the lock. It may be useful to pass this information out so
that user space has a better view of what is happening in the kernel.
With this change, different non-negative values will now be returned
depending on how the lock is acquired in the kernel.

Signed-off-by: Waiman Long 
---
 kernel/futex.c |   30 ++
 1 files changed, 22 insertions(+), 8 deletions(-)

diff --git a/kernel/futex.c b/kernel/futex.c
index d260410..1219f32 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -3337,7 +3337,17 @@ void exit_robust_list(struct task_struct *curr)
 #define TP_SPIN_THRESHOLD   (1 << 14)
 #define TP_SLEEP_DECREMENT  (TP_SPIN_THRESHOLD/64)
 
-/**
+/*
+ * futex_lock() returned values to identify how the lock is acquired:
+ * 0 - steals the lock
+ * 1 - top waiter (mutex owner) acquires the lock
+ * 2 - handed off the lock
+ */
+#define TP_LOCK_STOLEN 0
+#define TP_LOCK_ACQUIRED   1
+#define TP_LOCK_HANDOFF2
+
+ /**
  * lookup_futex_state - Looking up the futex state structure.
  * @hb: hash bucket
  * @key:futex key
@@ -3420,9 +3430,11 @@ static inline int put_futex_state_unlocked(struct 
futex_state *state)
  *   preserve the flag bits
  * endif
  *
- * Return: 1 if lock acquired;
+ * Return: TP_LOCK_ACQUIRED if lock acquired;
+ *TP_LOCK_HANDOFF if lock was handed off;
  *0 if lock acquisition failed;
  *-EFAULT if an error happened.
+ **puval will contain the latest futex value when trylock fails.
  */
 static inline int futex_trylock(u32 __user *uaddr, u32 vpid, u32 *puval,
const bool waiter)
@@ -3435,7 +3447,7 @@ static inline int futex_trylock(u32 __user *uaddr, u32 
vpid, u32 *puval,
*puval = uval;
 
if (waiter && (uval & FUTEX_TID_MASK) == vpid)
-   return 1;
+   return TP_LOCK_HANDOFF;
 
if (uval & FUTEX_TID_MASK)
return 0;   /* Trylock fails */
@@ -3446,7 +3458,7 @@ static inline int futex_trylock(u32 __user *uaddr, u32 
vpid, u32 *puval,
if (unlikely(cmpxchg_futex_value(puval, uaddr, uval, vpid|flags)))
return -EFAULT;
 
-   return *puval == uval;
+   return (*puval == uval) ? TP_LOCK_ACQUIRED : 0;
 }
 
 /**
@@ -3659,8 +3671,10 @@ efault:
  * This function is not inlined so that it can show up separately in perf
  * profile for performance analysis purpose.
  *
- * Return: 0   - lock acquired
- *< 0 - an error happens
+ * Return: TP_LOCK_ACQUIRED - lock acquired normally
+ *TP_LOCK_HANDOFF  - lock handed off directly from unlocker
+ *TP_LOCK_STOLEN   - lock stolen
+ *< 0  - an error happens
  */
 static noinline int futex_lock(u32 __user *uaddr, unsigned int flags)
 {
@@ -3676,7 +3690,7 @@ static noinline int futex_lock(u32 __user *uaddr, 
unsigned int flags)
 */
ret = futex_trylock(uaddr, vpid, , false);
if (ret)
-   goto out;
+   return (ret < 0) ? ret : TP_LOCK_STOLEN;
 
/*
 * Detect deadlocks.
@@ -3750,7 +3764,7 @@ out_put_state_key:
put_futex_key();
 
 out:
-   return (ret < 0) ? ret : 0;
+   return ret;
 }
 
 /*
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v3 00/13] futex: Introducing throughput-optimized futexes

2016-09-30 Thread Waiman Long
 v2->v3:
  - Use the abbreviation TP for the new futexes instead of TO.
  - Make a number of changes accordingly to review comments from
ThomasG, PeterZ and MikeG.
  - Breaks the main futex patch into smaller pieces to make them easier
to review.
  - Integrate the microbenchmark into the "perf bench futex" tool
so that others can use it too.

 v1->v2:
  - Adds an explicit lock hand-off mechanism.
  - Adds timeout support.
  - Simplifies the required userspace code.
  - Fixes a number of problems in the v1 code.

This patchset introduces a new futex implementation called
throughput-optimized (TP) futexes. It is similar to PI futexes in its
calling convention, but provides better throughput than the wait-wake
futexes by encouraging more lock stealing and optimistic spinning
especially when the lock holders don't sleep and the critical sections
are not very short.

A manpage patch that documents changes to the futex(2) system call
will be sent out separately.

Patch 1 consolidates the duplicated timer setup code.

Patch 2 renames the futex_pi_state structure to futex_state as it
will no longer be specific to the PI futexes. Some futex_pi_state
related functions are also renamed.

Patch 3 adds some helper functions to be used in later patches.

Patch 4 consolidates the codes that need to add or delete futex_state
to pi_state_list.

Patch 5 adds a new type field to the futex_state structure.

Patch 6 changes the futex_hash_bucket to include another list for
futex state objects and a spinlock to guard the new list.

Patch 7 is the core of this patchset and introduction of the new
TP futexes.

Patch 8 enables more robust handling when the futex owners died
unexpectedly.

Patch 9 implements the lock handoff mechanism to prevent lock
starvation.

Patch 10 enables the FUTEX_LOCK call to return a status code to show
how the lock is being acquired.

Patch 11 adds code support the specification of a timeout value in
the FUTEX_LOCK call.

Patch 12 adds a new documentation file about the TP futexes.

Patch 13 adds the futex microbenchmark that was used to produce the
performance data to the "perf bench futex" tool. Others can then use
it to do their own evaluation.

Waiman Long (13):
  futex: Consolidate duplicated timer setup code
  futex: Rename futex_pi_state to futex_state
  futex: Add helpers to get & cmpxchg futex value without lock
  futex: Consolidate pure pi_state_list add & delete codes to helpers
  futex: Add a new futex type field into futex_state
  futex: Allow direct attachment of futex_state objects to hash bucket
  futex: Throughput-optimized (TP) futexes
  futex: Enable robust handling of TP futexes
  futex: Implement lock handoff for TP futexes to prevent lock
starvation
  futex: Inform FUTEX_LOCK callers on how the lock is acquired
  futex: Add timeout support to TP futexes
  futex, doc: TP futexes document
  perf bench: New microbenchmark for userspace mutex performance

 Documentation/00-INDEX |2 +
 Documentation/tp-futex.txt |  147 +++
 include/linux/sched.h  |4 +-
 include/uapi/linux/futex.h |4 +
 kernel/futex.c |  943 +++-
 tools/perf/bench/Build |1 +
 tools/perf/bench/bench.h   |1 +
 tools/perf/bench/futex-mutex.c |  558 
 tools/perf/bench/futex.h   |   25 +
 tools/perf/builtin-bench.c |4 +
 10 files changed, 1575 insertions(+), 114 deletions(-)
 create mode 100644 Documentation/tp-futex.txt
 create mode 100644 tools/perf/bench/futex-mutex.c

--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v3 13/13] perf bench: New microbenchmark for userspace mutex performance

2016-09-30 Thread Waiman Long
This microbenchmark simulates how the use of different futex types
can affect the actual performanace of userspace mutex locks. The
usage is:

perf bench futex mutex 

Three sets of simple mutex lock and unlock functions are implemented
using the wait-wake, PI and TP futexes respectively. This
microbenchmark then runs the locking rate measurement tests using
either one of those mutexes or all of them consecutively.

Signed-off-by: Waiman Long 
---
 tools/perf/bench/Build |1 +
 tools/perf/bench/bench.h   |1 +
 tools/perf/bench/futex-mutex.c |  558 
 tools/perf/bench/futex.h   |   25 ++
 tools/perf/builtin-bench.c |4 +
 5 files changed, 589 insertions(+), 0 deletions(-)
 create mode 100644 tools/perf/bench/futex-mutex.c

diff --git a/tools/perf/bench/Build b/tools/perf/bench/Build
index 60bf119..dc6dfc1 100644
--- a/tools/perf/bench/Build
+++ b/tools/perf/bench/Build
@@ -6,6 +6,7 @@ perf-y += futex-wake.o
 perf-y += futex-wake-parallel.o
 perf-y += futex-requeue.o
 perf-y += futex-lock-pi.o
+perf-y += futex-mutex.o
 
 perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-asm.o
 perf-$(CONFIG_X86_64) += mem-memset-x86-64-asm.o
diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h
index 579a592..b0632df 100644
--- a/tools/perf/bench/bench.h
+++ b/tools/perf/bench/bench.h
@@ -36,6 +36,7 @@ int bench_futex_wake_parallel(int argc, const char **argv, 
const char *prefix);
 int bench_futex_requeue(int argc, const char **argv, const char *prefix);
 /* pi futexes */
 int bench_futex_lock_pi(int argc, const char **argv, const char *prefix);
+int bench_futex_mutex(int argc, const char **argv, const char *prefix);
 
 #define BENCH_FORMAT_DEFAULT_STR   "default"
 #define BENCH_FORMAT_DEFAULT   0
diff --git a/tools/perf/bench/futex-mutex.c b/tools/perf/bench/futex-mutex.c
new file mode 100644
index 000..d48e198
--- /dev/null
+++ b/tools/perf/bench/futex-mutex.c
@@ -0,0 +1,558 @@
+/*
+ * Copyright (C) 2016 Waiman Long
+ *
+ * This microbenchmark simulates how the use of different futex types can
+ * affect the actual performanace of userspace locking primitives like mutex.
+ *
+ * The raw throughput of the futex lock and unlock calls is not a good
+ * indication of actual throughput of the mutex code as it may not really
+ * need to call into the kernel. Therefore, 3 simple mutex lock and unlock
+ * functions are written to implenment a mutex lock using the wait-wake,
+ * PI and TP futexes respectively. These functions serve as the basis for
+ * measuring the locking throughput.
+ */
+
+#include 
+
+#include 
+#include 
+#include "../util/stat.h"
+#include "../perf-sys.h"
+#include 
+#include 
+#include 
+#include 
+#include "bench.h"
+#include "futex.h"
+
+#include 
+#include 
+#include 
+
+#definegettid()syscall(SYS_gettid)
+#define __cacheline_aligned__attribute__((__aligned__(64)))
+
+typedef u32 futex_t;
+typedef void (*mutex_lock_fn_t)(futex_t *futex, int tid);
+typedef void (*mutex_unlock_fn_t)(futex_t *futex, int tid);
+
+
+struct worker {
+   futex_t *futex;
+   pthread_t thread;
+
+   /*
+* Per-thread operation statistics
+*/
+   unsigned int ops;   /* # of locking operations  */
+   unsigned int locks; /* # of lock futex calls*/
+   unsigned int unlocks;   /* # of unlock futex calls  */
+   unsigned int eagains;   /* # of EAGAIN errors   */
+   unsigned int lockerrs;  /* # of other lock errors   */
+   unsigned int unlockerrs;/* # of unlock errors   */
+   unsigned int wakeups;   /* # of wakeups (unlock return) */
+   unsigned int handoffs;  /* # of lock handoff (TP only)  */
+   unsigned int steals;/* # of lock steals (TP only)   */
+} __cacheline_aligned;
+
+/*
+ * Global cache-aligned futex
+ */
+static futex_t global_futex __cacheline_aligned;
+
+static __thread futex_t thread_id; /* Thread ID */
+static __thread int counter;   /* Sleep counter */
+
+static struct worker *worker;
+static unsigned int nsecs = 10;
+static bool verbose, done, fshared, exit_now;
+static unsigned int ncpus, nthreads;
+static int futex_flag;
+static const char *ftype = "all";
+static int csload = 1;
+static int wratio;
+struct timeval start, end, runtime;
+static unsigned int worker_start;
+static unsigned int threads_starting;
+static struct stats throughput_stats;
+static mutex_lock_fn_t mutex_lock_fn;
+static mutex_unlock_fn_t mutex_unlock_fn;
+
+/*
+ * CS - Lock critical section
+ */
+static const struct option options[] = {
+   OPT_STRING  ('f', "futex-type", , "type", "Specify futex type: 
WW, PI, TP, all (default)"),
+   OPT_INTEGER ('l', "load",   ,   "Specify # of cpu_relax's 
inside CS, default = 1"),
+   OPT_UINTEGER('t', "threads",, "Specify number of threads, 
default = # of CPUs"),
+   OPT_UINTEGER('r', "runtime",,

[PATCH v3 06/13] futex: Allow direct attachment of futex_state objects to hash bucket

2016-09-30 Thread Waiman Long
Currently, the futex state objects can only be located indirectly as

hash bucket => futex_q => futex state

Actually it can be beneficial in some cases to locate the futex state
object directly from the hash bucket without the futex_q middleman.
Therefore, a new list head to link the futex state objects as well
as a new spinlock to manage them are added to the hash bucket.

To limit size increase for UP systems, these new fields are only for
SMP machines where the cacheline alignment of the hash bucket leaves
it with enough empty space for the new fields.

Signed-off-by: Waiman Long 
---
 kernel/futex.c |   24 
 1 files changed, 24 insertions(+), 0 deletions(-)

diff --git a/kernel/futex.c b/kernel/futex.c
index a496c01..646445f 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -207,6 +207,11 @@ struct futex_state {
struct list_head list;
 
/*
+* Can link to fs_head in the owning hash bucket.
+*/
+   struct list_head fs_list;
+
+   /*
 * The PI object:
 */
struct rt_mutex pi_mutex;
@@ -262,11 +267,24 @@ static const struct futex_q futex_q_init = {
  * Hash buckets are shared by all the futex_keys that hash to the same
  * location.  Each key may have multiple futex_q structures, one for each task
  * waiting on a futex.
+ *
+ * Alternatively (in SMP), a key can be associated with a unique futex_state
+ * object where multiple waiters waiting for that futex can queue up in that
+ * futex_state object without using the futex_q structure. A separate
+ * futex_state lock (fs_lock) is used for processing those futex_state objects.
  */
 struct futex_hash_bucket {
atomic_t waiters;
spinlock_t lock;
struct plist_head chain;
+
+#ifdef CONFIG_SMP
+   /*
+* Fields for managing futex_state object list
+*/
+   spinlock_t fs_lock;
+   struct list_head fs_head;
+#endif
 } cacheline_aligned_in_smp;
 
 /*
@@ -867,6 +885,8 @@ static int refill_futex_state_cache(void)
return -ENOMEM;
 
INIT_LIST_HEAD(>list);
+   INIT_LIST_HEAD(>fs_list);
+
/* pi_mutex gets initialized later */
state->owner = NULL;
atomic_set(>refcount, 1);
@@ -3356,6 +3376,10 @@ static int __init futex_init(void)
atomic_set(_queues[i].waiters, 0);
plist_head_init(_queues[i].chain);
spin_lock_init(_queues[i].lock);
+#ifdef CONFIG_SMP
+   INIT_LIST_HEAD(_queues[i].fs_head);
+   spin_lock_init(_queues[i].fs_lock);
+#endif
}
 
return 0;
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v3 01/13] futex: Consolidate duplicated timer setup code

2016-09-30 Thread Waiman Long
A new futex_setup_timer() helper function is added to consolidate all
the hrtimer_sleeper setup code.

Signed-off-by: Waiman Long 
---
 kernel/futex.c |   67 +++-
 1 files changed, 37 insertions(+), 30 deletions(-)

diff --git a/kernel/futex.c b/kernel/futex.c
index 46cb3a3..91724ec 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -468,6 +468,35 @@ static void drop_futex_key_refs(union futex_key *key)
 }
 
 /**
+ * futex_setup_timer - set up the sleeping hrtimer.
+ * @time:  ptr to the given timeout value
+ * @timeout:   the hrtimer_sleeper structure to be set up
+ * @flags: futex flags
+ * @range_ns:  optional range in ns
+ *
+ * Return: Initialized hrtimer_sleeper structure or NULL if no timeout
+ *value given
+ */
+static inline struct hrtimer_sleeper *
+futex_setup_timer(ktime_t *time, struct hrtimer_sleeper *timeout,
+ int flags, u64 range_ns)
+{
+   if (!time)
+   return NULL;
+
+   hrtimer_init_on_stack(>timer, (flags & FLAGS_CLOCKRT) ?
+ CLOCK_REALTIME : CLOCK_MONOTONIC,
+ HRTIMER_MODE_ABS);
+   hrtimer_init_sleeper(timeout, current);
+   if (range_ns)
+   hrtimer_set_expires_range_ns(>timer, *time, range_ns);
+   else
+   hrtimer_set_expires(>timer, *time);
+
+   return timeout;
+}
+
+/**
  * get_futex_key() - Get parameters which are the keys for a futex
  * @uaddr: virtual address of the futex
  * @fshared:   0 for a PROCESS_PRIVATE futex, 1 for PROCESS_SHARED
@@ -2393,7 +2422,7 @@ out:
 static int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val,
  ktime_t *abs_time, u32 bitset)
 {
-   struct hrtimer_sleeper timeout, *to = NULL;
+   struct hrtimer_sleeper timeout, *to;
struct restart_block *restart;
struct futex_hash_bucket *hb;
struct futex_q q = futex_q_init;
@@ -2403,17 +2432,8 @@ static int futex_wait(u32 __user *uaddr, unsigned int 
flags, u32 val,
return -EINVAL;
q.bitset = bitset;
 
-   if (abs_time) {
-   to = 
-
-   hrtimer_init_on_stack(>timer, (flags & FLAGS_CLOCKRT) ?
- CLOCK_REALTIME : CLOCK_MONOTONIC,
- HRTIMER_MODE_ABS);
-   hrtimer_init_sleeper(to, current);
-   hrtimer_set_expires_range_ns(>timer, *abs_time,
-current->timer_slack_ns);
-   }
-
+   to = futex_setup_timer(abs_time, , flags,
+  current->timer_slack_ns);
 retry:
/*
 * Prepare to wait on uaddr. On success, holds hb lock and increments
@@ -2493,7 +2513,7 @@ static long futex_wait_restart(struct restart_block 
*restart)
 static int futex_lock_pi(u32 __user *uaddr, unsigned int flags,
 ktime_t *time, int trylock)
 {
-   struct hrtimer_sleeper timeout, *to = NULL;
+   struct hrtimer_sleeper timeout, *to;
struct futex_hash_bucket *hb;
struct futex_q q = futex_q_init;
int res, ret;
@@ -2501,13 +2521,7 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int 
flags,
if (refill_pi_state_cache())
return -ENOMEM;
 
-   if (time) {
-   to = 
-   hrtimer_init_on_stack(>timer, CLOCK_REALTIME,
- HRTIMER_MODE_ABS);
-   hrtimer_init_sleeper(to, current);
-   hrtimer_set_expires(>timer, *time);
-   }
+   to = futex_setup_timer(time, , FLAGS_CLOCKRT, 0);
 
 retry:
ret = get_futex_key(uaddr, flags & FLAGS_SHARED, , VERIFY_WRITE);
@@ -2802,7 +2816,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, 
unsigned int flags,
 u32 val, ktime_t *abs_time, u32 bitset,
 u32 __user *uaddr2)
 {
-   struct hrtimer_sleeper timeout, *to = NULL;
+   struct hrtimer_sleeper timeout, *to;
struct rt_mutex_waiter rt_waiter;
struct rt_mutex *pi_mutex = NULL;
struct futex_hash_bucket *hb;
@@ -2816,15 +2830,8 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, 
unsigned int flags,
if (!bitset)
return -EINVAL;
 
-   if (abs_time) {
-   to = 
-   hrtimer_init_on_stack(>timer, (flags & FLAGS_CLOCKRT) ?
- CLOCK_REALTIME : CLOCK_MONOTONIC,
- HRTIMER_MODE_ABS);
-   hrtimer_init_sleeper(to, current);
-   hrtimer_set_expires_range_ns(>timer, *abs_time,
-current->timer_slack_ns);
-   }
+   to = futex_setup_timer(abs_time, , flags,
+  current->timer_slack_ns);
 
/*
 * The waiter is allocated on our stack, manipulated by 

Re: [PATCH 0/2] Moving runnable code from Documentation (last 2 patches)

2016-09-30 Thread Shuah Khan
On 09/26/2016 12:40 PM, Shuah Khan wrote:
> This patch series contains the last 2 patches to complete moving runnable
> code from Documentation to selftests, samples, and tools.
> 
> The first patch moves blackfin gptimers-example to samples and removes
> CONFIG_BUILD_DOCSRC.
> 
> The second one updates 00-INDEX files under Documentation to reflect the
> move of runnable code from Documentation.
> 
> Shuah Khan (2):
>   samples: move blackfin gptimers-example from Documentation
>   Doc: update 00-INDEX files to reflect the runnable code move
> 

Hi Jon and Michal,

Do these patches look good to you? I can get these into 4.9-rc1
with your Ack.

thanks,
-- Shuah

--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] doc: convert 80211 DocBook to RST

2016-09-30 Thread Florian Margaine
Off-commit comment: this is a line-by-line, title-by-title,
section-by-section, comment-by-comment conversion of the DocBook
80211.tmpl file to RST.

The 80211.tmpl file has also been deleted, and removed from the
DocBook/Makefile list -- I'm not sure this should be done.

I'm sure that reorganization is something planned too, but a first
conversion away from DocBook is a good idea anyway, and I'd need to
know much more about the topic at hand to do the reorganization.

Regards,
-- 
Florian Margaine


pgp66UbS6Tztv.pgp
Description: OpenPGP digital signature


[PATCH] doc: convert 80211 DocBook to RST

2016-09-30 Thread Florian Margaine
Signed-off-by: Florian Margaine 

---
 .../actions-and-configuration.rst  |   9 +
 .../80211/cfg80211-subsystem/data-path-helpers.rst |   9 +
 .../cfg80211-subsystem/device-registration.rst |   9 +
 Documentation/80211/cfg80211-subsystem/index.rst   |  14 +
 .../regulatory-enforcement-infrastructure.rst  |   9 +
 .../cfg80211-subsystem/rfkill-integration.rst  |   9 +
 .../scanning-and-bss-list-handling.rst |   9 +
 .../80211/cfg80211-subsystem/test-mode.rst |   9 +
 .../80211/cfg80211-subsystem/utility-functions.rst |   9 +
 ...reless-80211-networking-in-the-linux-kernel.rst |  47 ++
 Documentation/80211/index.rst  |   9 +
 .../advanced/access-point-mode-support.rst |  19 +
 .../advanced/aggregation.rst   |  18 +
 .../advanced/beacon-filter-support.rst |   9 +
 .../advanced/hardware-crypto-acceleration.rst  |   9 +
 .../advanced/hardware-scan-offload.rst |   8 +
 .../mac80211-developers-guide/advanced/index.rst   |  21 +
 .../advanced/led-support.rst   |  11 +
 .../advanced/multiple-queues-and-qos-support.rst   |   8 +
 .../advanced/powersave-support.rst |   6 +
 .../advanced/spatial-multiplexing-powersave.rst|   9 +
 .../advanced/station-handling.rst  |   8 +
 .../supporting-multiple-virtual-interfaces.rst |  14 +
 .../basic/basic-hardware-handling.rst  |  22 +
 .../basic/frame-filtering.rst  |   9 +
 .../mac80211-developers-guide/basic/index.rst  |  20 +
 .../basic/phy-configuration.rst|  11 +
 .../basic/receive-and-transmit-processing.rst  |  35 ++
 .../basic/the-mac80211-workqueue.rst   |   9 +
 .../basic/virtual-interfaces.rst   |  20 +
 .../80211/mac80211-developers-guide/index.rst  |  22 +
 .../internals/aggregation.rst  |   6 +
 .../mac80211-developers-guide/internals/index.rst  |  16 +
 .../internals/key-handling.rst |  14 +
 .../internals/receive-processing.rst   |   5 +
 .../internals/station-info-handling.rst|  15 +
 .../internals/synchronisation.rst  |   7 +
 .../internals/transmit-processing.rst  |   3 +
 .../rate-control/index.rst |  12 +
 .../rate-control/rate-control-api.rst  |   8 +
 Documentation/DocBook/80211.tmpl   | 584
-
Documentation/DocBook/Makefile |   2 +-
Documentation/index.rst|   1 + 43 files
changed, 518 insertions(+), 585 deletions(-) create mode 100644
Documentation/80211/cfg80211-subsystem/actions-and-configuration.rst
create mode 100644
Documentation/80211/cfg80211-subsystem/data-path-helpers.rst create
mode 100644
Documentation/80211/cfg80211-subsystem/device-registration.rst create
mode 100644 Documentation/80211/cfg80211-subsystem/index.rst create
mode 100644
Documentation/80211/cfg80211-subsystem/regulatory-enforcement-infrastructure.rst
create mode 100644
Documentation/80211/cfg80211-subsystem/rfkill-integration.rst create
mode 100644
Documentation/80211/cfg80211-subsystem/scanning-and-bss-list-handling.rst
create mode 100644 Documentation/80211/cfg80211-subsystem/test-mode.rst
create mode 100644
Documentation/80211/cfg80211-subsystem/utility-functions.rst create
mode 100644
Documentation/80211/explaining-wireless-80211-networking-in-the-linux-kernel.rst
create mode 100644 Documentation/80211/index.rst create mode 100644
Documentation/80211/mac80211-developers-guide/advanced/access-point-mode-support.rst
create mode 100644
Documentation/80211/mac80211-developers-guide/advanced/aggregation.rst
create mode 100644
Documentation/80211/mac80211-developers-guide/advanced/beacon-filter-support.rst
create mode 100644
Documentation/80211/mac80211-developers-guide/advanced/hardware-crypto-acceleration.rst
create mode 100644
Documentation/80211/mac80211-developers-guide/advanced/hardware-scan-offload.rst
create mode 100644
Documentation/80211/mac80211-developers-guide/advanced/index.rst create
mode 100644
Documentation/80211/mac80211-developers-guide/advanced/led-support.rst
create mode 100644
Documentation/80211/mac80211-developers-guide/advanced/multiple-queues-and-qos-support.rst
create mode 100644
Documentation/80211/mac80211-developers-guide/advanced/powersave-support.rst
create mode 100644
Documentation/80211/mac80211-developers-guide/advanced/spatial-multiplexing-powersave.rst
create mode 100644
Documentation/80211/mac80211-developers-guide/advanced/station-handling.rst
create mode 100644
Documentation/80211/mac80211-developers-guide/advanced/supporting-multiple-virtual-interfaces.rst
create mode 100644
Documentation/80211/mac80211-developers-guide/basic/basic-hardware-handling.rst
create mode 100644

Re: md/dm-crypt: Rename a jump label in crypt_message() ?

2016-09-30 Thread SF Markus Elfring
> If you continue discussing after that point,

I guess that such a condition is not needed.


> then you make a clear statement that it isn't an accident.

I hope that most of my software development activities are not "an accident".
Is the intent for any update suggestion (like the renaming of a jump label
in this case) reasonable to some degree?


> You are deliberately wasting their time.

I imagine that I do not really try to "waste" others time. But I am trying
also to change some "things".
There are circumstances when these contributions are interpreted as "wasted 
efforts".
Is the change acceptance usually higher for other update patterns?


> Something along "I will not listen.

I am listening while my responses might not fit to your current expectations.


> I will not change.

I have got also some personal change opportunities.


> Nothing you tell me will ever make it worth your time to do so"?

While you can be so clear about a rejection for this software module at the 
moment,
other contributors showed occasionally more positive information.

Regards,
Markus
--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: utime accounting regression since 4.6 (was: Re: [PACTH v2 0/3] Implement /proc//totmaps)

2016-09-30 Thread Rik van Riel
On Fri, 2016-09-30 at 11:49 +0200, Michal Hocko wrote:
> [CC Mike and Mel as they have seen some accounting oddities
>  when doing performance testing. They can share details but
>  essentially the system time just gets too high]
> 
> For your reference the email thread started
> http://lkml.kernel.org/r/20160823143330.gl23...@dhcp22.suse.cz
> 
> I suspect this is mainly for short lived processes - like kernel
> compile
> $ /usr/bin/time -v make mm/mmap.o
> [...]
> User time (seconds): 0.45
> System time (seconds): 0.82
> Percent of CPU this job got: 111%
> Elapsed (wall clock) time (h:mm:ss or m:ss): 0:01.14
> $ rm mm/mmap.o
> $ /usr/bin/time -v make mm/mmap.o
> [...]
> User time (seconds): 0.47
> System time (seconds): 1.55
> Percent of CPU this job got: 107%
> Elapsed (wall clock) time (h:mm:ss or m:ss): 0:01.88

I was not able to get the "expected" results from
your last reproducer, but this one does happen on
my system, too.

The bad news is, I still have no clue what is causing
it...

-- 
All Rights Reversed.

signature.asc
Description: This is a digitally signed message part


Re: md/dm-crypt: Rename a jump label in crypt_message() ?

2016-09-30 Thread SF Markus Elfring
> I don't like out labels, but that's my opinion.

Thanks for this acknowledgement that you have still got a special opinion
about such an identifier.

 
> There is nothing in CodingStyle which says you can't do it.

Does the terse description there try to suggest also to choose
better identifiers for source code places?

Does the meaning of such a coding style specification include also
the selection of a more pleasing identifier than "error"
for this software module?

Regards,
Markus
--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: md/dm-crypt: Rename a jump label in crypt_message() ?

2016-09-30 Thread Bjørn Mork
SF Markus Elfring  writes:

>> When someone tells you that you are wasting their time,
>
> This information can be useful to some degree

Yes.  If you continue discussing after that point, then you make a clear
statement that it isn't an accident.  You are deliberately wasting their
time.

A lot of people already know this.  But you're right that it would be
useful to make it even clearer.  Maybe you could add a note about it to
each patch?  Something along "I will not listen.  I will not change.
Nothing you tell me will ever make it worth your time to do so"?

Just an idea...



Bjørn
--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: md/dm-crypt: Rename a jump label in crypt_message() ?

2016-09-30 Thread Dan Carpenter
On Fri, Sep 30, 2016 at 01:32:23PM +0200, SF Markus Elfring wrote:
> > I specifically did not say that "out:" or "error:" labels are bad names.
> 
> Did you inform me once that you had also a special opinion about an identifier
> like "out"?

I don't like out labels, but that's my opinion.  There is nothing in
CodingStyle which says you can't do it.

regards,
dan carpenter

--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: md/dm-crypt: Rename a jump label in crypt_message() ?

2016-09-30 Thread SF Markus Elfring
> When someone tells you that you are wasting their time,

This information can be useful to some degree


> then that is not a subject for further discussion.

I got an other impression. I guess that there are constraints for such a 
response
which can become interesting for further considerations.

Is it just usual that other software changes are more welcome?

Regards,
Markus
--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: md/dm-crypt: Rename a jump label in crypt_message() ?

2016-09-30 Thread Bjørn Mork
SF Markus Elfring  writes:

>> go around bothering everyone with waste of time cleanup patches.
>
> I find it still debatable if the shown software development efforts
> are really "wasted".

When someone tells you that you are wasting their time, then that is not
a subject for further discussion.


Bjørn
--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: md/dm-crypt: Rename a jump label in crypt_message() ?

2016-09-30 Thread SF Markus Elfring
> Again, I wrote the paragraph in CodingStyle.

This is obvious from the corresponding commit "add some more error
handling guidelines" (ea04036032edda6f771c1381d03832d2ed0f6c31
on 2014-12-02).


> I just said that it's a good idea to think about label names

I agree also to such a desire.


> instead of using GW-BASIC style numbered labels,

Is this kind of wording another weakness in the discussed document?
How many guidance do programmers get from such a software specification?

I came a few source code places along where I proposed corresponding changes.


> I didn't say

You did not say anything about some details as it is often easier to express
several aspects in vague and general terms.


> go around bothering everyone with waste of time cleanup patches.

I find it still debatable if the shown software development efforts
are really "wasted".

It seems that also the Linux development community is mixed about
related interpretations.


> I specifically did not say that "out:" or "error:" labels are bad names.

Did you inform me once that you had also a special opinion about an identifier
like "out"?

The C compiler will accept them as usual. But do we occasionally prefer
to express implementation details a bit better there?


> Those are common style in the kernel.

* Which impressions can you get from a statement like "goto fail;"
  or "goto error;"?

* Do any exception handling implementations should be reconsidered
  at such places?


> Please stop sending these patches.

Could it happen that the change acceptance will increase also for
the suggested renaming of jump labels if maintainers from other subsystems
would dare to respond once more in a positive way for such a software 
refactoring?

Regards,
Markus
--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: utime accounting regression since 4.6 (was: Re: [PACTH v2 0/3] Implement /proc//totmaps)

2016-09-30 Thread Michal Hocko
[CC Mike and Mel as they have seen some accounting oddities
 when doing performance testing. They can share details but
 essentially the system time just gets too high]

For your reference the email thread started
http://lkml.kernel.org/r/20160823143330.gl23...@dhcp22.suse.cz

I suspect this is mainly for short lived processes - like kernel compile
$ /usr/bin/time -v make mm/mmap.o
[...]
User time (seconds): 0.45
System time (seconds): 0.82
Percent of CPU this job got: 111%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:01.14
$ rm mm/mmap.o
$ /usr/bin/time -v make mm/mmap.o
[...]
User time (seconds): 0.47
System time (seconds): 1.55
Percent of CPU this job got: 107%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:01.88

This is quite unexpected for a cache hot compile. I would expect most of
the time being spent in userspace.

$ perf report | grep kernel.vmlinux
 2.01%  as[kernel.vmlinux] [k] page_fault
 0.59%  cc1   [kernel.vmlinux] [k] page_fault
 0.15%  git   [kernel.vmlinux] [k] page_fault
 0.12%  bash  [kernel.vmlinux] [k] page_fault
 0.11%  sh[kernel.vmlinux] [k] page_fault
 0.08%  gcc   [kernel.vmlinux] [k] page_fault
 0.06%  make  [kernel.vmlinux] [k] page_fault
 0.04%  rm[kernel.vmlinux] [k] page_fault
 0.03%  ld[kernel.vmlinux] [k] page_fault
 0.02%  bash  [kernel.vmlinux] [k] entry_SYSCALL_64
 0.01%  git   [kernel.vmlinux] [k] entry_SYSCALL_64
 0.01%  cat   [kernel.vmlinux] [k] page_fault
 0.01%  collect2  [kernel.vmlinux] [k] page_fault
 0.00%  sh[kernel.vmlinux] [k] entry_SYSCALL_64
 0.00%  rm[kernel.vmlinux] [k] entry_SYSCALL_64
 0.00%  grep  [kernel.vmlinux] [k] page_fault

doesn't show anything unexpected.

Original Rik's reply follows:

On Tue 23-08-16 17:46:11, Rik van Riel wrote:
> On Tue, 2016-08-23 at 16:33 +0200, Michal Hocko wrote:
[...]
> > OK, so it seems I found it. I was quite lucky because
> > account_user_time
> > is not all that popular function and there were basically no changes
> > besides Riks ff9a9b4c4334 ("sched, time: Switch
> > VIRT_CPU_ACCOUNTING_GEN
> > to jiffy granularity") and that seems to cause the regression.
> > Reverting
> > the commit on top of the current mmotm seems to fix the issue for me.
> > 
> > And just to give Rik more context. While debugging overhead of the
> > /proc//smaps I am getting a misleading output from /usr/bin/time
> > -v
> > (source for ./max_mmap is [1])
> > 
> > root@test1:~# uname -r
> > 4.5.0-rc6-bisect1-00025-gff9a9b4c4334
> > root@test1:~# ./max_map 
> > pid:2990 maps:65515
> > root@test1:~# /usr/bin/time -v awk '/^Rss/{rss+=$2} /^Pss/{pss+=$2}
> > END {printf "rss:%d pss:%d\n", rss, pss}' /proc/2990/smaps
> > rss:263368 pss:262203
> > Command being timed: "awk /^Rss/{rss+=$2} /^Pss/{pss+=$2} END
> > {printf "rss:%d pss:%d\n", rss, pss} /proc/2990/smaps"
> > User time (seconds): 0.00
> > System time (seconds): 0.45
> > Percent of CPU this job got: 98%
> > 
> 
> > root@test1:~# /usr/bin/time -v awk '/^Rss/{rss+=$2} /^Pss/{pss+=$2}
> > END {printf "rss:%d pss:%d\n", rss, pss}' /proc/3015/smaps
> > rss:263316 pss:262199
> > Command being timed: "awk /^Rss/{rss+=$2} /^Pss/{pss+=$2} END
> > {printf "rss:%d pss:%d\n", rss, pss} /proc/3015/smaps"
> > User time (seconds): 0.18
> > System time (seconds): 0.29
> > Percent of CPU this job got: 97%
> 
> The patch in question makes user and system
> time accounting essentially tick-based. If
> jiffies changes while the task is in user
> mode, time gets accounted as user time, if
> jiffies changes while the task is in system
> mode, time gets accounted as system time.
> 
> If you get "unlucky", with a job like the
> above, it is possible all time gets accounted
> to system time.
> 
> This would be true both with the system running
> with a periodic timer tick (before and after my
> patch is applied), and in nohz_idle mode (after
> my patch).
> 
> However, it does seem quite unlikely that you
> get zero user time, since you have 125 timer
> ticks in half a second. Furthermore, you do not
> even have NO_HZ_FULL enabled...
> 
> Does the workload consistently get zero user
> time?
> 
> If so, we need to dig further to see under
> what precise circumstances that happens.
> 
> On my laptop, with kernel 4.6.3-300.fc24.x86_64
> I get this:
> 
> $ /usr/bin/time -v awk '/^Rss/{rss+=$2} /^Pss/{pss+=$2} END {printf
> "rss:%d pss:%d\n", rss, pss}' /proc/19825/smaps
> rss:263368 pss:262145
>   Command being timed: "awk /^Rss/{rss+=$2} /^Pss/{pss+=$2} END
> {printf "rss:%d pss:%d\n", rss, pss} /proc/19825/smaps"
>   User time (seconds): 0.64
>   System time (seconds): 0.19
>   Percent of CPU this 

Re: [PATCH v8 0/8] thunderbolt: Introducing Thunderbolt(TM) Networking

2016-09-30 Thread gre...@linuxfoundation.org
On Fri, Sep 30, 2016 at 08:37:36AM +, Levy, Amir (Jer) wrote:
> On Fri, Sep 30 2016, 09:40 AM, David Miller wrote:
> > From: Greg KH 
> > Date: Fri, 30 Sep 2016 08:30:05 +0200
> > 
> > > On Fri, Sep 30, 2016 at 01:55:55AM -0400, David Miller wrote:
> > >> From: Amir Levy 
> > >> Date: Wed, 28 Sep 2016 17:44:22 +0300
> > >>
> > >> > This driver enables Thunderbolt Networking on non-Apple platforms
> > >> > running Linux.
> > >>
> > >> Greg, any idea where this should get merged once fully vetted?  I can
> > >> take it through the net-next tree, but I'm fine with another more
> > >> appropriate tree taking it as well.
> > >
> > > I am supposed to be taking thunderbolt patches, but if this really is
> > > a network driver, it should go under drivers/net/ somewhere.  It needs
> > > more review though, it's not ready to go through anyone's tree just
> > > yet :)
> > >
> > > I'll let the thunderbolt maintainer go through it first before asking
> > > for a netdev review.
> > 
> > Ok, thanks Greg.
> 
> Greg, David,
> Andreas replied to similar request on patch v6:
> "This driver is independent from mine. It uses an interface provided
> by the firmware which is not present on Apple hardware and with which
> I am not familiar (also it does networking, not pci with which I am
> also not familiar). So I cannot comment on the driver itself. I don't
> mind a second driver, if that is what you are asking."

Yes, but I still need an ack from the thunderbolt maintainer that you
aren't doing anything foolish with that interface before I can take the
code.

> Note that Thunderbolt Networking is the first feature we would like to
> submit, but the next features aren't related to network, but more to
> Thunderbolt functionality. 

If this really is a real network device, it should probably live in
drivers/net/ like other network drivers.

> This is the reason I created the directory thunderbolt/icm, since the
> next features requires ICM to be enabled as well.

As long as you have ICM split out so that other drivers can use it, it
should be fine, no matter where in the tree it lives, right?

> I also followed the firewire as example that includes net.c (in
> drivers/firewire directory) along with other firewire functionality. 

That's the old-style of placing files.  We have moved the USB network
drivers out of drivers/usb/ a while ago.  The current thought is to
group drivers of specific types, not busses, together wherever possible,
as that is usually the majority of the logic in the driver (i.e. a USB
network driver has more network-driver specific logic than USB-specific
logic.)

Hope this helps explain things.  I'll get to your patches next week,
they are in my queue at the moment, but have conferences to deal with at
the moment.  Don't let my delay stop you from working on further "ICM"
drivers if needed, you can always send new series of patches that build
on this one when you have it ready.

thanks,

greg k-h
--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: [PATCH v8 0/8] thunderbolt: Introducing Thunderbolt(TM) Networking

2016-09-30 Thread Levy, Amir (Jer)
On Fri, Sep 30 2016, 09:40 AM, David Miller wrote:
> From: Greg KH 
> Date: Fri, 30 Sep 2016 08:30:05 +0200
> 
> > On Fri, Sep 30, 2016 at 01:55:55AM -0400, David Miller wrote:
> >> From: Amir Levy 
> >> Date: Wed, 28 Sep 2016 17:44:22 +0300
> >>
> >> > This driver enables Thunderbolt Networking on non-Apple platforms
> >> > running Linux.
> >>
> >> Greg, any idea where this should get merged once fully vetted?  I can
> >> take it through the net-next tree, but I'm fine with another more
> >> appropriate tree taking it as well.
> >
> > I am supposed to be taking thunderbolt patches, but if this really is
> > a network driver, it should go under drivers/net/ somewhere.  It needs
> > more review though, it's not ready to go through anyone's tree just
> > yet :)
> >
> > I'll let the thunderbolt maintainer go through it first before asking
> > for a netdev review.
> 
> Ok, thanks Greg.

Greg, David,
Andreas replied to similar request on patch v6:
"This driver is independent from mine. It uses an interface provided by the 
firmware which is not present on Apple hardware and with which I am not 
familiar (also it does networking, not pci with which I am also not familiar). 
So I cannot comment on the driver itself. I don't mind a second driver, if that 
is what you are asking."

Note that Thunderbolt Networking is the first feature we would like to submit, 
but the next features aren't related to network, but more to Thunderbolt 
functionality. 
This is the reason I created the directory thunderbolt/icm, since the next 
features requires ICM to be enabled as well.
I also followed the firewire as example that includes net.c (in 
drivers/firewire directory) along with other firewire functionality. 

Thanks,
Amir

--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v8 0/8] thunderbolt: Introducing Thunderbolt(TM) Networking

2016-09-30 Thread Greg KH
On Fri, Sep 30, 2016 at 01:55:55AM -0400, David Miller wrote:
> From: Amir Levy 
> Date: Wed, 28 Sep 2016 17:44:22 +0300
> 
> > This driver enables Thunderbolt Networking on non-Apple platforms
> > running Linux.
> 
> Greg, any idea where this should get merged once fully vetted?  I can
> take it through the net-next tree, but I'm fine with another more
> appropriate tree taking it as well.

I am supposed to be taking thunderbolt patches, but if this really is a
network driver, it should go under drivers/net/ somewhere.  It needs
more review though, it's not ready to go through anyone's tree just yet :)

I'll let the thunderbolt maintainer go through it first before asking
for a netdev review.

thanks,

greg k-h
--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html