[PATCH] mm/slub.c: fix sysfs filename in comment
/sys/kernel/slab/xx/defrag_ratio should be remote_node_defrag_ratio. Signed-off-by: Li Peng--- mm/slub.c | 10 +- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index 4dbb109e..6ef1540 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1735,11 +1735,11 @@ static void *get_any_partial(struct kmem_cache *s, gfp_t flags, * may return off node objects because partial slabs are obtained * from other nodes and filled up. * -* If /sys/kernel/slab/xx/defrag_ratio is set to 100 (which makes -* defrag_ratio = 1000) then every (well almost) allocation will -* first attempt to defrag slab caches on other nodes. This means -* scanning over all nodes to look for partial slabs which may be -* expensive if we do it every time we are trying to find a slab +* If /sys/kernel/slab/xx/remote_node_defrag_ratio is set to 100 +* (which makes defrag_ratio = 1000) then every (well almost) +* allocation will first attempt to defrag slab caches on other nodes. +* This means scanning over all nodes to look for partial slabs which +* may be expensive if we do it every time we are trying to find a slab * with available objects. */ if (!s->remote_node_defrag_ratio || -- 1.8.3.1
[PATCH 4.5 022/101] bpf: fix check_map_func_compatibility logic
4.5-stable review patch. If anyone has any objections, please let me know. -- From: Alexei Starovoitov[ Upstream commit 6aff67c85c9e5a4bc99e5211c1bac547936626ca ] The commit 35578d798400 ("bpf: Implement function bpf_perf_event_read() that get the selected hardware PMU conuter") introduced clever way to check bpf_helper<->map_type compatibility. Later on commit a43eec304259 ("bpf: introduce bpf_perf_event_output() helper") adjusted the logic and inadvertently broke it. Get rid of the clever bool compare and go back to two-way check from map and from helper perspective. Fixes: a43eec304259 ("bpf: introduce bpf_perf_event_output() helper") Reported-by: Jann Horn Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- kernel/bpf/verifier.c | 53 ++ 1 file changed, 32 insertions(+), 21 deletions(-) --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -239,15 +239,6 @@ static const char * const reg_type_str[] [CONST_IMM] = "imm", }; -static const struct { - int map_type; - int func_id; -} func_limit[] = { - {BPF_MAP_TYPE_PROG_ARRAY, BPF_FUNC_tail_call}, - {BPF_MAP_TYPE_PERF_EVENT_ARRAY, BPF_FUNC_perf_event_read}, - {BPF_MAP_TYPE_PERF_EVENT_ARRAY, BPF_FUNC_perf_event_output}, -}; - static void print_verifier_state(struct verifier_env *env) { enum bpf_reg_type t; @@ -898,24 +889,44 @@ static int check_func_arg(struct verifie static int check_map_func_compatibility(struct bpf_map *map, int func_id) { - bool bool_map, bool_func; - int i; - if (!map) return 0; - for (i = 0; i < ARRAY_SIZE(func_limit); i++) { - bool_map = (map->map_type == func_limit[i].map_type); - bool_func = (func_id == func_limit[i].func_id); - /* only when map & func pair match it can continue. -* don't allow any other map type to be passed into -* the special func; -*/ - if (bool_func && bool_map != bool_func) - return -EINVAL; + /* We need a two way check, first is from map perspective ... */ + switch (map->map_type) { + case BPF_MAP_TYPE_PROG_ARRAY: + if (func_id != BPF_FUNC_tail_call) + goto error; + break; + case BPF_MAP_TYPE_PERF_EVENT_ARRAY: + if (func_id != BPF_FUNC_perf_event_read && + func_id != BPF_FUNC_perf_event_output) + goto error; + break; + default: + break; + } + + /* ... and second from the function itself. */ + switch (func_id) { + case BPF_FUNC_tail_call: + if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY) + goto error; + break; + case BPF_FUNC_perf_event_read: + case BPF_FUNC_perf_event_output: + if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) + goto error; + break; + default: + break; } return 0; +error: + verbose("cannot pass map_type %d into func %d\n", + map->map_type, func_id); + return -EINVAL; } static int check_call(struct verifier_env *env, int func_id)
[PATCH] mm/slub.c: fix sysfs filename in comment
/sys/kernel/slab/xx/defrag_ratio should be remote_node_defrag_ratio. Signed-off-by: Li Peng --- mm/slub.c | 10 +- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index 4dbb109e..6ef1540 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1735,11 +1735,11 @@ static void *get_any_partial(struct kmem_cache *s, gfp_t flags, * may return off node objects because partial slabs are obtained * from other nodes and filled up. * -* If /sys/kernel/slab/xx/defrag_ratio is set to 100 (which makes -* defrag_ratio = 1000) then every (well almost) allocation will -* first attempt to defrag slab caches on other nodes. This means -* scanning over all nodes to look for partial slabs which may be -* expensive if we do it every time we are trying to find a slab +* If /sys/kernel/slab/xx/remote_node_defrag_ratio is set to 100 +* (which makes defrag_ratio = 1000) then every (well almost) +* allocation will first attempt to defrag slab caches on other nodes. +* This means scanning over all nodes to look for partial slabs which +* may be expensive if we do it every time we are trying to find a slab * with available objects. */ if (!s->remote_node_defrag_ratio || -- 1.8.3.1
[PATCH 4.5 022/101] bpf: fix check_map_func_compatibility logic
4.5-stable review patch. If anyone has any objections, please let me know. -- From: Alexei Starovoitov [ Upstream commit 6aff67c85c9e5a4bc99e5211c1bac547936626ca ] The commit 35578d798400 ("bpf: Implement function bpf_perf_event_read() that get the selected hardware PMU conuter") introduced clever way to check bpf_helper<->map_type compatibility. Later on commit a43eec304259 ("bpf: introduce bpf_perf_event_output() helper") adjusted the logic and inadvertently broke it. Get rid of the clever bool compare and go back to two-way check from map and from helper perspective. Fixes: a43eec304259 ("bpf: introduce bpf_perf_event_output() helper") Reported-by: Jann Horn Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- kernel/bpf/verifier.c | 53 ++ 1 file changed, 32 insertions(+), 21 deletions(-) --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -239,15 +239,6 @@ static const char * const reg_type_str[] [CONST_IMM] = "imm", }; -static const struct { - int map_type; - int func_id; -} func_limit[] = { - {BPF_MAP_TYPE_PROG_ARRAY, BPF_FUNC_tail_call}, - {BPF_MAP_TYPE_PERF_EVENT_ARRAY, BPF_FUNC_perf_event_read}, - {BPF_MAP_TYPE_PERF_EVENT_ARRAY, BPF_FUNC_perf_event_output}, -}; - static void print_verifier_state(struct verifier_env *env) { enum bpf_reg_type t; @@ -898,24 +889,44 @@ static int check_func_arg(struct verifie static int check_map_func_compatibility(struct bpf_map *map, int func_id) { - bool bool_map, bool_func; - int i; - if (!map) return 0; - for (i = 0; i < ARRAY_SIZE(func_limit); i++) { - bool_map = (map->map_type == func_limit[i].map_type); - bool_func = (func_id == func_limit[i].func_id); - /* only when map & func pair match it can continue. -* don't allow any other map type to be passed into -* the special func; -*/ - if (bool_func && bool_map != bool_func) - return -EINVAL; + /* We need a two way check, first is from map perspective ... */ + switch (map->map_type) { + case BPF_MAP_TYPE_PROG_ARRAY: + if (func_id != BPF_FUNC_tail_call) + goto error; + break; + case BPF_MAP_TYPE_PERF_EVENT_ARRAY: + if (func_id != BPF_FUNC_perf_event_read && + func_id != BPF_FUNC_perf_event_output) + goto error; + break; + default: + break; + } + + /* ... and second from the function itself. */ + switch (func_id) { + case BPF_FUNC_tail_call: + if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY) + goto error; + break; + case BPF_FUNC_perf_event_read: + case BPF_FUNC_perf_event_output: + if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) + goto error; + break; + default: + break; } return 0; +error: + verbose("cannot pass map_type %d into func %d\n", + map->map_type, func_id); + return -EINVAL; } static int check_call(struct verifier_env *env, int func_id)
[PATCH 4.5 023/101] samples/bpf: fix trace_output example
4.5-stable review patch. If anyone has any objections, please let me know. -- From: Alexei Starovoitov[ Upstream commit 569cc39d39385a74b23145496bca2df5ac8b2fb8 ] llvm cannot always recognize memset as builtin function and optimize it away, so just delete it. It was a leftover from testing of bpf_perf_event_output() with large data structures. Fixes: 39111695b1b8 ("samples: bpf: add bpf_perf_event_output example") Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- samples/bpf/trace_output_kern.c |1 - 1 file changed, 1 deletion(-) --- a/samples/bpf/trace_output_kern.c +++ b/samples/bpf/trace_output_kern.c @@ -18,7 +18,6 @@ int bpf_prog1(struct pt_regs *ctx) u64 cookie; } data; - memset(, 0, sizeof(data)); data.pid = bpf_get_current_pid_tgid(); data.cookie = 0x12345678;
[PATCH 4.5 023/101] samples/bpf: fix trace_output example
4.5-stable review patch. If anyone has any objections, please let me know. -- From: Alexei Starovoitov [ Upstream commit 569cc39d39385a74b23145496bca2df5ac8b2fb8 ] llvm cannot always recognize memset as builtin function and optimize it away, so just delete it. It was a leftover from testing of bpf_perf_event_output() with large data structures. Fixes: 39111695b1b8 ("samples: bpf: add bpf_perf_event_output example") Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- samples/bpf/trace_output_kern.c |1 - 1 file changed, 1 deletion(-) --- a/samples/bpf/trace_output_kern.c +++ b/samples/bpf/trace_output_kern.c @@ -18,7 +18,6 @@ int bpf_prog1(struct pt_regs *ctx) u64 cookie; } data; - memset(, 0, sizeof(data)); data.pid = bpf_get_current_pid_tgid(); data.cookie = 0x12345678;
[PATCH 4.5 029/101] sch_dsmark: update backlog as well
4.5-stable review patch. If anyone has any objections, please let me know. -- From: WANG Cong[ Upstream commit bdf17661f63a79c3cb4209b970b1cc39e34f7543 ] Similarly, we need to update backlog too when we update qlen. Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/sch_dsmark.c |3 +++ 1 file changed, 3 insertions(+) --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -258,6 +258,7 @@ static int dsmark_enqueue(struct sk_buff return err; } + qdisc_qstats_backlog_inc(sch, skb); sch->q.qlen++; return NET_XMIT_SUCCESS; @@ -280,6 +281,7 @@ static struct sk_buff *dsmark_dequeue(st return NULL; qdisc_bstats_update(sch, skb); + qdisc_qstats_backlog_dec(sch, skb); sch->q.qlen--; index = skb->tc_index & (p->indices - 1); @@ -395,6 +397,7 @@ static void dsmark_reset(struct Qdisc *s pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p); qdisc_reset(p->q); + sch->qstats.backlog = 0; sch->q.qlen = 0; }
[PATCH 4.5 019/101] net/mlx4_en: fix spurious timestamping callbacks
4.5-stable review patch. If anyone has any objections, please let me know. -- From: Eric Dumazet[ Upstream commit fc96256c906362e845d848d0f6a6354450059e81 ] When multiple skb are TX-completed in a row, we might incorrectly keep a timestamp of a prior skb and cause extra work. Fixes: ec693d47010e8 ("net/mlx4_en: Add HW timestamping (TS) support") Signed-off-by: Eric Dumazet Cc: Willem de Bruijn Reviewed-by: Eran Ben Elisha Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx4/en_tx.c |6 -- 1 file changed, 4 insertions(+), 2 deletions(-) --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -402,7 +402,6 @@ static bool mlx4_en_process_tx_cq(struct u32 packets = 0; u32 bytes = 0; int factor = priv->cqe_factor; - u64 timestamp = 0; int done = 0; int budget = priv->tx_work_limit; u32 last_nr_txbb; @@ -442,9 +441,12 @@ static bool mlx4_en_process_tx_cq(struct new_index = be16_to_cpu(cqe->wqe_index) & size_mask; do { + u64 timestamp = 0; + txbbs_skipped += last_nr_txbb; ring_index = (ring_index + last_nr_txbb) & size_mask; - if (ring->tx_info[ring_index].ts_requested) + + if (unlikely(ring->tx_info[ring_index].ts_requested)) timestamp = mlx4_en_get_cqe_ts(cqe); /* free next descriptor */
[PATCH 4.5 024/101] net: Implement net_dbg_ratelimited() for CONFIG_DYNAMIC_DEBUG case
4.5-stable review patch. If anyone has any objections, please let me know. -- From: Tim Bingham[ Upstream commit 2c94b53738549d81dc7464a32117d1f5112c64d3 ] Prior to commit d92cff89a0c8 ("net_dbg_ratelimited: turn into no-op when !DEBUG") the implementation of net_dbg_ratelimited() was buggy for both the DEBUG and CONFIG_DYNAMIC_DEBUG cases. The bug was that net_ratelimit() was being called and, despite returning true, nothing was being printed to the console. This resulted in messages like the following - "net_ratelimit: %d callbacks suppressed" with no other output nearby. After commit d92cff89a0c8 ("net_dbg_ratelimited: turn into no-op when !DEBUG") the bug is fixed for the DEBUG case. However, there's no output at all for CONFIG_DYNAMIC_DEBUG case. This patch restores debug output (if enabled) for the CONFIG_DYNAMIC_DEBUG case. Add a definition of net_dbg_ratelimited() for the CONFIG_DYNAMIC_DEBUG case. The implementation takes care to check that dynamic debugging is enabled before calling net_ratelimit(). Fixes: d92cff89a0c8 ("net_dbg_ratelimited: turn into no-op when !DEBUG") Signed-off-by: Tim Bingham Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/linux/net.h | 10 +- 1 file changed, 9 insertions(+), 1 deletion(-) --- a/include/linux/net.h +++ b/include/linux/net.h @@ -245,7 +245,15 @@ do { \ net_ratelimited_function(pr_warn, fmt, ##__VA_ARGS__) #define net_info_ratelimited(fmt, ...) \ net_ratelimited_function(pr_info, fmt, ##__VA_ARGS__) -#if defined(DEBUG) +#if defined(CONFIG_DYNAMIC_DEBUG) +#define net_dbg_ratelimited(fmt, ...) \ +do { \ + DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, fmt); \ + if (unlikely(descriptor.flags & _DPRINTK_FLAGS_PRINT) &&\ + net_ratelimit())\ + __dynamic_pr_debug(, fmt, ##__VA_ARGS__);\ +} while (0) +#elif defined(DEBUG) #define net_dbg_ratelimited(fmt, ...) \ net_ratelimited_function(pr_debug, fmt, ##__VA_ARGS__) #else
[PATCH 4.5 029/101] sch_dsmark: update backlog as well
4.5-stable review patch. If anyone has any objections, please let me know. -- From: WANG Cong [ Upstream commit bdf17661f63a79c3cb4209b970b1cc39e34f7543 ] Similarly, we need to update backlog too when we update qlen. Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/sch_dsmark.c |3 +++ 1 file changed, 3 insertions(+) --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -258,6 +258,7 @@ static int dsmark_enqueue(struct sk_buff return err; } + qdisc_qstats_backlog_inc(sch, skb); sch->q.qlen++; return NET_XMIT_SUCCESS; @@ -280,6 +281,7 @@ static struct sk_buff *dsmark_dequeue(st return NULL; qdisc_bstats_update(sch, skb); + qdisc_qstats_backlog_dec(sch, skb); sch->q.qlen--; index = skb->tc_index & (p->indices - 1); @@ -395,6 +397,7 @@ static void dsmark_reset(struct Qdisc *s pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p); qdisc_reset(p->q); + sch->qstats.backlog = 0; sch->q.qlen = 0; }
[PATCH 4.5 019/101] net/mlx4_en: fix spurious timestamping callbacks
4.5-stable review patch. If anyone has any objections, please let me know. -- From: Eric Dumazet [ Upstream commit fc96256c906362e845d848d0f6a6354450059e81 ] When multiple skb are TX-completed in a row, we might incorrectly keep a timestamp of a prior skb and cause extra work. Fixes: ec693d47010e8 ("net/mlx4_en: Add HW timestamping (TS) support") Signed-off-by: Eric Dumazet Cc: Willem de Bruijn Reviewed-by: Eran Ben Elisha Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx4/en_tx.c |6 -- 1 file changed, 4 insertions(+), 2 deletions(-) --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -402,7 +402,6 @@ static bool mlx4_en_process_tx_cq(struct u32 packets = 0; u32 bytes = 0; int factor = priv->cqe_factor; - u64 timestamp = 0; int done = 0; int budget = priv->tx_work_limit; u32 last_nr_txbb; @@ -442,9 +441,12 @@ static bool mlx4_en_process_tx_cq(struct new_index = be16_to_cpu(cqe->wqe_index) & size_mask; do { + u64 timestamp = 0; + txbbs_skipped += last_nr_txbb; ring_index = (ring_index + last_nr_txbb) & size_mask; - if (ring->tx_info[ring_index].ts_requested) + + if (unlikely(ring->tx_info[ring_index].ts_requested)) timestamp = mlx4_en_get_cqe_ts(cqe); /* free next descriptor */
[PATCH 4.5 024/101] net: Implement net_dbg_ratelimited() for CONFIG_DYNAMIC_DEBUG case
4.5-stable review patch. If anyone has any objections, please let me know. -- From: Tim Bingham [ Upstream commit 2c94b53738549d81dc7464a32117d1f5112c64d3 ] Prior to commit d92cff89a0c8 ("net_dbg_ratelimited: turn into no-op when !DEBUG") the implementation of net_dbg_ratelimited() was buggy for both the DEBUG and CONFIG_DYNAMIC_DEBUG cases. The bug was that net_ratelimit() was being called and, despite returning true, nothing was being printed to the console. This resulted in messages like the following - "net_ratelimit: %d callbacks suppressed" with no other output nearby. After commit d92cff89a0c8 ("net_dbg_ratelimited: turn into no-op when !DEBUG") the bug is fixed for the DEBUG case. However, there's no output at all for CONFIG_DYNAMIC_DEBUG case. This patch restores debug output (if enabled) for the CONFIG_DYNAMIC_DEBUG case. Add a definition of net_dbg_ratelimited() for the CONFIG_DYNAMIC_DEBUG case. The implementation takes care to check that dynamic debugging is enabled before calling net_ratelimit(). Fixes: d92cff89a0c8 ("net_dbg_ratelimited: turn into no-op when !DEBUG") Signed-off-by: Tim Bingham Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/linux/net.h | 10 +- 1 file changed, 9 insertions(+), 1 deletion(-) --- a/include/linux/net.h +++ b/include/linux/net.h @@ -245,7 +245,15 @@ do { \ net_ratelimited_function(pr_warn, fmt, ##__VA_ARGS__) #define net_info_ratelimited(fmt, ...) \ net_ratelimited_function(pr_info, fmt, ##__VA_ARGS__) -#if defined(DEBUG) +#if defined(CONFIG_DYNAMIC_DEBUG) +#define net_dbg_ratelimited(fmt, ...) \ +do { \ + DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, fmt); \ + if (unlikely(descriptor.flags & _DPRINTK_FLAGS_PRINT) &&\ + net_ratelimit())\ + __dynamic_pr_debug(, fmt, ##__VA_ARGS__);\ +} while (0) +#elif defined(DEBUG) #define net_dbg_ratelimited(fmt, ...) \ net_ratelimited_function(pr_debug, fmt, ##__VA_ARGS__) #else
[PATCH 4.5 046/101] zsmalloc: fix zs_can_compact() integer overflow
4.5-stable review patch. If anyone has any objections, please let me know. -- From: Sergey Senozhatskycommit 44f43e99fe70833058482d183e99fdfd11220996 upstream. zs_can_compact() has two race conditions in its core calculation: unsigned long obj_wasted = zs_stat_get(class, OBJ_ALLOCATED) - zs_stat_get(class, OBJ_USED); 1) classes are not locked, so the numbers of allocated and used objects can change by the concurrent ops happening on other CPUs 2) shrinker invokes it from preemptible context Depending on the circumstances, thus, OBJ_ALLOCATED can become less than OBJ_USED, which can result in either very high or negative `total_scan' value calculated later in do_shrink_slab(). do_shrink_slab() has some logic to prevent those cases: vmscan: shrink_slab: zs_shrinker_scan+0x0/0x28 [zsmalloc] negative objects to delete nr=-62 vmscan: shrink_slab: zs_shrinker_scan+0x0/0x28 [zsmalloc] negative objects to delete nr=-62 vmscan: shrink_slab: zs_shrinker_scan+0x0/0x28 [zsmalloc] negative objects to delete nr=-64 vmscan: shrink_slab: zs_shrinker_scan+0x0/0x28 [zsmalloc] negative objects to delete nr=-62 vmscan: shrink_slab: zs_shrinker_scan+0x0/0x28 [zsmalloc] negative objects to delete nr=-62 vmscan: shrink_slab: zs_shrinker_scan+0x0/0x28 [zsmalloc] negative objects to delete nr=-62 However, due to the way `total_scan' is calculated, not every shrinker->count_objects() overflow can be spotted and handled. To demonstrate the latter, I added some debugging code to do_shrink_slab() (x86_64) and the results were: vmscan: OVERFLOW: shrinker->count_objects() == -1 [18446744073709551615] vmscan: but total_scan > 0: 92679974445502 vmscan: resulting total_scan: 92679974445502 [..] vmscan: OVERFLOW: shrinker->count_objects() == -1 [18446744073709551615] vmscan: but total_scan > 0: 22634041808232578 vmscan: resulting total_scan: 22634041808232578 Even though shrinker->count_objects() has returned an overflowed value, the resulting `total_scan' is positive, and, what is more worrisome, it is insanely huge. This value is getting used later on in shrinker->scan_objects() loop: while (total_scan >= batch_size || total_scan >= freeable) { unsigned long ret; unsigned long nr_to_scan = min(batch_size, total_scan); shrinkctl->nr_to_scan = nr_to_scan; ret = shrinker->scan_objects(shrinker, shrinkctl); if (ret == SHRINK_STOP) break; freed += ret; count_vm_events(SLABS_SCANNED, nr_to_scan); total_scan -= nr_to_scan; cond_resched(); } `total_scan >= batch_size' is true for a very-very long time and 'total_scan >= freeable' is also true for quite some time, because `freeable < 0' and `total_scan' is large enough, for example, 22634041808232578. The only break condition, in the given scheme of things, is shrinker->scan_objects() == SHRINK_STOP test, which is a bit too weak to rely on, especially in heavy zsmalloc-usage scenarios. To fix the issue, take a pool stat snapshot and use it instead of racy zs_stat_get() calls. Link: http://lkml.kernel.org/r/20160509140052.3389-1-sergey.senozhat...@gmail.com Signed-off-by: Sergey Senozhatsky Cc: Minchan Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/zsmalloc.c |7 +-- 1 file changed, 5 insertions(+), 2 deletions(-) --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -1732,10 +1732,13 @@ static struct page *isolate_source_page( static unsigned long zs_can_compact(struct size_class *class) { unsigned long obj_wasted; + unsigned long obj_allocated = zs_stat_get(class, OBJ_ALLOCATED); + unsigned long obj_used = zs_stat_get(class, OBJ_USED); - obj_wasted = zs_stat_get(class, OBJ_ALLOCATED) - - zs_stat_get(class, OBJ_USED); + if (obj_allocated <= obj_used) + return 0; + obj_wasted = obj_allocated - obj_used; obj_wasted /= get_maxobj_per_zspage(class->size, class->pages_per_zspage);
[PATCH 4.5 047/101] mm: thp: calculate the mapcount correctly for THP pages during WP faults
4.5-stable review patch. If anyone has any objections, please let me know. -- From: Andrea Arcangelicommit 6d0a07edd17cfc12fdc1f36de8072fa17cc3666f upstream. This will provide fully accuracy to the mapcount calculation in the write protect faults, so page pinning will not get broken by false positive copy-on-writes. total_mapcount() isn't the right calculation needed in reuse_swap_page(), so this introduces a page_trans_huge_mapcount() that is effectively the full accurate return value for page_mapcount() if dealing with Transparent Hugepages, however we only use the page_trans_huge_mapcount() during COW faults where it strictly needed, due to its higher runtime cost. This also provide at practical zero cost the total_mapcount information which is needed to know if we can still relocate the page anon_vma to the local vma. If page_trans_huge_mapcount() returns 1 we can reuse the page no matter if it's a pte or a pmd_trans_huge triggering the fault, but we can only relocate the page anon_vma to the local vma->anon_vma if we're sure it's only this "vma" mapping the whole THP physical range. Kirill A. Shutemov discovered the problem with moving the page anon_vma to the local vma->anon_vma in a previous version of this patch and another problem in the way page_move_anon_rmap() was called. Andrew Morton discovered that CONFIG_SWAP=n wouldn't build in a previous version, because reuse_swap_page must be a macro to call page_trans_huge_mapcount from swap.h, so this uses a macro again instead of an inline function. With this change at least it's a less dangerous usage than it was before, because "page" is used only once now, while with the previous code reuse_swap_page(page++) would have called page_mapcount on page+1 and it would have increased page twice instead of just once. Dean Luick noticed an uninitialized variable that could result in a rmap inefficiency for the non-THP case in a previous version. Mike Marciniszyn said: : Our RDMA tests are seeing an issue with memory locking that bisects to : commit 61f5d698cc97 ("mm: re-enable THP") : : The test program registers two rather large MRs (512M) and RDMA : writes data to a passive peer using the first and RDMA reads it back : into the second MR and compares that data. The sizes are chosen randomly : between 0 and 1024 bytes. : : The test will get through a few (<= 4 iterations) and then gets a : compare error. : : Tracing indicates the kernel logical addresses associated with the individual : pages at registration ARE correct , the data in the "RDMA read response only" : packets ARE correct. : : The "corruption" occurs when the packet crosse two pages that are not physically : contiguous. The second page reads back as zero in the program. : : It looks like the user VA at the point of the compare error no longer points to : the same physical address as was registered. : : This patch totally resolves the issue! Link: http://lkml.kernel.org/r/1462547040-1737-2-git-send-email-aarca...@redhat.com Signed-off-by: Andrea Arcangeli Reviewed-by: "Kirill A. Shutemov" Reviewed-by: Dean Luick Tested-by: Alex Williamson Tested-by: Mike Marciniszyn Tested-by: Josh Collier Cc: Marc Haber Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- include/linux/mm.h |9 ++ include/linux/swap.h |6 ++-- mm/huge_memory.c | 71 --- mm/memory.c | 22 ++- mm/swapfile.c| 13 + 5 files changed, 95 insertions(+), 26 deletions(-) --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -456,11 +456,20 @@ static inline int page_mapcount(struct p #ifdef CONFIG_TRANSPARENT_HUGEPAGE int total_mapcount(struct page *page); +int page_trans_huge_mapcount(struct page *page, int *total_mapcount); #else static inline int total_mapcount(struct page *page) { return page_mapcount(page); } +static inline int page_trans_huge_mapcount(struct page *page, + int *total_mapcount) +{ + int mapcount = page_mapcount(page); + if (total_mapcount) + *total_mapcount = mapcount; + return mapcount; +} #endif static inline int page_count(struct page *page) --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -418,7 +418,7 @@ extern sector_t swapdev_block(int, pgoff extern int page_swapcount(struct page *); extern int swp_swapcount(swp_entry_t entry); extern struct swap_info_struct *page_swap_info(struct page *); -extern int reuse_swap_page(struct page *); +extern bool reuse_swap_page(struct page *, int *); extern int try_to_free_swap(struct page *);
[PATCH 4.5 020/101] bpf: fix double-fdput in replace_map_fd_with_map_ptr()
4.5-stable review patch. If anyone has any objections, please let me know. -- From: Jann Horn[ Upstream commit 8358b02bf67d3a5d8a825070e1aa73f25fb2e4c7 ] When bpf(BPF_PROG_LOAD, ...) was invoked with a BPF program whose bytecode references a non-map file descriptor as a map file descriptor, the error handling code called fdput() twice instead of once (in __bpf_map_get() and in replace_map_fd_with_map_ptr()). If the file descriptor table of the current task is shared, this causes f_count to be decremented too much, allowing the struct file to be freed while it is still in use (use-after-free). This can be exploited to gain root privileges by an unprivileged user. This bug was introduced in commit 0246e64d9a5f ("bpf: handle pseudo BPF_LD_IMM64 insn"), but is only exploitable since commit 1be7f75d1668 ("bpf: enable non-root eBPF programs") because previously, CAP_SYS_ADMIN was required to reach the vulnerable code. (posted publicly according to request by maintainer) Signed-off-by: Jann Horn Signed-off-by: Linus Torvalds Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- kernel/bpf/verifier.c |1 - 1 file changed, 1 deletion(-) --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2004,7 +2004,6 @@ static int replace_map_fd_with_map_ptr(s if (IS_ERR(map)) { verbose("fd %d is not pointing to valid bpf_map\n", insn->imm); - fdput(f); return PTR_ERR(map); }
[PATCH 4.5 043/101] net/route: enforce hoplimit max value
4.5-stable review patch. If anyone has any objections, please let me know. -- From: Paolo Abeni[ Upstream commit 626abd59e51d4d8c6367e03aae252a8aa759ac78 ] Currently, when creating or updating a route, no check is performed in both ipv4 and ipv6 code to the hoplimit value. The caller can i.e. set hoplimit to 256, and when such route will be used, packets will be sent with hoplimit/ttl equal to 0. This commit adds checks for the RTAX_HOPLIMIT value, in both ipv4 ipv6 route code, substituting any value greater than 255 with 255. This is consistent with what is currently done for ADVMSS and MTU in the ipv4 code. Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/fib_semantics.c |2 ++ net/ipv6/route.c |2 ++ 2 files changed, 4 insertions(+) --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -975,6 +975,8 @@ fib_convert_metrics(struct fib_info *fi, val = 65535 - 40; if (type == RTAX_MTU && val > 65535 - 15) val = 65535 - 15; + if (type == RTAX_HOPLIMIT && val > 255) + val = 255; if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK)) return -EINVAL; fi->fib_metrics[type - 1] = val; --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1737,6 +1737,8 @@ static int ip6_convert_metrics(struct mx } else { val = nla_get_u32(nla); } + if (type == RTAX_HOPLIMIT && val > 255) + val = 255; if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK)) goto err;
[PATCH 4.5 041/101] net: thunderx: avoid exposing kernel stack
4.5-stable review patch. If anyone has any objections, please let me know. -- From: "xypron.g...@gmx.de"[ Upstream commit 161de2caf68c549c266e571ffba8e2163886fb10 ] Reserved fields should be set to zero to avoid exposing bits from the kernel stack. Signed-off-by: Heinrich Schuchardt Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/cavium/thunder/nicvf_queues.c |4 1 file changed, 4 insertions(+) --- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c @@ -519,6 +519,7 @@ static void nicvf_rcv_queue_config(struc nicvf_config_vlan_stripping(nic, nic->netdev->features); /* Enable Receive queue */ + memset(_cfg, 0, sizeof(struct rq_cfg)); rq_cfg.ena = 1; rq_cfg.tcp_ena = 0; nicvf_queue_reg_write(nic, NIC_QSET_RQ_0_7_CFG, qidx, *(u64 *)_cfg); @@ -551,6 +552,7 @@ void nicvf_cmp_queue_config(struct nicvf qidx, (u64)(cq->dmem.phys_base)); /* Enable Completion queue */ + memset(_cfg, 0, sizeof(struct cq_cfg)); cq_cfg.ena = 1; cq_cfg.reset = 0; cq_cfg.caching = 0; @@ -599,6 +601,7 @@ static void nicvf_snd_queue_config(struc qidx, (u64)(sq->dmem.phys_base)); /* Enable send queue & set queue size */ + memset(_cfg, 0, sizeof(struct sq_cfg)); sq_cfg.ena = 1; sq_cfg.reset = 0; sq_cfg.ldwb = 0; @@ -635,6 +638,7 @@ static void nicvf_rbdr_config(struct nic /* Enable RBDR & set queue size */ /* Buffer size should be in multiples of 128 bytes */ + memset(_cfg, 0, sizeof(struct rbdr_cfg)); rbdr_cfg.ena = 1; rbdr_cfg.reset = 0; rbdr_cfg.ldwb = 0;
[PATCH 4.5 046/101] zsmalloc: fix zs_can_compact() integer overflow
4.5-stable review patch. If anyone has any objections, please let me know. -- From: Sergey Senozhatsky commit 44f43e99fe70833058482d183e99fdfd11220996 upstream. zs_can_compact() has two race conditions in its core calculation: unsigned long obj_wasted = zs_stat_get(class, OBJ_ALLOCATED) - zs_stat_get(class, OBJ_USED); 1) classes are not locked, so the numbers of allocated and used objects can change by the concurrent ops happening on other CPUs 2) shrinker invokes it from preemptible context Depending on the circumstances, thus, OBJ_ALLOCATED can become less than OBJ_USED, which can result in either very high or negative `total_scan' value calculated later in do_shrink_slab(). do_shrink_slab() has some logic to prevent those cases: vmscan: shrink_slab: zs_shrinker_scan+0x0/0x28 [zsmalloc] negative objects to delete nr=-62 vmscan: shrink_slab: zs_shrinker_scan+0x0/0x28 [zsmalloc] negative objects to delete nr=-62 vmscan: shrink_slab: zs_shrinker_scan+0x0/0x28 [zsmalloc] negative objects to delete nr=-64 vmscan: shrink_slab: zs_shrinker_scan+0x0/0x28 [zsmalloc] negative objects to delete nr=-62 vmscan: shrink_slab: zs_shrinker_scan+0x0/0x28 [zsmalloc] negative objects to delete nr=-62 vmscan: shrink_slab: zs_shrinker_scan+0x0/0x28 [zsmalloc] negative objects to delete nr=-62 However, due to the way `total_scan' is calculated, not every shrinker->count_objects() overflow can be spotted and handled. To demonstrate the latter, I added some debugging code to do_shrink_slab() (x86_64) and the results were: vmscan: OVERFLOW: shrinker->count_objects() == -1 [18446744073709551615] vmscan: but total_scan > 0: 92679974445502 vmscan: resulting total_scan: 92679974445502 [..] vmscan: OVERFLOW: shrinker->count_objects() == -1 [18446744073709551615] vmscan: but total_scan > 0: 22634041808232578 vmscan: resulting total_scan: 22634041808232578 Even though shrinker->count_objects() has returned an overflowed value, the resulting `total_scan' is positive, and, what is more worrisome, it is insanely huge. This value is getting used later on in shrinker->scan_objects() loop: while (total_scan >= batch_size || total_scan >= freeable) { unsigned long ret; unsigned long nr_to_scan = min(batch_size, total_scan); shrinkctl->nr_to_scan = nr_to_scan; ret = shrinker->scan_objects(shrinker, shrinkctl); if (ret == SHRINK_STOP) break; freed += ret; count_vm_events(SLABS_SCANNED, nr_to_scan); total_scan -= nr_to_scan; cond_resched(); } `total_scan >= batch_size' is true for a very-very long time and 'total_scan >= freeable' is also true for quite some time, because `freeable < 0' and `total_scan' is large enough, for example, 22634041808232578. The only break condition, in the given scheme of things, is shrinker->scan_objects() == SHRINK_STOP test, which is a bit too weak to rely on, especially in heavy zsmalloc-usage scenarios. To fix the issue, take a pool stat snapshot and use it instead of racy zs_stat_get() calls. Link: http://lkml.kernel.org/r/20160509140052.3389-1-sergey.senozhat...@gmail.com Signed-off-by: Sergey Senozhatsky Cc: Minchan Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/zsmalloc.c |7 +-- 1 file changed, 5 insertions(+), 2 deletions(-) --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -1732,10 +1732,13 @@ static struct page *isolate_source_page( static unsigned long zs_can_compact(struct size_class *class) { unsigned long obj_wasted; + unsigned long obj_allocated = zs_stat_get(class, OBJ_ALLOCATED); + unsigned long obj_used = zs_stat_get(class, OBJ_USED); - obj_wasted = zs_stat_get(class, OBJ_ALLOCATED) - - zs_stat_get(class, OBJ_USED); + if (obj_allocated <= obj_used) + return 0; + obj_wasted = obj_allocated - obj_used; obj_wasted /= get_maxobj_per_zspage(class->size, class->pages_per_zspage);
[PATCH 4.5 047/101] mm: thp: calculate the mapcount correctly for THP pages during WP faults
4.5-stable review patch. If anyone has any objections, please let me know. -- From: Andrea Arcangeli commit 6d0a07edd17cfc12fdc1f36de8072fa17cc3666f upstream. This will provide fully accuracy to the mapcount calculation in the write protect faults, so page pinning will not get broken by false positive copy-on-writes. total_mapcount() isn't the right calculation needed in reuse_swap_page(), so this introduces a page_trans_huge_mapcount() that is effectively the full accurate return value for page_mapcount() if dealing with Transparent Hugepages, however we only use the page_trans_huge_mapcount() during COW faults where it strictly needed, due to its higher runtime cost. This also provide at practical zero cost the total_mapcount information which is needed to know if we can still relocate the page anon_vma to the local vma. If page_trans_huge_mapcount() returns 1 we can reuse the page no matter if it's a pte or a pmd_trans_huge triggering the fault, but we can only relocate the page anon_vma to the local vma->anon_vma if we're sure it's only this "vma" mapping the whole THP physical range. Kirill A. Shutemov discovered the problem with moving the page anon_vma to the local vma->anon_vma in a previous version of this patch and another problem in the way page_move_anon_rmap() was called. Andrew Morton discovered that CONFIG_SWAP=n wouldn't build in a previous version, because reuse_swap_page must be a macro to call page_trans_huge_mapcount from swap.h, so this uses a macro again instead of an inline function. With this change at least it's a less dangerous usage than it was before, because "page" is used only once now, while with the previous code reuse_swap_page(page++) would have called page_mapcount on page+1 and it would have increased page twice instead of just once. Dean Luick noticed an uninitialized variable that could result in a rmap inefficiency for the non-THP case in a previous version. Mike Marciniszyn said: : Our RDMA tests are seeing an issue with memory locking that bisects to : commit 61f5d698cc97 ("mm: re-enable THP") : : The test program registers two rather large MRs (512M) and RDMA : writes data to a passive peer using the first and RDMA reads it back : into the second MR and compares that data. The sizes are chosen randomly : between 0 and 1024 bytes. : : The test will get through a few (<= 4 iterations) and then gets a : compare error. : : Tracing indicates the kernel logical addresses associated with the individual : pages at registration ARE correct , the data in the "RDMA read response only" : packets ARE correct. : : The "corruption" occurs when the packet crosse two pages that are not physically : contiguous. The second page reads back as zero in the program. : : It looks like the user VA at the point of the compare error no longer points to : the same physical address as was registered. : : This patch totally resolves the issue! Link: http://lkml.kernel.org/r/1462547040-1737-2-git-send-email-aarca...@redhat.com Signed-off-by: Andrea Arcangeli Reviewed-by: "Kirill A. Shutemov" Reviewed-by: Dean Luick Tested-by: Alex Williamson Tested-by: Mike Marciniszyn Tested-by: Josh Collier Cc: Marc Haber Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- include/linux/mm.h |9 ++ include/linux/swap.h |6 ++-- mm/huge_memory.c | 71 --- mm/memory.c | 22 ++- mm/swapfile.c| 13 + 5 files changed, 95 insertions(+), 26 deletions(-) --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -456,11 +456,20 @@ static inline int page_mapcount(struct p #ifdef CONFIG_TRANSPARENT_HUGEPAGE int total_mapcount(struct page *page); +int page_trans_huge_mapcount(struct page *page, int *total_mapcount); #else static inline int total_mapcount(struct page *page) { return page_mapcount(page); } +static inline int page_trans_huge_mapcount(struct page *page, + int *total_mapcount) +{ + int mapcount = page_mapcount(page); + if (total_mapcount) + *total_mapcount = mapcount; + return mapcount; +} #endif static inline int page_count(struct page *page) --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -418,7 +418,7 @@ extern sector_t swapdev_block(int, pgoff extern int page_swapcount(struct page *); extern int swp_swapcount(swp_entry_t entry); extern struct swap_info_struct *page_swap_info(struct page *); -extern int reuse_swap_page(struct page *); +extern bool reuse_swap_page(struct page *, int *); extern int try_to_free_swap(struct page *); struct backing_dev_info; @@ -513,8 +513,8 @@ static inline int swp_swapcount(swp_entr return 0; } -#define reuse_swap_page(page) \ - (!PageTransCompound(page) && page_mapcount(page) == 1) +#define reuse_swap_page(page, total_mapcount) \ +
[PATCH 4.5 020/101] bpf: fix double-fdput in replace_map_fd_with_map_ptr()
4.5-stable review patch. If anyone has any objections, please let me know. -- From: Jann Horn [ Upstream commit 8358b02bf67d3a5d8a825070e1aa73f25fb2e4c7 ] When bpf(BPF_PROG_LOAD, ...) was invoked with a BPF program whose bytecode references a non-map file descriptor as a map file descriptor, the error handling code called fdput() twice instead of once (in __bpf_map_get() and in replace_map_fd_with_map_ptr()). If the file descriptor table of the current task is shared, this causes f_count to be decremented too much, allowing the struct file to be freed while it is still in use (use-after-free). This can be exploited to gain root privileges by an unprivileged user. This bug was introduced in commit 0246e64d9a5f ("bpf: handle pseudo BPF_LD_IMM64 insn"), but is only exploitable since commit 1be7f75d1668 ("bpf: enable non-root eBPF programs") because previously, CAP_SYS_ADMIN was required to reach the vulnerable code. (posted publicly according to request by maintainer) Signed-off-by: Jann Horn Signed-off-by: Linus Torvalds Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- kernel/bpf/verifier.c |1 - 1 file changed, 1 deletion(-) --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2004,7 +2004,6 @@ static int replace_map_fd_with_map_ptr(s if (IS_ERR(map)) { verbose("fd %d is not pointing to valid bpf_map\n", insn->imm); - fdput(f); return PTR_ERR(map); }
[PATCH 4.5 043/101] net/route: enforce hoplimit max value
4.5-stable review patch. If anyone has any objections, please let me know. -- From: Paolo Abeni [ Upstream commit 626abd59e51d4d8c6367e03aae252a8aa759ac78 ] Currently, when creating or updating a route, no check is performed in both ipv4 and ipv6 code to the hoplimit value. The caller can i.e. set hoplimit to 256, and when such route will be used, packets will be sent with hoplimit/ttl equal to 0. This commit adds checks for the RTAX_HOPLIMIT value, in both ipv4 ipv6 route code, substituting any value greater than 255 with 255. This is consistent with what is currently done for ADVMSS and MTU in the ipv4 code. Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/fib_semantics.c |2 ++ net/ipv6/route.c |2 ++ 2 files changed, 4 insertions(+) --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -975,6 +975,8 @@ fib_convert_metrics(struct fib_info *fi, val = 65535 - 40; if (type == RTAX_MTU && val > 65535 - 15) val = 65535 - 15; + if (type == RTAX_HOPLIMIT && val > 255) + val = 255; if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK)) return -EINVAL; fi->fib_metrics[type - 1] = val; --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1737,6 +1737,8 @@ static int ip6_convert_metrics(struct mx } else { val = nla_get_u32(nla); } + if (type == RTAX_HOPLIMIT && val > 255) + val = 255; if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK)) goto err;
[PATCH 4.5 041/101] net: thunderx: avoid exposing kernel stack
4.5-stable review patch. If anyone has any objections, please let me know. -- From: "xypron.g...@gmx.de" [ Upstream commit 161de2caf68c549c266e571ffba8e2163886fb10 ] Reserved fields should be set to zero to avoid exposing bits from the kernel stack. Signed-off-by: Heinrich Schuchardt Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/cavium/thunder/nicvf_queues.c |4 1 file changed, 4 insertions(+) --- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c @@ -519,6 +519,7 @@ static void nicvf_rcv_queue_config(struc nicvf_config_vlan_stripping(nic, nic->netdev->features); /* Enable Receive queue */ + memset(_cfg, 0, sizeof(struct rq_cfg)); rq_cfg.ena = 1; rq_cfg.tcp_ena = 0; nicvf_queue_reg_write(nic, NIC_QSET_RQ_0_7_CFG, qidx, *(u64 *)_cfg); @@ -551,6 +552,7 @@ void nicvf_cmp_queue_config(struct nicvf qidx, (u64)(cq->dmem.phys_base)); /* Enable Completion queue */ + memset(_cfg, 0, sizeof(struct cq_cfg)); cq_cfg.ena = 1; cq_cfg.reset = 0; cq_cfg.caching = 0; @@ -599,6 +601,7 @@ static void nicvf_snd_queue_config(struc qidx, (u64)(sq->dmem.phys_base)); /* Enable send queue & set queue size */ + memset(_cfg, 0, sizeof(struct sq_cfg)); sq_cfg.ena = 1; sq_cfg.reset = 0; sq_cfg.ldwb = 0; @@ -635,6 +638,7 @@ static void nicvf_rbdr_config(struct nic /* Enable RBDR & set queue size */ /* Buffer size should be in multiples of 128 bytes */ + memset(_cfg, 0, sizeof(struct rbdr_cfg)); rbdr_cfg.ena = 1; rbdr_cfg.reset = 0; rbdr_cfg.ldwb = 0;
[PATCH 4.5 045/101] ocfs2: fix posix_acl_create deadlock
4.5-stable review patch. If anyone has any objections, please let me know. -- From: Junxiao Bicommit c25a1e0671fbca7b2c0d0757d533bd2650d6dc0c upstream. Commit 702e5bc68ad2 ("ocfs2: use generic posix ACL infrastructure") refactored code to use posix_acl_create. The problem with this function is that it is not mindful of the cluster wide inode lock making it unsuitable for use with ocfs2 inode creation with ACLs. For example, when used in ocfs2_mknod, this function can cause deadlock as follows. The parent dir inode lock is taken when calling posix_acl_create -> get_acl -> ocfs2_iop_get_acl which takes the inode lock again. This can cause deadlock if there is a blocked remote lock request waiting for the lock to be downconverted. And same deadlock happened in ocfs2_reflink. This fix is to revert back using ocfs2_init_acl. Fixes: 702e5bc68ad2 ("ocfs2: use generic posix ACL infrastructure") Signed-off-by: Tariq Saeed Signed-off-by: Junxiao Bi Cc: Mark Fasheh Cc: Joel Becker Cc: Joseph Qi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/ocfs2/acl.c | 63 fs/ocfs2/acl.h |4 +++ fs/ocfs2/namei.c| 23 + fs/ocfs2/refcounttree.c | 17 +--- fs/ocfs2/xattr.c| 14 +++--- fs/ocfs2/xattr.h|4 --- 6 files changed, 77 insertions(+), 48 deletions(-) --- a/fs/ocfs2/acl.c +++ b/fs/ocfs2/acl.c @@ -346,3 +346,66 @@ int ocfs2_acl_chmod(struct inode *inode, posix_acl_release(acl); return ret; } + +/* + * Initialize the ACLs of a new inode. If parent directory has default ACL, + * then clone to new inode. Called from ocfs2_mknod. + */ +int ocfs2_init_acl(handle_t *handle, + struct inode *inode, + struct inode *dir, + struct buffer_head *di_bh, + struct buffer_head *dir_bh, + struct ocfs2_alloc_context *meta_ac, + struct ocfs2_alloc_context *data_ac) +{ + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + struct posix_acl *acl = NULL; + int ret = 0, ret2; + umode_t mode; + + if (!S_ISLNK(inode->i_mode)) { + if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) { + acl = ocfs2_get_acl_nolock(dir, ACL_TYPE_DEFAULT, + dir_bh); + if (IS_ERR(acl)) + return PTR_ERR(acl); + } + if (!acl) { + mode = inode->i_mode & ~current_umask(); + ret = ocfs2_acl_set_mode(inode, di_bh, handle, mode); + if (ret) { + mlog_errno(ret); + goto cleanup; + } + } + } + if ((osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) && acl) { + if (S_ISDIR(inode->i_mode)) { + ret = ocfs2_set_acl(handle, inode, di_bh, + ACL_TYPE_DEFAULT, acl, + meta_ac, data_ac); + if (ret) + goto cleanup; + } + mode = inode->i_mode; + ret = __posix_acl_create(, GFP_NOFS, ); + if (ret < 0) + return ret; + + ret2 = ocfs2_acl_set_mode(inode, di_bh, handle, mode); + if (ret2) { + mlog_errno(ret2); + ret = ret2; + goto cleanup; + } + if (ret > 0) { + ret = ocfs2_set_acl(handle, inode, + di_bh, ACL_TYPE_ACCESS, + acl, meta_ac, data_ac); + } + } +cleanup: + posix_acl_release(acl); + return ret; +} --- a/fs/ocfs2/acl.h +++ b/fs/ocfs2/acl.h @@ -36,5 +36,9 @@ int ocfs2_set_acl(handle_t *handle, struct ocfs2_alloc_context *meta_ac, struct ocfs2_alloc_context *data_ac); extern int ocfs2_acl_chmod(struct inode *, struct buffer_head *); +extern int ocfs2_init_acl(handle_t *, struct inode *, struct inode *, + struct buffer_head *, struct buffer_head *, + struct ocfs2_alloc_context *, + struct ocfs2_alloc_context *); #endif /* OCFS2_ACL_H */ --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -259,7 +259,6 @@ static int ocfs2_mknod(struct inode *dir struct
[PATCH 4.5 045/101] ocfs2: fix posix_acl_create deadlock
4.5-stable review patch. If anyone has any objections, please let me know. -- From: Junxiao Bi commit c25a1e0671fbca7b2c0d0757d533bd2650d6dc0c upstream. Commit 702e5bc68ad2 ("ocfs2: use generic posix ACL infrastructure") refactored code to use posix_acl_create. The problem with this function is that it is not mindful of the cluster wide inode lock making it unsuitable for use with ocfs2 inode creation with ACLs. For example, when used in ocfs2_mknod, this function can cause deadlock as follows. The parent dir inode lock is taken when calling posix_acl_create -> get_acl -> ocfs2_iop_get_acl which takes the inode lock again. This can cause deadlock if there is a blocked remote lock request waiting for the lock to be downconverted. And same deadlock happened in ocfs2_reflink. This fix is to revert back using ocfs2_init_acl. Fixes: 702e5bc68ad2 ("ocfs2: use generic posix ACL infrastructure") Signed-off-by: Tariq Saeed Signed-off-by: Junxiao Bi Cc: Mark Fasheh Cc: Joel Becker Cc: Joseph Qi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/ocfs2/acl.c | 63 fs/ocfs2/acl.h |4 +++ fs/ocfs2/namei.c| 23 + fs/ocfs2/refcounttree.c | 17 +--- fs/ocfs2/xattr.c| 14 +++--- fs/ocfs2/xattr.h|4 --- 6 files changed, 77 insertions(+), 48 deletions(-) --- a/fs/ocfs2/acl.c +++ b/fs/ocfs2/acl.c @@ -346,3 +346,66 @@ int ocfs2_acl_chmod(struct inode *inode, posix_acl_release(acl); return ret; } + +/* + * Initialize the ACLs of a new inode. If parent directory has default ACL, + * then clone to new inode. Called from ocfs2_mknod. + */ +int ocfs2_init_acl(handle_t *handle, + struct inode *inode, + struct inode *dir, + struct buffer_head *di_bh, + struct buffer_head *dir_bh, + struct ocfs2_alloc_context *meta_ac, + struct ocfs2_alloc_context *data_ac) +{ + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + struct posix_acl *acl = NULL; + int ret = 0, ret2; + umode_t mode; + + if (!S_ISLNK(inode->i_mode)) { + if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) { + acl = ocfs2_get_acl_nolock(dir, ACL_TYPE_DEFAULT, + dir_bh); + if (IS_ERR(acl)) + return PTR_ERR(acl); + } + if (!acl) { + mode = inode->i_mode & ~current_umask(); + ret = ocfs2_acl_set_mode(inode, di_bh, handle, mode); + if (ret) { + mlog_errno(ret); + goto cleanup; + } + } + } + if ((osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) && acl) { + if (S_ISDIR(inode->i_mode)) { + ret = ocfs2_set_acl(handle, inode, di_bh, + ACL_TYPE_DEFAULT, acl, + meta_ac, data_ac); + if (ret) + goto cleanup; + } + mode = inode->i_mode; + ret = __posix_acl_create(, GFP_NOFS, ); + if (ret < 0) + return ret; + + ret2 = ocfs2_acl_set_mode(inode, di_bh, handle, mode); + if (ret2) { + mlog_errno(ret2); + ret = ret2; + goto cleanup; + } + if (ret > 0) { + ret = ocfs2_set_acl(handle, inode, + di_bh, ACL_TYPE_ACCESS, + acl, meta_ac, data_ac); + } + } +cleanup: + posix_acl_release(acl); + return ret; +} --- a/fs/ocfs2/acl.h +++ b/fs/ocfs2/acl.h @@ -36,5 +36,9 @@ int ocfs2_set_acl(handle_t *handle, struct ocfs2_alloc_context *meta_ac, struct ocfs2_alloc_context *data_ac); extern int ocfs2_acl_chmod(struct inode *, struct buffer_head *); +extern int ocfs2_init_acl(handle_t *, struct inode *, struct inode *, + struct buffer_head *, struct buffer_head *, + struct ocfs2_alloc_context *, + struct ocfs2_alloc_context *); #endif /* OCFS2_ACL_H */ --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -259,7 +259,6 @@ static int ocfs2_mknod(struct inode *dir struct ocfs2_dir_lookup_result lookup = { NULL, }; sigset_t oldset; int did_block_signals = 0; - struct posix_acl *default_acl = NULL, *acl = NULL; struct ocfs2_dentry_lock *dl = NULL;
[PATCH 4.5 050/101] crypto: hash - Fix page length clamping in hash walk
4.5-stable review patch. If anyone has any objections, please let me know. -- From: Herbert Xucommit 13f4bb78cf6a312bbdec367ba3da044b09bf0e29 upstream. The crypto hash walk code is broken when supplied with an offset greater than or equal to PAGE_SIZE. This patch fixes it by adjusting walk->pg and walk->offset when this happens. Reported-by: Steffen Klassert Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/ahash.c |3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) --- a/crypto/ahash.c +++ b/crypto/ahash.c @@ -69,8 +69,9 @@ static int hash_walk_new_entry(struct cr struct scatterlist *sg; sg = walk->sg; - walk->pg = sg_page(sg); walk->offset = sg->offset; + walk->pg = sg_page(walk->sg) + (walk->offset >> PAGE_SHIFT); + walk->offset = offset_in_page(walk->offset); walk->entrylen = sg->length; if (walk->entrylen > walk->total)
[PATCH 4.5 050/101] crypto: hash - Fix page length clamping in hash walk
4.5-stable review patch. If anyone has any objections, please let me know. -- From: Herbert Xu commit 13f4bb78cf6a312bbdec367ba3da044b09bf0e29 upstream. The crypto hash walk code is broken when supplied with an offset greater than or equal to PAGE_SIZE. This patch fixes it by adjusting walk->pg and walk->offset when this happens. Reported-by: Steffen Klassert Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/ahash.c |3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) --- a/crypto/ahash.c +++ b/crypto/ahash.c @@ -69,8 +69,9 @@ static int hash_walk_new_entry(struct cr struct scatterlist *sg; sg = walk->sg; - walk->pg = sg_page(sg); walk->offset = sg->offset; + walk->pg = sg_page(walk->sg) + (walk->offset >> PAGE_SHIFT); + walk->offset = offset_in_page(walk->offset); walk->entrylen = sg->length; if (walk->entrylen > walk->total)
[PATCH 4.5 048/101] crypto: qat - fix invalid pf2vf_resp_wq logic
4.5-stable review patch. If anyone has any objections, please let me know. -- From: Tadeusz Strukcommit 9e209fcfb804da262e38e5cd2e680c47a41f0f95 upstream. The pf2vf_resp_wq is a global so it has to be created at init and destroyed at exit, instead of per device. Tested-by: Suresh Marikkannu Signed-off-by: Tadeusz Struk Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/qat/qat_common/adf_common_drv.h |2 + drivers/crypto/qat/qat_common/adf_ctl_drv.c|6 + drivers/crypto/qat/qat_common/adf_sriov.c | 26 +++-- 3 files changed, 24 insertions(+), 10 deletions(-) --- a/drivers/crypto/qat/qat_common/adf_common_drv.h +++ b/drivers/crypto/qat/qat_common/adf_common_drv.h @@ -144,6 +144,8 @@ void adf_disable_aer(struct adf_accel_de void adf_dev_restore(struct adf_accel_dev *accel_dev); int adf_init_aer(void); void adf_exit_aer(void); +int adf_init_pf_wq(void); +void adf_exit_pf_wq(void); int adf_init_admin_comms(struct adf_accel_dev *accel_dev); void adf_exit_admin_comms(struct adf_accel_dev *accel_dev); int adf_send_admin_init(struct adf_accel_dev *accel_dev); --- a/drivers/crypto/qat/qat_common/adf_ctl_drv.c +++ b/drivers/crypto/qat/qat_common/adf_ctl_drv.c @@ -462,12 +462,17 @@ static int __init adf_register_ctl_devic if (adf_init_aer()) goto err_aer; + if (adf_init_pf_wq()) + goto err_pf_wq; + if (qat_crypto_register()) goto err_crypto_register; return 0; err_crypto_register: + adf_exit_pf_wq(); +err_pf_wq: adf_exit_aer(); err_aer: adf_chr_drv_destroy(); @@ -480,6 +485,7 @@ static void __exit adf_unregister_ctl_de { adf_chr_drv_destroy(); adf_exit_aer(); + adf_exit_pf_wq(); qat_crypto_unregister(); adf_clean_vf_map(false); mutex_destroy(_ctl_lock); --- a/drivers/crypto/qat/qat_common/adf_sriov.c +++ b/drivers/crypto/qat/qat_common/adf_sriov.c @@ -119,11 +119,6 @@ static int adf_enable_sriov(struct adf_a int i; u32 reg; - /* Workqueue for PF2VF responses */ - pf2vf_resp_wq = create_workqueue("qat_pf2vf_resp_wq"); - if (!pf2vf_resp_wq) - return -ENOMEM; - for (i = 0, vf_info = accel_dev->pf.vf_info; i < totalvfs; i++, vf_info++) { /* This ptr will be populated when VFs will be created */ @@ -216,11 +211,6 @@ void adf_disable_sriov(struct adf_accel_ kfree(accel_dev->pf.vf_info); accel_dev->pf.vf_info = NULL; - - if (pf2vf_resp_wq) { - destroy_workqueue(pf2vf_resp_wq); - pf2vf_resp_wq = NULL; - } } EXPORT_SYMBOL_GPL(adf_disable_sriov); @@ -304,3 +294,19 @@ int adf_sriov_configure(struct pci_dev * return numvfs; } EXPORT_SYMBOL_GPL(adf_sriov_configure); + +int __init adf_init_pf_wq(void) +{ + /* Workqueue for PF2VF responses */ + pf2vf_resp_wq = create_workqueue("qat_pf2vf_resp_wq"); + + return !pf2vf_resp_wq ? -ENOMEM : 0; +} + +void adf_exit_pf_wq(void) +{ + if (pf2vf_resp_wq) { + destroy_workqueue(pf2vf_resp_wq); + pf2vf_resp_wq = NULL; + } +}
[PATCH 4.5 049/101] crypto: qat - fix adf_ctl_drv.c:undefined reference to adf_init_pf_wq
4.5-stable review patch. If anyone has any objections, please let me know. -- From: Tadeusz Strukcommit 6dc5df71ee5c8b44607928bfe27be50314dcf848 upstream. Fix undefined reference issue reported by kbuild test robot. Reported-by: kbuild test robot Signed-off-by: Tadeusz Struk Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/qat/qat_common/adf_common_drv.h | 13 +++-- 1 file changed, 11 insertions(+), 2 deletions(-) --- a/drivers/crypto/qat/qat_common/adf_common_drv.h +++ b/drivers/crypto/qat/qat_common/adf_common_drv.h @@ -144,8 +144,6 @@ void adf_disable_aer(struct adf_accel_de void adf_dev_restore(struct adf_accel_dev *accel_dev); int adf_init_aer(void); void adf_exit_aer(void); -int adf_init_pf_wq(void); -void adf_exit_pf_wq(void); int adf_init_admin_comms(struct adf_accel_dev *accel_dev); void adf_exit_admin_comms(struct adf_accel_dev *accel_dev); int adf_send_admin_init(struct adf_accel_dev *accel_dev); @@ -238,6 +236,8 @@ void adf_enable_vf2pf_interrupts(struct uint32_t vf_mask); void adf_enable_pf2vf_interrupts(struct adf_accel_dev *accel_dev); void adf_disable_pf2vf_interrupts(struct adf_accel_dev *accel_dev); +int adf_init_pf_wq(void); +void adf_exit_pf_wq(void); #else static inline int adf_sriov_configure(struct pci_dev *pdev, int numvfs) { @@ -255,5 +255,14 @@ static inline void adf_enable_pf2vf_inte static inline void adf_disable_pf2vf_interrupts(struct adf_accel_dev *accel_dev) { } + +static inline int adf_init_pf_wq(void) +{ + return 0; +} + +static inline void adf_exit_pf_wq(void) +{ +} #endif #endif
[PATCH 4.5 048/101] crypto: qat - fix invalid pf2vf_resp_wq logic
4.5-stable review patch. If anyone has any objections, please let me know. -- From: Tadeusz Struk commit 9e209fcfb804da262e38e5cd2e680c47a41f0f95 upstream. The pf2vf_resp_wq is a global so it has to be created at init and destroyed at exit, instead of per device. Tested-by: Suresh Marikkannu Signed-off-by: Tadeusz Struk Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/qat/qat_common/adf_common_drv.h |2 + drivers/crypto/qat/qat_common/adf_ctl_drv.c|6 + drivers/crypto/qat/qat_common/adf_sriov.c | 26 +++-- 3 files changed, 24 insertions(+), 10 deletions(-) --- a/drivers/crypto/qat/qat_common/adf_common_drv.h +++ b/drivers/crypto/qat/qat_common/adf_common_drv.h @@ -144,6 +144,8 @@ void adf_disable_aer(struct adf_accel_de void adf_dev_restore(struct adf_accel_dev *accel_dev); int adf_init_aer(void); void adf_exit_aer(void); +int adf_init_pf_wq(void); +void adf_exit_pf_wq(void); int adf_init_admin_comms(struct adf_accel_dev *accel_dev); void adf_exit_admin_comms(struct adf_accel_dev *accel_dev); int adf_send_admin_init(struct adf_accel_dev *accel_dev); --- a/drivers/crypto/qat/qat_common/adf_ctl_drv.c +++ b/drivers/crypto/qat/qat_common/adf_ctl_drv.c @@ -462,12 +462,17 @@ static int __init adf_register_ctl_devic if (adf_init_aer()) goto err_aer; + if (adf_init_pf_wq()) + goto err_pf_wq; + if (qat_crypto_register()) goto err_crypto_register; return 0; err_crypto_register: + adf_exit_pf_wq(); +err_pf_wq: adf_exit_aer(); err_aer: adf_chr_drv_destroy(); @@ -480,6 +485,7 @@ static void __exit adf_unregister_ctl_de { adf_chr_drv_destroy(); adf_exit_aer(); + adf_exit_pf_wq(); qat_crypto_unregister(); adf_clean_vf_map(false); mutex_destroy(_ctl_lock); --- a/drivers/crypto/qat/qat_common/adf_sriov.c +++ b/drivers/crypto/qat/qat_common/adf_sriov.c @@ -119,11 +119,6 @@ static int adf_enable_sriov(struct adf_a int i; u32 reg; - /* Workqueue for PF2VF responses */ - pf2vf_resp_wq = create_workqueue("qat_pf2vf_resp_wq"); - if (!pf2vf_resp_wq) - return -ENOMEM; - for (i = 0, vf_info = accel_dev->pf.vf_info; i < totalvfs; i++, vf_info++) { /* This ptr will be populated when VFs will be created */ @@ -216,11 +211,6 @@ void adf_disable_sriov(struct adf_accel_ kfree(accel_dev->pf.vf_info); accel_dev->pf.vf_info = NULL; - - if (pf2vf_resp_wq) { - destroy_workqueue(pf2vf_resp_wq); - pf2vf_resp_wq = NULL; - } } EXPORT_SYMBOL_GPL(adf_disable_sriov); @@ -304,3 +294,19 @@ int adf_sriov_configure(struct pci_dev * return numvfs; } EXPORT_SYMBOL_GPL(adf_sriov_configure); + +int __init adf_init_pf_wq(void) +{ + /* Workqueue for PF2VF responses */ + pf2vf_resp_wq = create_workqueue("qat_pf2vf_resp_wq"); + + return !pf2vf_resp_wq ? -ENOMEM : 0; +} + +void adf_exit_pf_wq(void) +{ + if (pf2vf_resp_wq) { + destroy_workqueue(pf2vf_resp_wq); + pf2vf_resp_wq = NULL; + } +}
[PATCH 4.5 049/101] crypto: qat - fix adf_ctl_drv.c:undefined reference to adf_init_pf_wq
4.5-stable review patch. If anyone has any objections, please let me know. -- From: Tadeusz Struk commit 6dc5df71ee5c8b44607928bfe27be50314dcf848 upstream. Fix undefined reference issue reported by kbuild test robot. Reported-by: kbuild test robot Signed-off-by: Tadeusz Struk Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/qat/qat_common/adf_common_drv.h | 13 +++-- 1 file changed, 11 insertions(+), 2 deletions(-) --- a/drivers/crypto/qat/qat_common/adf_common_drv.h +++ b/drivers/crypto/qat/qat_common/adf_common_drv.h @@ -144,8 +144,6 @@ void adf_disable_aer(struct adf_accel_de void adf_dev_restore(struct adf_accel_dev *accel_dev); int adf_init_aer(void); void adf_exit_aer(void); -int adf_init_pf_wq(void); -void adf_exit_pf_wq(void); int adf_init_admin_comms(struct adf_accel_dev *accel_dev); void adf_exit_admin_comms(struct adf_accel_dev *accel_dev); int adf_send_admin_init(struct adf_accel_dev *accel_dev); @@ -238,6 +236,8 @@ void adf_enable_vf2pf_interrupts(struct uint32_t vf_mask); void adf_enable_pf2vf_interrupts(struct adf_accel_dev *accel_dev); void adf_disable_pf2vf_interrupts(struct adf_accel_dev *accel_dev); +int adf_init_pf_wq(void); +void adf_exit_pf_wq(void); #else static inline int adf_sriov_configure(struct pci_dev *pdev, int numvfs) { @@ -255,5 +255,14 @@ static inline void adf_enable_pf2vf_inte static inline void adf_disable_pf2vf_interrupts(struct adf_accel_dev *accel_dev) { } + +static inline int adf_init_pf_wq(void) +{ + return 0; +} + +static inline void adf_exit_pf_wq(void) +{ +} #endif #endif
[PATCH 4.5 052/101] ALSA: usb-audio: Quirk for yet another Phoenix Audio devices (v2)
4.5-stable review patch. If anyone has any objections, please let me know. -- From: Takashi Iwaicommit 2d2c038af423e820d89db2b5d7774b67ba49 upstream. Phoenix Audio MT202pcs (1de7:0114) and MT202exe (1de7:0013) need the same workaround as TMX320 for avoiding the firmware bug. It fixes the frequent error about the sample rate inquiries and the slow device probe as consequence. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=117321 Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/quirks.c |2 ++ 1 file changed, 2 insertions(+) --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1139,7 +1139,9 @@ bool snd_usb_get_sample_rate_quirk(struc case USB_ID(0x047F, 0xAA05): /* Plantronics DA45 */ case USB_ID(0x04D8, 0xFEEA): /* Benchmark DAC1 Pre */ case USB_ID(0x074D, 0x3553): /* Outlaw RR2150 (Micronas UAC3553B) */ + case USB_ID(0x1de7, 0x0013): /* Phoenix Audio MT202exe */ case USB_ID(0x1de7, 0x0014): /* Phoenix Audio TMX320 */ + case USB_ID(0x1de7, 0x0114): /* Phoenix Audio MT202pcs */ case USB_ID(0x21B4, 0x0081): /* AudioQuest DragonFly */ return true; }
[PATCH 4.5 051/101] crypto: testmgr - Use kmalloc memory for RSA input
4.5-stable review patch. If anyone has any objections, please let me know. -- From: Herbert Xucommit df27b26f04ed388ff4cc2b5d8cfdb5d97678816f upstream. As akcipher uses an SG interface, you must not use vmalloc memory as input for it. This patch fixes testmgr to copy the vmalloc test vectors to kmalloc memory before running the test. This patch also removes a superfluous sg_virt call in do_test_rsa. Reported-by: Anatoly Pugachev Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/testmgr.c | 27 ++- 1 file changed, 22 insertions(+), 5 deletions(-) --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -1849,6 +1849,7 @@ static int alg_test_drbg(const struct al static int do_test_rsa(struct crypto_akcipher *tfm, struct akcipher_testvec *vecs) { + char *xbuf[XBUFSIZE]; struct akcipher_request *req; void *outbuf_enc = NULL; void *outbuf_dec = NULL; @@ -1857,9 +1858,12 @@ static int do_test_rsa(struct crypto_akc int err = -ENOMEM; struct scatterlist src, dst, src_tab[2]; + if (testmgr_alloc_buf(xbuf)) + return err; + req = akcipher_request_alloc(tfm, GFP_KERNEL); if (!req) - return err; + goto free_xbuf; init_completion(); @@ -1877,9 +1881,14 @@ static int do_test_rsa(struct crypto_akc if (!outbuf_enc) goto free_req; + if (WARN_ON(vecs->m_size > PAGE_SIZE)) + goto free_all; + + memcpy(xbuf[0], vecs->m, vecs->m_size); + sg_init_table(src_tab, 2); - sg_set_buf(_tab[0], vecs->m, 8); - sg_set_buf(_tab[1], vecs->m + 8, vecs->m_size - 8); + sg_set_buf(_tab[0], xbuf[0], 8); + sg_set_buf(_tab[1], xbuf[0] + 8, vecs->m_size - 8); sg_init_one(, outbuf_enc, out_len_max); akcipher_request_set_crypt(req, src_tab, , vecs->m_size, out_len_max); @@ -1898,7 +1907,7 @@ static int do_test_rsa(struct crypto_akc goto free_all; } /* verify that encrypted message is equal to expected */ - if (memcmp(vecs->c, sg_virt(req->dst), vecs->c_size)) { + if (memcmp(vecs->c, outbuf_enc, vecs->c_size)) { pr_err("alg: rsa: encrypt test failed. Invalid output\n"); err = -EINVAL; goto free_all; @@ -1913,7 +1922,13 @@ static int do_test_rsa(struct crypto_akc err = -ENOMEM; goto free_all; } - sg_init_one(, vecs->c, vecs->c_size); + + if (WARN_ON(vecs->c_size > PAGE_SIZE)) + goto free_all; + + memcpy(xbuf[0], vecs->c, vecs->c_size); + + sg_init_one(, xbuf[0], vecs->c_size); sg_init_one(, outbuf_dec, out_len_max); init_completion(); akcipher_request_set_crypt(req, , , vecs->c_size, out_len_max); @@ -1940,6 +1955,8 @@ free_all: kfree(outbuf_enc); free_req: akcipher_request_free(req); +free_xbuf: + testmgr_free_buf(xbuf); return err; }
[PATCH 4.5 052/101] ALSA: usb-audio: Quirk for yet another Phoenix Audio devices (v2)
4.5-stable review patch. If anyone has any objections, please let me know. -- From: Takashi Iwai commit 2d2c038af423e820d89db2b5d7774b67ba49 upstream. Phoenix Audio MT202pcs (1de7:0114) and MT202exe (1de7:0013) need the same workaround as TMX320 for avoiding the firmware bug. It fixes the frequent error about the sample rate inquiries and the slow device probe as consequence. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=117321 Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/quirks.c |2 ++ 1 file changed, 2 insertions(+) --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1139,7 +1139,9 @@ bool snd_usb_get_sample_rate_quirk(struc case USB_ID(0x047F, 0xAA05): /* Plantronics DA45 */ case USB_ID(0x04D8, 0xFEEA): /* Benchmark DAC1 Pre */ case USB_ID(0x074D, 0x3553): /* Outlaw RR2150 (Micronas UAC3553B) */ + case USB_ID(0x1de7, 0x0013): /* Phoenix Audio MT202exe */ case USB_ID(0x1de7, 0x0014): /* Phoenix Audio TMX320 */ + case USB_ID(0x1de7, 0x0114): /* Phoenix Audio MT202pcs */ case USB_ID(0x21B4, 0x0081): /* AudioQuest DragonFly */ return true; }
[PATCH 4.5 051/101] crypto: testmgr - Use kmalloc memory for RSA input
4.5-stable review patch. If anyone has any objections, please let me know. -- From: Herbert Xu commit df27b26f04ed388ff4cc2b5d8cfdb5d97678816f upstream. As akcipher uses an SG interface, you must not use vmalloc memory as input for it. This patch fixes testmgr to copy the vmalloc test vectors to kmalloc memory before running the test. This patch also removes a superfluous sg_virt call in do_test_rsa. Reported-by: Anatoly Pugachev Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/testmgr.c | 27 ++- 1 file changed, 22 insertions(+), 5 deletions(-) --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -1849,6 +1849,7 @@ static int alg_test_drbg(const struct al static int do_test_rsa(struct crypto_akcipher *tfm, struct akcipher_testvec *vecs) { + char *xbuf[XBUFSIZE]; struct akcipher_request *req; void *outbuf_enc = NULL; void *outbuf_dec = NULL; @@ -1857,9 +1858,12 @@ static int do_test_rsa(struct crypto_akc int err = -ENOMEM; struct scatterlist src, dst, src_tab[2]; + if (testmgr_alloc_buf(xbuf)) + return err; + req = akcipher_request_alloc(tfm, GFP_KERNEL); if (!req) - return err; + goto free_xbuf; init_completion(); @@ -1877,9 +1881,14 @@ static int do_test_rsa(struct crypto_akc if (!outbuf_enc) goto free_req; + if (WARN_ON(vecs->m_size > PAGE_SIZE)) + goto free_all; + + memcpy(xbuf[0], vecs->m, vecs->m_size); + sg_init_table(src_tab, 2); - sg_set_buf(_tab[0], vecs->m, 8); - sg_set_buf(_tab[1], vecs->m + 8, vecs->m_size - 8); + sg_set_buf(_tab[0], xbuf[0], 8); + sg_set_buf(_tab[1], xbuf[0] + 8, vecs->m_size - 8); sg_init_one(, outbuf_enc, out_len_max); akcipher_request_set_crypt(req, src_tab, , vecs->m_size, out_len_max); @@ -1898,7 +1907,7 @@ static int do_test_rsa(struct crypto_akc goto free_all; } /* verify that encrypted message is equal to expected */ - if (memcmp(vecs->c, sg_virt(req->dst), vecs->c_size)) { + if (memcmp(vecs->c, outbuf_enc, vecs->c_size)) { pr_err("alg: rsa: encrypt test failed. Invalid output\n"); err = -EINVAL; goto free_all; @@ -1913,7 +1922,13 @@ static int do_test_rsa(struct crypto_akc err = -ENOMEM; goto free_all; } - sg_init_one(, vecs->c, vecs->c_size); + + if (WARN_ON(vecs->c_size > PAGE_SIZE)) + goto free_all; + + memcpy(xbuf[0], vecs->c, vecs->c_size); + + sg_init_one(, xbuf[0], vecs->c_size); sg_init_one(, outbuf_dec, out_len_max); init_completion(); akcipher_request_set_crypt(req, , , vecs->c_size, out_len_max); @@ -1940,6 +1955,8 @@ free_all: kfree(outbuf_enc); free_req: akcipher_request_free(req); +free_xbuf: + testmgr_free_buf(xbuf); return err; }
[PATCH 4.5 009/101] net: use skb_postpush_rcsum instead of own implementations
4.5-stable review patch. If anyone has any objections, please let me know. -- From: Daniel Borkmann[ Upstream commit 6b83d28a55a891a9d70fc61ccb1c138e47dcbe74 ] Replace individual implementations with the recently introduced skb_postpush_rcsum() helper. Signed-off-by: Daniel Borkmann Acked-by: Tom Herbert Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/skbuff.c |4 +--- net/ipv6/reassembly.c |6 ++ net/openvswitch/actions.c |8 +++- net/openvswitch/vport-netdev.c |2 +- net/openvswitch/vport.h|7 --- 5 files changed, 7 insertions(+), 20 deletions(-) --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4433,9 +4433,7 @@ int skb_vlan_push(struct sk_buff *skb, _ skb->mac_len += VLAN_HLEN; __skb_pull(skb, offset); - if (skb->ip_summed == CHECKSUM_COMPLETE) - skb->csum = csum_add(skb->csum, csum_partial(skb->data - + (2 * ETH_ALEN), VLAN_HLEN, 0)); + skb_postpush_rcsum(skb, skb->data + (2 * ETH_ALEN), VLAN_HLEN); } __vlan_hwaccel_put_tag(skb, vlan_proto, vlan_tci); return 0; --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -496,10 +496,8 @@ static int ip6_frag_reasm(struct frag_qu IP6CB(head)->flags |= IP6SKB_FRAGMENTED; /* Yes, and fold redundant checksum back. 8) */ - if (head->ip_summed == CHECKSUM_COMPLETE) - head->csum = csum_partial(skb_network_header(head), - skb_network_header_len(head), - head->csum); + skb_postpush_rcsum(head, skb_network_header(head), + skb_network_header_len(head)); rcu_read_lock(); IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMOKS); --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -158,9 +158,7 @@ static int push_mpls(struct sk_buff *skb new_mpls_lse = (__be32 *)skb_mpls_header(skb); *new_mpls_lse = mpls->mpls_lse; - if (skb->ip_summed == CHECKSUM_COMPLETE) - skb->csum = csum_add(skb->csum, csum_partial(new_mpls_lse, -MPLS_HLEN, 0)); + skb_postpush_rcsum(skb, new_mpls_lse, MPLS_HLEN); hdr = eth_hdr(skb); hdr->h_proto = mpls->mpls_ethertype; @@ -280,7 +278,7 @@ static int set_eth_addr(struct sk_buff * ether_addr_copy_masked(eth_hdr(skb)->h_dest, key->eth_dst, mask->eth_dst); - ovs_skb_postpush_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2); + skb_postpush_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2); ether_addr_copy(flow_key->eth.src, eth_hdr(skb)->h_source); ether_addr_copy(flow_key->eth.dst, eth_hdr(skb)->h_dest); @@ -639,7 +637,7 @@ static int ovs_vport_output(struct net * /* Reconstruct the MAC header. */ skb_push(skb, data->l2_len); memcpy(skb->data, >l2_data, data->l2_len); - ovs_skb_postpush_rcsum(skb, skb->data, data->l2_len); + skb_postpush_rcsum(skb, skb->data, data->l2_len); skb_reset_mac_header(skb); ovs_vport_send(vport, skb); --- a/net/openvswitch/vport-netdev.c +++ b/net/openvswitch/vport-netdev.c @@ -58,7 +58,7 @@ static void netdev_port_receive(struct s return; skb_push(skb, ETH_HLEN); - ovs_skb_postpush_rcsum(skb, skb->data, ETH_HLEN); + skb_postpush_rcsum(skb, skb->data, ETH_HLEN); ovs_vport_receive(vport, skb, skb_tunnel_info(skb)); return; error: --- a/net/openvswitch/vport.h +++ b/net/openvswitch/vport.h @@ -185,13 +185,6 @@ static inline struct vport *vport_from_p int ovs_vport_receive(struct vport *, struct sk_buff *, const struct ip_tunnel_info *); -static inline void ovs_skb_postpush_rcsum(struct sk_buff *skb, - const void *start, unsigned int len) -{ - if (skb->ip_summed == CHECKSUM_COMPLETE) - skb->csum = csum_add(skb->csum, csum_partial(start, len, 0)); -} - static inline const char *ovs_vport_name(struct vport *vport) { return vport->dev->name;
[PATCH 4.5 009/101] net: use skb_postpush_rcsum instead of own implementations
4.5-stable review patch. If anyone has any objections, please let me know. -- From: Daniel Borkmann [ Upstream commit 6b83d28a55a891a9d70fc61ccb1c138e47dcbe74 ] Replace individual implementations with the recently introduced skb_postpush_rcsum() helper. Signed-off-by: Daniel Borkmann Acked-by: Tom Herbert Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/skbuff.c |4 +--- net/ipv6/reassembly.c |6 ++ net/openvswitch/actions.c |8 +++- net/openvswitch/vport-netdev.c |2 +- net/openvswitch/vport.h|7 --- 5 files changed, 7 insertions(+), 20 deletions(-) --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4433,9 +4433,7 @@ int skb_vlan_push(struct sk_buff *skb, _ skb->mac_len += VLAN_HLEN; __skb_pull(skb, offset); - if (skb->ip_summed == CHECKSUM_COMPLETE) - skb->csum = csum_add(skb->csum, csum_partial(skb->data - + (2 * ETH_ALEN), VLAN_HLEN, 0)); + skb_postpush_rcsum(skb, skb->data + (2 * ETH_ALEN), VLAN_HLEN); } __vlan_hwaccel_put_tag(skb, vlan_proto, vlan_tci); return 0; --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -496,10 +496,8 @@ static int ip6_frag_reasm(struct frag_qu IP6CB(head)->flags |= IP6SKB_FRAGMENTED; /* Yes, and fold redundant checksum back. 8) */ - if (head->ip_summed == CHECKSUM_COMPLETE) - head->csum = csum_partial(skb_network_header(head), - skb_network_header_len(head), - head->csum); + skb_postpush_rcsum(head, skb_network_header(head), + skb_network_header_len(head)); rcu_read_lock(); IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMOKS); --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -158,9 +158,7 @@ static int push_mpls(struct sk_buff *skb new_mpls_lse = (__be32 *)skb_mpls_header(skb); *new_mpls_lse = mpls->mpls_lse; - if (skb->ip_summed == CHECKSUM_COMPLETE) - skb->csum = csum_add(skb->csum, csum_partial(new_mpls_lse, -MPLS_HLEN, 0)); + skb_postpush_rcsum(skb, new_mpls_lse, MPLS_HLEN); hdr = eth_hdr(skb); hdr->h_proto = mpls->mpls_ethertype; @@ -280,7 +278,7 @@ static int set_eth_addr(struct sk_buff * ether_addr_copy_masked(eth_hdr(skb)->h_dest, key->eth_dst, mask->eth_dst); - ovs_skb_postpush_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2); + skb_postpush_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2); ether_addr_copy(flow_key->eth.src, eth_hdr(skb)->h_source); ether_addr_copy(flow_key->eth.dst, eth_hdr(skb)->h_dest); @@ -639,7 +637,7 @@ static int ovs_vport_output(struct net * /* Reconstruct the MAC header. */ skb_push(skb, data->l2_len); memcpy(skb->data, >l2_data, data->l2_len); - ovs_skb_postpush_rcsum(skb, skb->data, data->l2_len); + skb_postpush_rcsum(skb, skb->data, data->l2_len); skb_reset_mac_header(skb); ovs_vport_send(vport, skb); --- a/net/openvswitch/vport-netdev.c +++ b/net/openvswitch/vport-netdev.c @@ -58,7 +58,7 @@ static void netdev_port_receive(struct s return; skb_push(skb, ETH_HLEN); - ovs_skb_postpush_rcsum(skb, skb->data, ETH_HLEN); + skb_postpush_rcsum(skb, skb->data, ETH_HLEN); ovs_vport_receive(vport, skb, skb_tunnel_info(skb)); return; error: --- a/net/openvswitch/vport.h +++ b/net/openvswitch/vport.h @@ -185,13 +185,6 @@ static inline struct vport *vport_from_p int ovs_vport_receive(struct vport *, struct sk_buff *, const struct ip_tunnel_info *); -static inline void ovs_skb_postpush_rcsum(struct sk_buff *skb, - const void *start, unsigned int len) -{ - if (skb->ip_summed == CHECKSUM_COMPLETE) - skb->csum = csum_add(skb->csum, csum_partial(start, len, 0)); -} - static inline const char *ovs_vport_name(struct vport *vport) { return vport->dev->name;
[PATCH 4.5 042/101] tcp: refresh skb timestamp at retransmit time
4.5-stable review patch. If anyone has any objections, please let me know. -- From: Eric Dumazet[ Upstream commit 10a81980fc47e64ffac26a073139813d3f697b64 ] In the very unlikely case __tcp_retransmit_skb() can not use the cloning done in tcp_transmit_skb(), we need to refresh skb_mstamp before doing the copy and transmit, otherwise TCP TS val will be an exact copy of original transmit. Fixes: 7faee5c0d514 ("tcp: remove TCP_SKB_CB(skb)->when") Signed-off-by: Eric Dumazet Cc: Yuchung Cheng Acked-by: Yuchung Cheng Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_output.c |6 -- 1 file changed, 4 insertions(+), 2 deletions(-) --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2625,8 +2625,10 @@ int __tcp_retransmit_skb(struct sock *sk */ if (unlikely((NET_IP_ALIGN && ((unsigned long)skb->data & 3)) || skb_headroom(skb) >= 0x)) { - struct sk_buff *nskb = __pskb_copy(skb, MAX_TCP_HEADER, - GFP_ATOMIC); + struct sk_buff *nskb; + + skb_mstamp_get(>skb_mstamp); + nskb = __pskb_copy(skb, MAX_TCP_HEADER, GFP_ATOMIC); err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) : -ENOBUFS; } else {
[PATCH 4.5 053/101] ALSA: usb-audio: Yet another Phoneix Audio device quirk
4.5-stable review patch. If anyone has any objections, please let me know. -- From: Takashi Iwaicommit 84add303ef950b8d85f54bc2248c2bc73467c329 upstream. Phoenix Audio has yet another device with another id (even a different vendor id, 0556:0014) that requires the same quirk for the sample rate. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=110221 Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/quirks.c |1 + 1 file changed, 1 insertion(+) --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1138,6 +1138,7 @@ bool snd_usb_get_sample_rate_quirk(struc case USB_ID(0x047F, 0x0415): /* Plantronics BT-300 */ case USB_ID(0x047F, 0xAA05): /* Plantronics DA45 */ case USB_ID(0x04D8, 0xFEEA): /* Benchmark DAC1 Pre */ + case USB_ID(0x0556, 0x0014): /* Phoenix Audio TMX320VC */ case USB_ID(0x074D, 0x3553): /* Outlaw RR2150 (Micronas UAC3553B) */ case USB_ID(0x1de7, 0x0013): /* Phoenix Audio MT202exe */ case USB_ID(0x1de7, 0x0014): /* Phoenix Audio TMX320 */
[PATCH 4.5 055/101] ALSA: hda - Fix white noise on Asus UX501VW headset
4.5-stable review patch. If anyone has any objections, please let me know. -- From: Kaho Ngcommit 2da2dc9ead232f25601404335cca13c0f722d41b upstream. For reducing the noise from the headset output on ASUS UX501VW, call the existing fixup, alc_fixup_headset_mode_alc668(), additionally. Thread: https://bbs.archlinux.org/viewtopic.php?id=209554 Signed-off-by: Kaho Ng Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c |1 + 1 file changed, 1 insertion(+) --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6704,6 +6704,7 @@ static const struct snd_pci_quirk alc662 SND_PCI_QUIRK(0x1028, 0x0698, "Dell", ALC668_FIXUP_DELL_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x069f, "Dell", ALC668_FIXUP_DELL_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x1632, "HP RP5800", ALC662_FIXUP_HP_RP5800), + SND_PCI_QUIRK(0x1043, 0x1080, "Asus UX501VW", ALC668_FIXUP_HEADSET_MODE), SND_PCI_QUIRK(0x1043, 0x11cd, "Asus N550", ALC662_FIXUP_ASUS_Nx50), SND_PCI_QUIRK(0x1043, 0x13df, "Asus N550JX", ALC662_FIXUP_BASS_1A), SND_PCI_QUIRK(0x1043, 0x129d, "Asus N750", ALC662_FIXUP_ASUS_Nx50),
[PATCH 4.5 053/101] ALSA: usb-audio: Yet another Phoneix Audio device quirk
4.5-stable review patch. If anyone has any objections, please let me know. -- From: Takashi Iwai commit 84add303ef950b8d85f54bc2248c2bc73467c329 upstream. Phoenix Audio has yet another device with another id (even a different vendor id, 0556:0014) that requires the same quirk for the sample rate. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=110221 Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/quirks.c |1 + 1 file changed, 1 insertion(+) --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1138,6 +1138,7 @@ bool snd_usb_get_sample_rate_quirk(struc case USB_ID(0x047F, 0x0415): /* Plantronics BT-300 */ case USB_ID(0x047F, 0xAA05): /* Plantronics DA45 */ case USB_ID(0x04D8, 0xFEEA): /* Benchmark DAC1 Pre */ + case USB_ID(0x0556, 0x0014): /* Phoenix Audio TMX320VC */ case USB_ID(0x074D, 0x3553): /* Outlaw RR2150 (Micronas UAC3553B) */ case USB_ID(0x1de7, 0x0013): /* Phoenix Audio MT202exe */ case USB_ID(0x1de7, 0x0014): /* Phoenix Audio TMX320 */
[PATCH 4.5 055/101] ALSA: hda - Fix white noise on Asus UX501VW headset
4.5-stable review patch. If anyone has any objections, please let me know. -- From: Kaho Ng commit 2da2dc9ead232f25601404335cca13c0f722d41b upstream. For reducing the noise from the headset output on ASUS UX501VW, call the existing fixup, alc_fixup_headset_mode_alc668(), additionally. Thread: https://bbs.archlinux.org/viewtopic.php?id=209554 Signed-off-by: Kaho Ng Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c |1 + 1 file changed, 1 insertion(+) --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6704,6 +6704,7 @@ static const struct snd_pci_quirk alc662 SND_PCI_QUIRK(0x1028, 0x0698, "Dell", ALC668_FIXUP_DELL_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x069f, "Dell", ALC668_FIXUP_DELL_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x1632, "HP RP5800", ALC662_FIXUP_HP_RP5800), + SND_PCI_QUIRK(0x1043, 0x1080, "Asus UX501VW", ALC668_FIXUP_HEADSET_MODE), SND_PCI_QUIRK(0x1043, 0x11cd, "Asus N550", ALC662_FIXUP_ASUS_Nx50), SND_PCI_QUIRK(0x1043, 0x13df, "Asus N550JX", ALC662_FIXUP_BASS_1A), SND_PCI_QUIRK(0x1043, 0x129d, "Asus N750", ALC662_FIXUP_ASUS_Nx50),
[PATCH 4.5 042/101] tcp: refresh skb timestamp at retransmit time
4.5-stable review patch. If anyone has any objections, please let me know. -- From: Eric Dumazet [ Upstream commit 10a81980fc47e64ffac26a073139813d3f697b64 ] In the very unlikely case __tcp_retransmit_skb() can not use the cloning done in tcp_transmit_skb(), we need to refresh skb_mstamp before doing the copy and transmit, otherwise TCP TS val will be an exact copy of original transmit. Fixes: 7faee5c0d514 ("tcp: remove TCP_SKB_CB(skb)->when") Signed-off-by: Eric Dumazet Cc: Yuchung Cheng Acked-by: Yuchung Cheng Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_output.c |6 -- 1 file changed, 4 insertions(+), 2 deletions(-) --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2625,8 +2625,10 @@ int __tcp_retransmit_skb(struct sock *sk */ if (unlikely((NET_IP_ALIGN && ((unsigned long)skb->data & 3)) || skb_headroom(skb) >= 0x)) { - struct sk_buff *nskb = __pskb_copy(skb, MAX_TCP_HEADER, - GFP_ATOMIC); + struct sk_buff *nskb; + + skb_mstamp_get(>skb_mstamp); + nskb = __pskb_copy(skb, MAX_TCP_HEADER, GFP_ATOMIC); err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) : -ENOBUFS; } else {
[PATCH 4.5 054/101] ALSA: hda - Fix subwoofer pin on ASUS N751 and N551
4.5-stable review patch. If anyone has any objections, please let me know. -- From: Yura Pakhuchiycommit 3231e2053eaeee70bdfb216a78a30f11e88e2243 upstream. Subwoofer does not work out of the box on ASUS N751/N551 laptops. This patch fixes it. Patch tested on N751 laptop. N551 part is not tested, but according to [1] and [2] this laptop requires similar changes, so I included them in the patch. 1. https://github.com/honsiorovskyi/asus-n551-hda-fix 2. https://bugs.launchpad.net/ubuntu/+source/alsa-tools/+bug/1405691 Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=117781 Signed-off-by: Yura Pakhuchiy Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 12 1 file changed, 12 insertions(+) --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6426,6 +6426,7 @@ enum { ALC668_FIXUP_DELL_DISABLE_AAMIX, ALC668_FIXUP_DELL_XPS13, ALC662_FIXUP_ASUS_Nx50, + ALC668_FIXUP_ASUS_Nx51, }; static const struct hda_fixup alc662_fixups[] = { @@ -6672,6 +6673,15 @@ static const struct hda_fixup alc662_fix .chained = true, .chain_id = ALC662_FIXUP_BASS_1A }, + [ALC668_FIXUP_ASUS_Nx51] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + {0x1a, 0x90170151}, /* bass speaker */ + {} + }, + .chained = true, + .chain_id = ALC662_FIXUP_BASS_CHMAP, + }, }; static const struct snd_pci_quirk alc662_fixup_tbl[] = { @@ -6699,6 +6709,8 @@ static const struct snd_pci_quirk alc662 SND_PCI_QUIRK(0x1043, 0x129d, "Asus N750", ALC662_FIXUP_ASUS_Nx50), SND_PCI_QUIRK(0x1043, 0x1477, "ASUS N56VZ", ALC662_FIXUP_BASS_MODE4_CHMAP), SND_PCI_QUIRK(0x1043, 0x15a7, "ASUS UX51VZH", ALC662_FIXUP_BASS_16), + SND_PCI_QUIRK(0x1043, 0x177d, "ASUS N551", ALC668_FIXUP_ASUS_Nx51), + SND_PCI_QUIRK(0x1043, 0x17bd, "ASUS N751", ALC668_FIXUP_ASUS_Nx51), SND_PCI_QUIRK(0x1043, 0x1b73, "ASUS N55SF", ALC662_FIXUP_BASS_16), SND_PCI_QUIRK(0x1043, 0x1bf3, "ASUS N76VZ", ALC662_FIXUP_BASS_MODE4_CHMAP), SND_PCI_QUIRK(0x1043, 0x8469, "ASUS mobo", ALC662_FIXUP_NO_JACK_DETECT),
Re: [v5,1/7] QE: Add IC, SI and SIRAM document to device tree bindings.
On Tue, 2016-05-17 at 01:18 +, Qiang Zhao wrote: > On Tue, May 17, 2016 at 07:22AM, Scott Wood wrote: > > -Original Message- > > From: Scott Wood [mailto:o...@buserror.net] > > Sent: Tuesday, May 17, 2016 7:22 AM > > To: Qiang Zhao> > Cc: robh...@kernel.org; devicet...@vger.kernel.org; linux- > > ker...@vger.kernel.org; Xiaobo Xie ; Yang-Leo Li > > ; linuxppc-...@lists.ozlabs.org > > Subject: Re: [v5,1/7] QE: Add IC, SI and SIRAM document to device tree > > bindings. > > > > On Wed, Mar 09, 2016 at 09:21:28AM +0800, Zhao Qiang wrote: > > > Add IC, SI and SIRAM document of QE to > > > Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/qe.txt > > > > > > Signed-off-by: Zhao Qiang > > > Acked-by: Rob Herring > > > --- > > > changes for v2 > > > - Add interrupt-controller in Required properties > > > - delete address-cells and size-cells for qe-si and qe-siram Changes > > > for v3 > > > - Add SoC specific caompatible strings to qe-si and qe-siram Changes > > > for v4 > > > - NA > > > Changes for v5 > > > - NA > > > > > > .../devicetree/bindings/powerpc/fsl/cpm_qe/qe.txt | 50 > > > ++ > > > 1 file changed, 50 insertions(+) > > > +* Serial Interface Block (SI) > > > + > > > +The SI manages the routing of eight TDM lines to the QE block serial > > > +drivers , the MCC and the UCCs, for receive and transmit. > > > + > > > +Required properties: > > > +- compatible : should be "fsl,t1040-qe-si". > > > +- reg : Address range of SI register set. > > > > Is t1040 the only chip that has or will ever have this? > > There also be t1024 and ls1043 supporting si. > I thought to add them when adding their device node. > If you think it is better to add them now, I will modify. The binding is saying that the compatible "should" be "fsl,t1040-qe-si" regardless of whether it's actually a t1040. Instead say that compatible must include "fsl,-qe-si" and give t1040 as an example. If you intend "fsl,t1040-qe-si" to be something that other compatible chips list, in addition to their own compatibles, then also say that explicitly. -Scott
[PATCH 4.5 054/101] ALSA: hda - Fix subwoofer pin on ASUS N751 and N551
4.5-stable review patch. If anyone has any objections, please let me know. -- From: Yura Pakhuchiy commit 3231e2053eaeee70bdfb216a78a30f11e88e2243 upstream. Subwoofer does not work out of the box on ASUS N751/N551 laptops. This patch fixes it. Patch tested on N751 laptop. N551 part is not tested, but according to [1] and [2] this laptop requires similar changes, so I included them in the patch. 1. https://github.com/honsiorovskyi/asus-n551-hda-fix 2. https://bugs.launchpad.net/ubuntu/+source/alsa-tools/+bug/1405691 Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=117781 Signed-off-by: Yura Pakhuchiy Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 12 1 file changed, 12 insertions(+) --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6426,6 +6426,7 @@ enum { ALC668_FIXUP_DELL_DISABLE_AAMIX, ALC668_FIXUP_DELL_XPS13, ALC662_FIXUP_ASUS_Nx50, + ALC668_FIXUP_ASUS_Nx51, }; static const struct hda_fixup alc662_fixups[] = { @@ -6672,6 +6673,15 @@ static const struct hda_fixup alc662_fix .chained = true, .chain_id = ALC662_FIXUP_BASS_1A }, + [ALC668_FIXUP_ASUS_Nx51] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + {0x1a, 0x90170151}, /* bass speaker */ + {} + }, + .chained = true, + .chain_id = ALC662_FIXUP_BASS_CHMAP, + }, }; static const struct snd_pci_quirk alc662_fixup_tbl[] = { @@ -6699,6 +6709,8 @@ static const struct snd_pci_quirk alc662 SND_PCI_QUIRK(0x1043, 0x129d, "Asus N750", ALC662_FIXUP_ASUS_Nx50), SND_PCI_QUIRK(0x1043, 0x1477, "ASUS N56VZ", ALC662_FIXUP_BASS_MODE4_CHMAP), SND_PCI_QUIRK(0x1043, 0x15a7, "ASUS UX51VZH", ALC662_FIXUP_BASS_16), + SND_PCI_QUIRK(0x1043, 0x177d, "ASUS N551", ALC668_FIXUP_ASUS_Nx51), + SND_PCI_QUIRK(0x1043, 0x17bd, "ASUS N751", ALC668_FIXUP_ASUS_Nx51), SND_PCI_QUIRK(0x1043, 0x1b73, "ASUS N55SF", ALC662_FIXUP_BASS_16), SND_PCI_QUIRK(0x1043, 0x1bf3, "ASUS N76VZ", ALC662_FIXUP_BASS_MODE4_CHMAP), SND_PCI_QUIRK(0x1043, 0x8469, "ASUS mobo", ALC662_FIXUP_NO_JACK_DETECT),
Re: [v5,1/7] QE: Add IC, SI and SIRAM document to device tree bindings.
On Tue, 2016-05-17 at 01:18 +, Qiang Zhao wrote: > On Tue, May 17, 2016 at 07:22AM, Scott Wood wrote: > > -Original Message- > > From: Scott Wood [mailto:o...@buserror.net] > > Sent: Tuesday, May 17, 2016 7:22 AM > > To: Qiang Zhao > > Cc: robh...@kernel.org; devicet...@vger.kernel.org; linux- > > ker...@vger.kernel.org; Xiaobo Xie ; Yang-Leo Li > > ; linuxppc-...@lists.ozlabs.org > > Subject: Re: [v5,1/7] QE: Add IC, SI and SIRAM document to device tree > > bindings. > > > > On Wed, Mar 09, 2016 at 09:21:28AM +0800, Zhao Qiang wrote: > > > Add IC, SI and SIRAM document of QE to > > > Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/qe.txt > > > > > > Signed-off-by: Zhao Qiang > > > Acked-by: Rob Herring > > > --- > > > changes for v2 > > > - Add interrupt-controller in Required properties > > > - delete address-cells and size-cells for qe-si and qe-siram Changes > > > for v3 > > > - Add SoC specific caompatible strings to qe-si and qe-siram Changes > > > for v4 > > > - NA > > > Changes for v5 > > > - NA > > > > > > .../devicetree/bindings/powerpc/fsl/cpm_qe/qe.txt | 50 > > > ++ > > > 1 file changed, 50 insertions(+) > > > +* Serial Interface Block (SI) > > > + > > > +The SI manages the routing of eight TDM lines to the QE block serial > > > +drivers , the MCC and the UCCs, for receive and transmit. > > > + > > > +Required properties: > > > +- compatible : should be "fsl,t1040-qe-si". > > > +- reg : Address range of SI register set. > > > > Is t1040 the only chip that has or will ever have this? > > There also be t1024 and ls1043 supporting si. > I thought to add them when adding their device node. > If you think it is better to add them now, I will modify. The binding is saying that the compatible "should" be "fsl,t1040-qe-si" regardless of whether it's actually a t1040. Instead say that compatible must include "fsl,-qe-si" and give t1040 as an example. If you intend "fsl,t1040-qe-si" to be something that other compatible chips list, in addition to their own compatibles, then also say that explicitly. -Scott
[PATCH 4.5 061/101] regmap: spmi: Fix regmap_spmi_ext_read in multi-byte case
4.5-stable review patch. If anyone has any objections, please let me know. -- From: Jack Phamcommit dec8e8f6e6504aa3496c0f7cc10c756bb0e10f44 upstream. Specifically for the case of reads that use the Extended Register Read Long command, a multi-byte read operation is broken up into 8-byte chunks. However the call to spmi_ext_register_readl() is incorrectly passing 'val_size', which if greater than 8 will always fail. The argument should instead be 'len'. Fixes: c9afbb05a9ff ("regmap: spmi: support base and extended register spaces") Signed-off-by: Jack Pham Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/base/regmap/regmap-spmi.c |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) --- a/drivers/base/regmap/regmap-spmi.c +++ b/drivers/base/regmap/regmap-spmi.c @@ -142,7 +142,7 @@ static int regmap_spmi_ext_read(void *co while (val_size) { len = min_t(size_t, val_size, 8); - err = spmi_ext_register_readl(context, addr, val, val_size); + err = spmi_ext_register_readl(context, addr, val, len); if (err) goto err_out;
[PATCH 4.5 057/101] spi: pxa2xx: Do not detect number of enabled chip selects on Intel SPT
4.5-stable review patch. If anyone has any objections, please let me know. -- From: Jarkko Nikulacommit 66ec246eb9982e7eb8e15e1fc55f543230310dd0 upstream. Certain Intel Sunrisepoint PCH variants report zero chip selects in SPI capabilities register even they have one per port. Detection in pxa2xx_spi_probe() sets master->num_chipselect to 0 leading to -EINVAL from spi_register_master() where chip select count is validated. Fix this by not using SPI capabilities register on Sunrisepoint. They don't have more than one chip select so use the default value 1 instead of detection. Fixes: 8b136baa5892 ("spi: pxa2xx: Detect number of enabled Intel LPSS SPI chip select signals") Signed-off-by: Jarkko Nikula Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-pxa2xx.c |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) --- a/drivers/spi/spi-pxa2xx.c +++ b/drivers/spi/spi-pxa2xx.c @@ -111,7 +111,7 @@ static const struct lpss_config lpss_pla .reg_general = -1, .reg_ssp = 0x20, .reg_cs_ctrl = 0x24, - .reg_capabilities = 0xfc, + .reg_capabilities = -1, .rx_threshold = 1, .tx_threshold_lo = 32, .tx_threshold_hi = 56,
[PATCH 4.5 061/101] regmap: spmi: Fix regmap_spmi_ext_read in multi-byte case
4.5-stable review patch. If anyone has any objections, please let me know. -- From: Jack Pham commit dec8e8f6e6504aa3496c0f7cc10c756bb0e10f44 upstream. Specifically for the case of reads that use the Extended Register Read Long command, a multi-byte read operation is broken up into 8-byte chunks. However the call to spmi_ext_register_readl() is incorrectly passing 'val_size', which if greater than 8 will always fail. The argument should instead be 'len'. Fixes: c9afbb05a9ff ("regmap: spmi: support base and extended register spaces") Signed-off-by: Jack Pham Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/base/regmap/regmap-spmi.c |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) --- a/drivers/base/regmap/regmap-spmi.c +++ b/drivers/base/regmap/regmap-spmi.c @@ -142,7 +142,7 @@ static int regmap_spmi_ext_read(void *co while (val_size) { len = min_t(size_t, val_size, 8); - err = spmi_ext_register_readl(context, addr, val, val_size); + err = spmi_ext_register_readl(context, addr, val, len); if (err) goto err_out;
[PATCH 4.5 057/101] spi: pxa2xx: Do not detect number of enabled chip selects on Intel SPT
4.5-stable review patch. If anyone has any objections, please let me know. -- From: Jarkko Nikula commit 66ec246eb9982e7eb8e15e1fc55f543230310dd0 upstream. Certain Intel Sunrisepoint PCH variants report zero chip selects in SPI capabilities register even they have one per port. Detection in pxa2xx_spi_probe() sets master->num_chipselect to 0 leading to -EINVAL from spi_register_master() where chip select count is validated. Fix this by not using SPI capabilities register on Sunrisepoint. They don't have more than one chip select so use the default value 1 instead of detection. Fixes: 8b136baa5892 ("spi: pxa2xx: Detect number of enabled Intel LPSS SPI chip select signals") Signed-off-by: Jarkko Nikula Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-pxa2xx.c |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) --- a/drivers/spi/spi-pxa2xx.c +++ b/drivers/spi/spi-pxa2xx.c @@ -111,7 +111,7 @@ static const struct lpss_config lpss_pla .reg_general = -1, .reg_ssp = 0x20, .reg_cs_ctrl = 0x24, - .reg_capabilities = 0xfc, + .reg_capabilities = -1, .rx_threshold = 1, .tx_threshold_lo = 32, .tx_threshold_hi = 56,
[PATCH 4.5 062/101] perf diff: Fix duplicated output column
4.5-stable review patch. If anyone has any objections, please let me know. -- From: Namhyung Kimcommit e9d848cb65d5f6f7731d12bd1b6d994bfdbcc94f upstream. The commit b97511c5bc94 ("perf tools: Add overhead/overhead_children keys defaults via string") moved initialization of column headers but it missed to check the sort__mode. As 'perf diff' doesn't call perf_hpp__init(), the setup_overhead() also should not be called. Before: # BaselineDelta Children Overhead Shared ObjectSymbol # ... ... ... # 28.48% -28.47%28.48%28.48% [kernel.vmlinux ][k] intel_idle 11.51% -11.47%11.51%11.51% libxul.so[.] 0x01a360f7 3.49% -3.49% 3.49% 3.49% [kernel.vmlinux] [k] generic_exec_single 2.91% -2.89% 2.91% 2.91% libdbus-1.so.3.8.11 [.] 0xcdc2 2.86% -2.85% 2.86% 2.86% libxcb.so.1.1.0 [.] 0xc890 2.44% -2.39% 2.44% 2.44% [kernel.vmlinux] [k] perf_event_aux_ctx After: # BaselineDelta Shared ObjectSymbol # ... ... ... # 28.48% -28.47% [kernel.vmlinux] [k] intel_idle 11.51% -11.47% libxul.so[.] 0x01a360f7 3.49% -3.49% [kernel.vmlinux] [k] generic_exec_single 2.91% -2.89% libdbus-1.so.3.8.11 [.] 0xcdc2 2.86% -2.85% libxcb.so.1.1.0 [.] 0xc890 2.44% -2.39% [kernel.vmlinux] [k] perf_event_aux_ctx Signed-off-by: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo Acked-by: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: b97511c5bc94 ("perf tools: Add overhead/overhead_children keys defaults via string") Link: http://lkml.kernel.org/r/1462890384-12486-2-git-send-email-a...@kernel.org Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/sort.c |3 +++ 1 file changed, 3 insertions(+) --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -2272,6 +2272,9 @@ static char *prefix_if_not_in(const char static char *setup_overhead(char *keys) { + if (sort__mode == SORT_MODE__DIFF) + return keys; + keys = prefix_if_not_in("overhead", keys); if (symbol_conf.cumulate_callchain)
[PATCH 4.5 060/101] pinctrl: at91-pio4: fix pull-up/down logic
4.5-stable review patch. If anyone has any objections, please let me know. -- From: Ludovic Desrochescommit 5305a7b7e860bb40ab226bc7d58019416073948a upstream. The default configuration of a pin is often with a value in the pull-up/down field at chip reset. So, even if the internal logic of the controller prevents writing a configuration with pull-up and pull-down at the same time, we must ensure explicitly this condition before writing the register. This was leading to a pull-down condition not taken into account for instance. Signed-off-by: Ludovic Desroches Fixes: 776180848b57 ("pinctrl: introduce driver for Atmel PIO4 controller") Acked-by: Alexandre Belloni Acked-by: Nicolas Ferre Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/pinctrl-at91-pio4.c |2 ++ 1 file changed, 2 insertions(+) --- a/drivers/pinctrl/pinctrl-at91-pio4.c +++ b/drivers/pinctrl/pinctrl-at91-pio4.c @@ -722,9 +722,11 @@ static int atmel_conf_pin_config_group_s break; case PIN_CONFIG_BIAS_PULL_UP: conf |= ATMEL_PIO_PUEN_MASK; + conf &= (~ATMEL_PIO_PDEN_MASK); break; case PIN_CONFIG_BIAS_PULL_DOWN: conf |= ATMEL_PIO_PDEN_MASK; + conf &= (~ATMEL_PIO_PUEN_MASK); break; case PIN_CONFIG_DRIVE_OPEN_DRAIN: if (arg == 0)
[PATCH 4.5 062/101] perf diff: Fix duplicated output column
4.5-stable review patch. If anyone has any objections, please let me know. -- From: Namhyung Kim commit e9d848cb65d5f6f7731d12bd1b6d994bfdbcc94f upstream. The commit b97511c5bc94 ("perf tools: Add overhead/overhead_children keys defaults via string") moved initialization of column headers but it missed to check the sort__mode. As 'perf diff' doesn't call perf_hpp__init(), the setup_overhead() also should not be called. Before: # BaselineDelta Children Overhead Shared ObjectSymbol # ... ... ... # 28.48% -28.47%28.48%28.48% [kernel.vmlinux ][k] intel_idle 11.51% -11.47%11.51%11.51% libxul.so[.] 0x01a360f7 3.49% -3.49% 3.49% 3.49% [kernel.vmlinux] [k] generic_exec_single 2.91% -2.89% 2.91% 2.91% libdbus-1.so.3.8.11 [.] 0xcdc2 2.86% -2.85% 2.86% 2.86% libxcb.so.1.1.0 [.] 0xc890 2.44% -2.39% 2.44% 2.44% [kernel.vmlinux] [k] perf_event_aux_ctx After: # BaselineDelta Shared ObjectSymbol # ... ... ... # 28.48% -28.47% [kernel.vmlinux] [k] intel_idle 11.51% -11.47% libxul.so[.] 0x01a360f7 3.49% -3.49% [kernel.vmlinux] [k] generic_exec_single 2.91% -2.89% libdbus-1.so.3.8.11 [.] 0xcdc2 2.86% -2.85% libxcb.so.1.1.0 [.] 0xc890 2.44% -2.39% [kernel.vmlinux] [k] perf_event_aux_ctx Signed-off-by: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo Acked-by: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: b97511c5bc94 ("perf tools: Add overhead/overhead_children keys defaults via string") Link: http://lkml.kernel.org/r/1462890384-12486-2-git-send-email-a...@kernel.org Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/sort.c |3 +++ 1 file changed, 3 insertions(+) --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -2272,6 +2272,9 @@ static char *prefix_if_not_in(const char static char *setup_overhead(char *keys) { + if (sort__mode == SORT_MODE__DIFF) + return keys; + keys = prefix_if_not_in("overhead", keys); if (symbol_conf.cumulate_callchain)
[PATCH 4.5 060/101] pinctrl: at91-pio4: fix pull-up/down logic
4.5-stable review patch. If anyone has any objections, please let me know. -- From: Ludovic Desroches commit 5305a7b7e860bb40ab226bc7d58019416073948a upstream. The default configuration of a pin is often with a value in the pull-up/down field at chip reset. So, even if the internal logic of the controller prevents writing a configuration with pull-up and pull-down at the same time, we must ensure explicitly this condition before writing the register. This was leading to a pull-down condition not taken into account for instance. Signed-off-by: Ludovic Desroches Fixes: 776180848b57 ("pinctrl: introduce driver for Atmel PIO4 controller") Acked-by: Alexandre Belloni Acked-by: Nicolas Ferre Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/pinctrl-at91-pio4.c |2 ++ 1 file changed, 2 insertions(+) --- a/drivers/pinctrl/pinctrl-at91-pio4.c +++ b/drivers/pinctrl/pinctrl-at91-pio4.c @@ -722,9 +722,11 @@ static int atmel_conf_pin_config_group_s break; case PIN_CONFIG_BIAS_PULL_UP: conf |= ATMEL_PIO_PUEN_MASK; + conf &= (~ATMEL_PIO_PDEN_MASK); break; case PIN_CONFIG_BIAS_PULL_DOWN: conf |= ATMEL_PIO_PDEN_MASK; + conf &= (~ATMEL_PIO_PUEN_MASK); break; case PIN_CONFIG_DRIVE_OPEN_DRAIN: if (arg == 0)
[PATCH 4.5 064/101] vfs: add vfs_select_inode() helper
4.5-stable review patch. If anyone has any objections, please let me know. -- From: Miklos Szeredicommit 54d5ca871e72f2bb172ec9323497f01cd5091ec7 upstream. Signed-off-by: Miklos Szeredi Signed-off-by: Greg Kroah-Hartman --- fs/open.c | 12 include/linux/dcache.h | 12 2 files changed, 16 insertions(+), 8 deletions(-) --- a/fs/open.c +++ b/fs/open.c @@ -840,16 +840,12 @@ EXPORT_SYMBOL(file_path); int vfs_open(const struct path *path, struct file *file, const struct cred *cred) { - struct dentry *dentry = path->dentry; - struct inode *inode = dentry->d_inode; + struct inode *inode = vfs_select_inode(path->dentry, file->f_flags); - file->f_path = *path; - if (dentry->d_flags & DCACHE_OP_SELECT_INODE) { - inode = dentry->d_op->d_select_inode(dentry, file->f_flags); - if (IS_ERR(inode)) - return PTR_ERR(inode); - } + if (IS_ERR(inode)) + return PTR_ERR(inode); + file->f_path = *path; return do_dentry_open(file, inode, NULL, cred); } --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -592,4 +592,16 @@ static inline struct dentry *d_real(stru return dentry; } +static inline struct inode *vfs_select_inode(struct dentry *dentry, +unsigned open_flags) +{ + struct inode *inode = d_inode(dentry); + + if (inode && unlikely(dentry->d_flags & DCACHE_OP_SELECT_INODE)) + inode = dentry->d_op->d_select_inode(dentry, open_flags); + + return inode; +} + + #endif /* __LINUX_DCACHE_H */
[PATCH 4.5 068/101] regulator: s2mps11: Fix invalid selector mask and voltages for buck9
4.5-stable review patch. If anyone has any objections, please let me know. -- From: Krzysztof Kozlowskicommit 3b672623079bb3e5685b8549e514f2dfaa564406 upstream. The buck9 regulator of S2MPS11 PMIC had incorrect vsel_mask (0xff instead of 0x1f) thus reading entire register as buck9's voltage. This effectively caused regulator core to interpret values as higher voltages than they were and then to set real voltage much lower than intended. The buck9 provides power to other regulators, including LDO13 and LDO19 which supply the MMC2 (SD card). On Odroid XU3/XU4 the lower voltage caused SD card detection errors on Odroid XU3/XU4: mmc1: card never left busy state mmc1: error -110 whilst initialising SD card During driver probe the regulator core was checking whether initial voltage matches the constraints. With incorrect vsel_mask of 0xff and default value of 0x50, the core interpreted this as 5 V which is outside of constraints (3-3.775 V). Then the regulator core was adjusting the voltage to match the constraints. With incorrect vsel_mask this new voltage mapped to a vere low voltage in the driver. Signed-off-by: Krzysztof Kozlowski Reviewed-by: Javier Martinez Canillas Tested-by: Javier Martinez Canillas Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/regulator/s2mps11.c | 28 ++-- include/linux/mfd/samsung/s2mps11.h |2 ++ 2 files changed, 24 insertions(+), 6 deletions(-) --- a/drivers/regulator/s2mps11.c +++ b/drivers/regulator/s2mps11.c @@ -306,7 +306,7 @@ static struct regulator_ops s2mps11_buck .enable_mask= S2MPS11_ENABLE_MASK \ } -#define regulator_desc_s2mps11_buck6_10(num, min, step) { \ +#define regulator_desc_s2mps11_buck67810(num, min, step) { \ .name = "BUCK"#num, \ .id = S2MPS11_BUCK##num,\ .ops= _buck_ops,\ @@ -322,6 +322,22 @@ static struct regulator_ops s2mps11_buck .enable_mask= S2MPS11_ENABLE_MASK \ } +#define regulator_desc_s2mps11_buck9 { \ + .name = "BUCK9", \ + .id = S2MPS11_BUCK9,\ + .ops= _buck_ops,\ + .type = REGULATOR_VOLTAGE,\ + .owner = THIS_MODULE, \ + .min_uV = MIN_3000_MV, \ + .uV_step= STEP_25_MV, \ + .n_voltages = S2MPS11_BUCK9_N_VOLTAGES, \ + .ramp_delay = S2MPS11_RAMP_DELAY, \ + .vsel_reg = S2MPS11_REG_B9CTRL2, \ + .vsel_mask = S2MPS11_BUCK9_VSEL_MASK, \ + .enable_reg = S2MPS11_REG_B9CTRL1, \ + .enable_mask= S2MPS11_ENABLE_MASK \ +} + static const struct regulator_desc s2mps11_regulators[] = { regulator_desc_s2mps11_ldo(1, STEP_25_MV), regulator_desc_s2mps11_ldo(2, STEP_50_MV), @@ -366,11 +382,11 @@ static const struct regulator_desc s2mps regulator_desc_s2mps11_buck1_4(3), regulator_desc_s2mps11_buck1_4(4), regulator_desc_s2mps11_buck5, - regulator_desc_s2mps11_buck6_10(6, MIN_600_MV, STEP_6_25_MV), - regulator_desc_s2mps11_buck6_10(7, MIN_600_MV, STEP_6_25_MV), - regulator_desc_s2mps11_buck6_10(8, MIN_600_MV, STEP_6_25_MV), - regulator_desc_s2mps11_buck6_10(9, MIN_3000_MV, STEP_25_MV), - regulator_desc_s2mps11_buck6_10(10, MIN_750_MV, STEP_12_5_MV), + regulator_desc_s2mps11_buck67810(6, MIN_600_MV, STEP_6_25_MV), + regulator_desc_s2mps11_buck67810(7, MIN_600_MV, STEP_6_25_MV), + regulator_desc_s2mps11_buck67810(8, MIN_600_MV, STEP_6_25_MV), + regulator_desc_s2mps11_buck9, + regulator_desc_s2mps11_buck67810(10, MIN_750_MV, STEP_12_5_MV), }; static struct regulator_ops s2mps14_reg_ops; --- a/include/linux/mfd/samsung/s2mps11.h +++ b/include/linux/mfd/samsung/s2mps11.h @@ -173,10 +173,12 @@ enum s2mps11_regulators { #define S2MPS11_LDO_VSEL_MASK 0x3F #define S2MPS11_BUCK_VSEL_MASK 0xFF +#define S2MPS11_BUCK9_VSEL_MASK0x1F #define S2MPS11_ENABLE_MASK(0x03 << S2MPS11_ENABLE_SHIFT) #define S2MPS11_ENABLE_SHIFT 0x06 #define S2MPS11_LDO_N_VOLTAGES (S2MPS11_LDO_VSEL_MASK + 1) #define S2MPS11_BUCK_N_VOLTAGES (S2MPS11_BUCK_VSEL_MASK + 1) +#define S2MPS11_BUCK9_N_VOLTAGES (S2MPS11_BUCK9_VSEL_MASK + 1) #define S2MPS11_RAMP_DELAY 25000 /* uV/us */ #define S2MPS11_CTRL1_PWRHOLD_MASK BIT(4)
[PATCH 4.5 064/101] vfs: add vfs_select_inode() helper
4.5-stable review patch. If anyone has any objections, please let me know. -- From: Miklos Szeredi commit 54d5ca871e72f2bb172ec9323497f01cd5091ec7 upstream. Signed-off-by: Miklos Szeredi Signed-off-by: Greg Kroah-Hartman --- fs/open.c | 12 include/linux/dcache.h | 12 2 files changed, 16 insertions(+), 8 deletions(-) --- a/fs/open.c +++ b/fs/open.c @@ -840,16 +840,12 @@ EXPORT_SYMBOL(file_path); int vfs_open(const struct path *path, struct file *file, const struct cred *cred) { - struct dentry *dentry = path->dentry; - struct inode *inode = dentry->d_inode; + struct inode *inode = vfs_select_inode(path->dentry, file->f_flags); - file->f_path = *path; - if (dentry->d_flags & DCACHE_OP_SELECT_INODE) { - inode = dentry->d_op->d_select_inode(dentry, file->f_flags); - if (IS_ERR(inode)) - return PTR_ERR(inode); - } + if (IS_ERR(inode)) + return PTR_ERR(inode); + file->f_path = *path; return do_dentry_open(file, inode, NULL, cred); } --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -592,4 +592,16 @@ static inline struct dentry *d_real(stru return dentry; } +static inline struct inode *vfs_select_inode(struct dentry *dentry, +unsigned open_flags) +{ + struct inode *inode = d_inode(dentry); + + if (inode && unlikely(dentry->d_flags & DCACHE_OP_SELECT_INODE)) + inode = dentry->d_op->d_select_inode(dentry, open_flags); + + return inode; +} + + #endif /* __LINUX_DCACHE_H */
[PATCH 4.5 068/101] regulator: s2mps11: Fix invalid selector mask and voltages for buck9
4.5-stable review patch. If anyone has any objections, please let me know. -- From: Krzysztof Kozlowski commit 3b672623079bb3e5685b8549e514f2dfaa564406 upstream. The buck9 regulator of S2MPS11 PMIC had incorrect vsel_mask (0xff instead of 0x1f) thus reading entire register as buck9's voltage. This effectively caused regulator core to interpret values as higher voltages than they were and then to set real voltage much lower than intended. The buck9 provides power to other regulators, including LDO13 and LDO19 which supply the MMC2 (SD card). On Odroid XU3/XU4 the lower voltage caused SD card detection errors on Odroid XU3/XU4: mmc1: card never left busy state mmc1: error -110 whilst initialising SD card During driver probe the regulator core was checking whether initial voltage matches the constraints. With incorrect vsel_mask of 0xff and default value of 0x50, the core interpreted this as 5 V which is outside of constraints (3-3.775 V). Then the regulator core was adjusting the voltage to match the constraints. With incorrect vsel_mask this new voltage mapped to a vere low voltage in the driver. Signed-off-by: Krzysztof Kozlowski Reviewed-by: Javier Martinez Canillas Tested-by: Javier Martinez Canillas Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/regulator/s2mps11.c | 28 ++-- include/linux/mfd/samsung/s2mps11.h |2 ++ 2 files changed, 24 insertions(+), 6 deletions(-) --- a/drivers/regulator/s2mps11.c +++ b/drivers/regulator/s2mps11.c @@ -306,7 +306,7 @@ static struct regulator_ops s2mps11_buck .enable_mask= S2MPS11_ENABLE_MASK \ } -#define regulator_desc_s2mps11_buck6_10(num, min, step) { \ +#define regulator_desc_s2mps11_buck67810(num, min, step) { \ .name = "BUCK"#num, \ .id = S2MPS11_BUCK##num,\ .ops= _buck_ops,\ @@ -322,6 +322,22 @@ static struct regulator_ops s2mps11_buck .enable_mask= S2MPS11_ENABLE_MASK \ } +#define regulator_desc_s2mps11_buck9 { \ + .name = "BUCK9", \ + .id = S2MPS11_BUCK9,\ + .ops= _buck_ops,\ + .type = REGULATOR_VOLTAGE,\ + .owner = THIS_MODULE, \ + .min_uV = MIN_3000_MV, \ + .uV_step= STEP_25_MV, \ + .n_voltages = S2MPS11_BUCK9_N_VOLTAGES, \ + .ramp_delay = S2MPS11_RAMP_DELAY, \ + .vsel_reg = S2MPS11_REG_B9CTRL2, \ + .vsel_mask = S2MPS11_BUCK9_VSEL_MASK, \ + .enable_reg = S2MPS11_REG_B9CTRL1, \ + .enable_mask= S2MPS11_ENABLE_MASK \ +} + static const struct regulator_desc s2mps11_regulators[] = { regulator_desc_s2mps11_ldo(1, STEP_25_MV), regulator_desc_s2mps11_ldo(2, STEP_50_MV), @@ -366,11 +382,11 @@ static const struct regulator_desc s2mps regulator_desc_s2mps11_buck1_4(3), regulator_desc_s2mps11_buck1_4(4), regulator_desc_s2mps11_buck5, - regulator_desc_s2mps11_buck6_10(6, MIN_600_MV, STEP_6_25_MV), - regulator_desc_s2mps11_buck6_10(7, MIN_600_MV, STEP_6_25_MV), - regulator_desc_s2mps11_buck6_10(8, MIN_600_MV, STEP_6_25_MV), - regulator_desc_s2mps11_buck6_10(9, MIN_3000_MV, STEP_25_MV), - regulator_desc_s2mps11_buck6_10(10, MIN_750_MV, STEP_12_5_MV), + regulator_desc_s2mps11_buck67810(6, MIN_600_MV, STEP_6_25_MV), + regulator_desc_s2mps11_buck67810(7, MIN_600_MV, STEP_6_25_MV), + regulator_desc_s2mps11_buck67810(8, MIN_600_MV, STEP_6_25_MV), + regulator_desc_s2mps11_buck9, + regulator_desc_s2mps11_buck67810(10, MIN_750_MV, STEP_12_5_MV), }; static struct regulator_ops s2mps14_reg_ops; --- a/include/linux/mfd/samsung/s2mps11.h +++ b/include/linux/mfd/samsung/s2mps11.h @@ -173,10 +173,12 @@ enum s2mps11_regulators { #define S2MPS11_LDO_VSEL_MASK 0x3F #define S2MPS11_BUCK_VSEL_MASK 0xFF +#define S2MPS11_BUCK9_VSEL_MASK0x1F #define S2MPS11_ENABLE_MASK(0x03 << S2MPS11_ENABLE_SHIFT) #define S2MPS11_ENABLE_SHIFT 0x06 #define S2MPS11_LDO_N_VOLTAGES (S2MPS11_LDO_VSEL_MASK + 1) #define S2MPS11_BUCK_N_VOLTAGES (S2MPS11_BUCK_VSEL_MASK + 1) +#define S2MPS11_BUCK9_N_VOLTAGES (S2MPS11_BUCK9_VSEL_MASK + 1) #define S2MPS11_RAMP_DELAY 25000 /* uV/us */ #define S2MPS11_CTRL1_PWRHOLD_MASK BIT(4)
[PATCH 4.5 056/101] ALSA: hda - Fix broken reconfig
4.5-stable review patch. If anyone has any objections, please let me know. -- From: Takashi Iwaicommit addacd801e1638f41d659cb53b9b73fc14322cb1 upstream. The HD-audio reconfig function got broken in the recent kernels, typically resulting in a failure like: snd_hda_intel :00:1b.0: control 3:0:0:Playback Channel Map:0 is already present This is because of the code restructuring to move the PCM and control instantiation into the codec drive probe, by the commit [bcd96557bd0a: ALSA: hda - Build PCMs and controls at codec driver probe]. Although the commit above removed the calls of snd_hda_codec_build_pcms() and *_build_controls() at the controller driver probe, the similar calls in the reconfig were still left forgotten. This caused the conflicting and duplicated PCMs and controls. The fix is trivial: just remove these superfluous calls from reconfig_codec(). Fixes: bcd96557bd0a ('ALSA: hda - Build PCMs and controls at codec driver probe') Reported-by: Jochen Henneberg Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/hda_sysfs.c |8 1 file changed, 8 deletions(-) --- a/sound/pci/hda/hda_sysfs.c +++ b/sound/pci/hda/hda_sysfs.c @@ -141,14 +141,6 @@ static int reconfig_codec(struct hda_cod err = snd_hda_codec_configure(codec); if (err < 0) goto error; - /* rebuild PCMs */ - err = snd_hda_codec_build_pcms(codec); - if (err < 0) - goto error; - /* rebuild mixers */ - err = snd_hda_codec_build_controls(codec); - if (err < 0) - goto error; err = snd_card_register(codec->card); error: snd_hda_power_down(codec);
Re: [PATCH net-next] tuntap: introduce tx skb ring
On 2016年05月16日 16:08, Michael S. Tsirkin wrote: On Mon, May 16, 2016 at 03:52:11PM +0800, Jason Wang wrote: On 2016年05月16日 12:23, Michael S. Tsirkin wrote: On Mon, May 16, 2016 at 09:17:01AM +0800, Jason Wang wrote: We used to queue tx packets in sk_receive_queue, this is less efficient since it requires spinlocks to synchronize between producer and consumer. This patch tries to address this by using circular buffer which allows lockless synchronization. This is done by switching from sk_receive_queue to a tx skb ring with a new flag IFF_TX_RING and when this is set: Why do we need a new flag? Is there a userspace-visible behaviour change? Probably yes since tx_queue_length does not work. So the flag name should reflect the behaviour somehow, not the implementation. - store pointer to skb in circular buffer in tun_net_xmit(), and read it from the circular buffer in tun_do_read(). - introduce a new proto_ops peek which could be implemented by specific socket which does not use sk_receive_queue. - store skb length in circular buffer too, and implement a lockless peek for tuntap. - change vhost_net to use proto_ops->peek() instead - new spinlocks were introduced to synchronize among producers (and so did for consumers). Pktgen test shows about 9% improvement on guest receiving pps: Before: ~148pps After : ~161pps (I'm not sure noblocking read is still needed, so it was not included in this patch) How do you mean? Of course we must support blocking and non-blocking read - userspace uses it. Ok, will add this. Signed-off-by: Jason Wang--- --- drivers/net/tun.c | 157 +--- drivers/vhost/net.c | 16 - include/linux/net.h | 1 + include/uapi/linux/if_tun.h | 1 + 4 files changed, 165 insertions(+), 10 deletions(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 425e983..6001ece 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -71,6 +71,7 @@ #include #include #include +#include #include @@ -130,6 +131,8 @@ struct tap_filter { #define MAX_TAP_FLOWS 4096 #define TUN_FLOW_EXPIRE (3 * HZ) +#define TUN_RING_SIZE 256 Can we resize this according to tx_queue_len set by user? We can, but it needs lots of other changes, e.g being notified when tx_queue_len was changed by user. Some kind of notifier? Yes, maybe. Probably better than a new user interface. Ok. And if tx_queue_length is not power of 2, we probably need modulus to calculate the capacity. Is that really that important for speed? Not sure, I can test. If yes, round it up to next power of two. Right, this sounds a good solution. You can also probably wrap it with a conditional instead. +#define TUN_RING_MASK (TUN_RING_SIZE - 1) struct tun_pcpu_stats { u64 rx_packets; @@ -142,6 +145,11 @@ struct tun_pcpu_stats { u32 rx_frame_errors; }; +struct tun_desc { + struct sk_buff *skb; + int len; /* Cached skb len for peeking */ +}; + /* A tun_file connects an open character device to a tuntap netdevice. It * also contains all socket related structures (except sock_fprog and tap_filter) * to serve as one transmit queue for tuntap device. The sock_fprog and @@ -167,6 +175,13 @@ struct tun_file { }; struct list_head next; struct tun_struct *detached; + /* reader lock */ + spinlock_t rlock; + unsigned long tail; + struct tun_desc tx_descs[TUN_RING_SIZE]; + /* writer lock */ + spinlock_t wlock; + unsigned long head; }; struct tun_flow_entry { @@ -515,7 +530,27 @@ static struct tun_struct *tun_enable_queue(struct tun_file *tfile) static void tun_queue_purge(struct tun_file *tfile) { + unsigned long head, tail; + struct tun_desc *desc; + struct sk_buff *skb; skb_queue_purge(>sk.sk_receive_queue); + spin_lock(>rlock); + + head = ACCESS_ONCE(tfile->head); + tail = tfile->tail; + + /* read tail before reading descriptor at tail */ + smp_rmb(); I think you mean read *head* here Right. + + while (CIRC_CNT(head, tail, TUN_RING_SIZE) >= 1) { + desc = >tx_descs[tail]; + skb = desc->skb; + kfree_skb(skb); + tail = (tail + 1) & TUN_RING_MASK; + /* read descriptor before incrementing tail. */ + smp_store_release(>tail, tail & TUN_RING_MASK); + } + spin_unlock(>rlock); skb_queue_purge(>sk.sk_error_queue); } Barrier pairing seems messed up. Could you tag each barrier with its pair pls? E.g. add /* Barrier A for pairing */ Before barrier and its pair. Ok. for both tun_queue_purge() and tun_do_read(): smp_rmb() is paired with smp_store_release() in tun_net_xmit(). this seems at least an overkill. rmb would normally be paired with wmb, not a full mb within release. wmb is not enough here. We need
[PATCH 4.5 056/101] ALSA: hda - Fix broken reconfig
4.5-stable review patch. If anyone has any objections, please let me know. -- From: Takashi Iwai commit addacd801e1638f41d659cb53b9b73fc14322cb1 upstream. The HD-audio reconfig function got broken in the recent kernels, typically resulting in a failure like: snd_hda_intel :00:1b.0: control 3:0:0:Playback Channel Map:0 is already present This is because of the code restructuring to move the PCM and control instantiation into the codec drive probe, by the commit [bcd96557bd0a: ALSA: hda - Build PCMs and controls at codec driver probe]. Although the commit above removed the calls of snd_hda_codec_build_pcms() and *_build_controls() at the controller driver probe, the similar calls in the reconfig were still left forgotten. This caused the conflicting and duplicated PCMs and controls. The fix is trivial: just remove these superfluous calls from reconfig_codec(). Fixes: bcd96557bd0a ('ALSA: hda - Build PCMs and controls at codec driver probe') Reported-by: Jochen Henneberg Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/hda_sysfs.c |8 1 file changed, 8 deletions(-) --- a/sound/pci/hda/hda_sysfs.c +++ b/sound/pci/hda/hda_sysfs.c @@ -141,14 +141,6 @@ static int reconfig_codec(struct hda_cod err = snd_hda_codec_configure(codec); if (err < 0) goto error; - /* rebuild PCMs */ - err = snd_hda_codec_build_pcms(codec); - if (err < 0) - goto error; - /* rebuild mixers */ - err = snd_hda_codec_build_controls(codec); - if (err < 0) - goto error; err = snd_card_register(codec->card); error: snd_hda_power_down(codec);
Re: [PATCH net-next] tuntap: introduce tx skb ring
On 2016年05月16日 16:08, Michael S. Tsirkin wrote: On Mon, May 16, 2016 at 03:52:11PM +0800, Jason Wang wrote: On 2016年05月16日 12:23, Michael S. Tsirkin wrote: On Mon, May 16, 2016 at 09:17:01AM +0800, Jason Wang wrote: We used to queue tx packets in sk_receive_queue, this is less efficient since it requires spinlocks to synchronize between producer and consumer. This patch tries to address this by using circular buffer which allows lockless synchronization. This is done by switching from sk_receive_queue to a tx skb ring with a new flag IFF_TX_RING and when this is set: Why do we need a new flag? Is there a userspace-visible behaviour change? Probably yes since tx_queue_length does not work. So the flag name should reflect the behaviour somehow, not the implementation. - store pointer to skb in circular buffer in tun_net_xmit(), and read it from the circular buffer in tun_do_read(). - introduce a new proto_ops peek which could be implemented by specific socket which does not use sk_receive_queue. - store skb length in circular buffer too, and implement a lockless peek for tuntap. - change vhost_net to use proto_ops->peek() instead - new spinlocks were introduced to synchronize among producers (and so did for consumers). Pktgen test shows about 9% improvement on guest receiving pps: Before: ~148pps After : ~161pps (I'm not sure noblocking read is still needed, so it was not included in this patch) How do you mean? Of course we must support blocking and non-blocking read - userspace uses it. Ok, will add this. Signed-off-by: Jason Wang --- --- drivers/net/tun.c | 157 +--- drivers/vhost/net.c | 16 - include/linux/net.h | 1 + include/uapi/linux/if_tun.h | 1 + 4 files changed, 165 insertions(+), 10 deletions(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 425e983..6001ece 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -71,6 +71,7 @@ #include #include #include +#include #include @@ -130,6 +131,8 @@ struct tap_filter { #define MAX_TAP_FLOWS 4096 #define TUN_FLOW_EXPIRE (3 * HZ) +#define TUN_RING_SIZE 256 Can we resize this according to tx_queue_len set by user? We can, but it needs lots of other changes, e.g being notified when tx_queue_len was changed by user. Some kind of notifier? Yes, maybe. Probably better than a new user interface. Ok. And if tx_queue_length is not power of 2, we probably need modulus to calculate the capacity. Is that really that important for speed? Not sure, I can test. If yes, round it up to next power of two. Right, this sounds a good solution. You can also probably wrap it with a conditional instead. +#define TUN_RING_MASK (TUN_RING_SIZE - 1) struct tun_pcpu_stats { u64 rx_packets; @@ -142,6 +145,11 @@ struct tun_pcpu_stats { u32 rx_frame_errors; }; +struct tun_desc { + struct sk_buff *skb; + int len; /* Cached skb len for peeking */ +}; + /* A tun_file connects an open character device to a tuntap netdevice. It * also contains all socket related structures (except sock_fprog and tap_filter) * to serve as one transmit queue for tuntap device. The sock_fprog and @@ -167,6 +175,13 @@ struct tun_file { }; struct list_head next; struct tun_struct *detached; + /* reader lock */ + spinlock_t rlock; + unsigned long tail; + struct tun_desc tx_descs[TUN_RING_SIZE]; + /* writer lock */ + spinlock_t wlock; + unsigned long head; }; struct tun_flow_entry { @@ -515,7 +530,27 @@ static struct tun_struct *tun_enable_queue(struct tun_file *tfile) static void tun_queue_purge(struct tun_file *tfile) { + unsigned long head, tail; + struct tun_desc *desc; + struct sk_buff *skb; skb_queue_purge(>sk.sk_receive_queue); + spin_lock(>rlock); + + head = ACCESS_ONCE(tfile->head); + tail = tfile->tail; + + /* read tail before reading descriptor at tail */ + smp_rmb(); I think you mean read *head* here Right. + + while (CIRC_CNT(head, tail, TUN_RING_SIZE) >= 1) { + desc = >tx_descs[tail]; + skb = desc->skb; + kfree_skb(skb); + tail = (tail + 1) & TUN_RING_MASK; + /* read descriptor before incrementing tail. */ + smp_store_release(>tail, tail & TUN_RING_MASK); + } + spin_unlock(>rlock); skb_queue_purge(>sk.sk_error_queue); } Barrier pairing seems messed up. Could you tag each barrier with its pair pls? E.g. add /* Barrier A for pairing */ Before barrier and its pair. Ok. for both tun_queue_purge() and tun_do_read(): smp_rmb() is paired with smp_store_release() in tun_net_xmit(). this seems at least an overkill. rmb would normally be paired with wmb, not a full mb within release. wmb is not enough here. We need guarantee the
[PATCH 4.5 071/101] qla1280: Dont allocate 512kb of host tags
4.5-stable review patch. If anyone has any objections, please let me know. -- From: Johannes Thumshirncommit 2bcbc81421c511ef117cadcf0bee9c4340e68db0 upstream. The qla1280 driver sets the scsi_host_template's can_queue field to 0xf which results in an allocation failure when allocating the block layer tags for the driver's queues. This was introduced with the change for host wide tags in commit 64d513ac31b - "scsi: use host wide tags by default". Reduce can_queue to MAX_OUTSTANDING_COMMANDS (512) to solve the allocation error. Signed-off-by: Johannes Thumshirn Fixes: 64d513ac31b - "scsi: use host wide tags by default" Cc: Laura Abbott Cc: Michael Reed Reviewed-by: Laurence Oberman Reviewed-by: Lee Duncan Signed-off-by: Martin K. Petersen Signed-off-by: James Bottomley Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qla1280.c |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) --- a/drivers/scsi/qla1280.c +++ b/drivers/scsi/qla1280.c @@ -4214,7 +4214,7 @@ static struct scsi_host_template qla1280 .eh_bus_reset_handler = qla1280_eh_bus_reset, .eh_host_reset_handler = qla1280_eh_adapter_reset, .bios_param = qla1280_biosparam, - .can_queue = 0xf, + .can_queue = MAX_OUTSTANDING_COMMANDS, .this_id= -1, .sg_tablesize = SG_ALL, .use_clustering = ENABLE_CLUSTERING,
[PATCH 4.5 069/101] regulator: axp20x: Fix axp22x ldo_io voltage ranges
4.5-stable review patch. If anyone has any objections, please let me know. -- From: Hans de Goedecommit a2262e5a12e05389ab4c7fc5cf60016b041dd8dc upstream. The minium voltage of 1800mV is a copy and paste error from the axp20x regulator info. The correct minimum voltage for the ldo_io regulators on the axp22x is 700mV. Fixes: 1b82b4e4f954 ("regulator: axp20x: Add support for AXP22X regulators") Signed-off-by: Hans de Goede Acked-by: Chen-Yu Tsai Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/regulator/axp20x-regulator.c |4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) --- a/drivers/regulator/axp20x-regulator.c +++ b/drivers/regulator/axp20x-regulator.c @@ -221,10 +221,10 @@ static const struct regulator_desc axp22 AXP22X_ELDO2_V_OUT, 0x1f, AXP22X_PWR_OUT_CTRL2, BIT(1)), AXP_DESC(AXP22X, ELDO3, "eldo3", "eldoin", 700, 3300, 100, AXP22X_ELDO3_V_OUT, 0x1f, AXP22X_PWR_OUT_CTRL2, BIT(2)), - AXP_DESC_IO(AXP22X, LDO_IO0, "ldo_io0", "ips", 1800, 3300, 100, + AXP_DESC_IO(AXP22X, LDO_IO0, "ldo_io0", "ips", 700, 3300, 100, AXP22X_LDO_IO0_V_OUT, 0x1f, AXP20X_GPIO0_CTRL, 0x07, AXP22X_IO_ENABLED, AXP22X_IO_DISABLED), - AXP_DESC_IO(AXP22X, LDO_IO1, "ldo_io1", "ips", 1800, 3300, 100, + AXP_DESC_IO(AXP22X, LDO_IO1, "ldo_io1", "ips", 700, 3300, 100, AXP22X_LDO_IO1_V_OUT, 0x1f, AXP20X_GPIO1_CTRL, 0x07, AXP22X_IO_ENABLED, AXP22X_IO_DISABLED), AXP_DESC_FIXED(AXP22X, RTC_LDO, "rtc_ldo", "ips", 3000),
[PATCH 4.5 071/101] qla1280: Dont allocate 512kb of host tags
4.5-stable review patch. If anyone has any objections, please let me know. -- From: Johannes Thumshirn commit 2bcbc81421c511ef117cadcf0bee9c4340e68db0 upstream. The qla1280 driver sets the scsi_host_template's can_queue field to 0xf which results in an allocation failure when allocating the block layer tags for the driver's queues. This was introduced with the change for host wide tags in commit 64d513ac31b - "scsi: use host wide tags by default". Reduce can_queue to MAX_OUTSTANDING_COMMANDS (512) to solve the allocation error. Signed-off-by: Johannes Thumshirn Fixes: 64d513ac31b - "scsi: use host wide tags by default" Cc: Laura Abbott Cc: Michael Reed Reviewed-by: Laurence Oberman Reviewed-by: Lee Duncan Signed-off-by: Martin K. Petersen Signed-off-by: James Bottomley Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qla1280.c |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) --- a/drivers/scsi/qla1280.c +++ b/drivers/scsi/qla1280.c @@ -4214,7 +4214,7 @@ static struct scsi_host_template qla1280 .eh_bus_reset_handler = qla1280_eh_bus_reset, .eh_host_reset_handler = qla1280_eh_adapter_reset, .bios_param = qla1280_biosparam, - .can_queue = 0xf, + .can_queue = MAX_OUTSTANDING_COMMANDS, .this_id= -1, .sg_tablesize = SG_ALL, .use_clustering = ENABLE_CLUSTERING,
[PATCH 4.5 069/101] regulator: axp20x: Fix axp22x ldo_io voltage ranges
4.5-stable review patch. If anyone has any objections, please let me know. -- From: Hans de Goede commit a2262e5a12e05389ab4c7fc5cf60016b041dd8dc upstream. The minium voltage of 1800mV is a copy and paste error from the axp20x regulator info. The correct minimum voltage for the ldo_io regulators on the axp22x is 700mV. Fixes: 1b82b4e4f954 ("regulator: axp20x: Add support for AXP22X regulators") Signed-off-by: Hans de Goede Acked-by: Chen-Yu Tsai Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/regulator/axp20x-regulator.c |4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) --- a/drivers/regulator/axp20x-regulator.c +++ b/drivers/regulator/axp20x-regulator.c @@ -221,10 +221,10 @@ static const struct regulator_desc axp22 AXP22X_ELDO2_V_OUT, 0x1f, AXP22X_PWR_OUT_CTRL2, BIT(1)), AXP_DESC(AXP22X, ELDO3, "eldo3", "eldoin", 700, 3300, 100, AXP22X_ELDO3_V_OUT, 0x1f, AXP22X_PWR_OUT_CTRL2, BIT(2)), - AXP_DESC_IO(AXP22X, LDO_IO0, "ldo_io0", "ips", 1800, 3300, 100, + AXP_DESC_IO(AXP22X, LDO_IO0, "ldo_io0", "ips", 700, 3300, 100, AXP22X_LDO_IO0_V_OUT, 0x1f, AXP20X_GPIO0_CTRL, 0x07, AXP22X_IO_ENABLED, AXP22X_IO_DISABLED), - AXP_DESC_IO(AXP22X, LDO_IO1, "ldo_io1", "ips", 1800, 3300, 100, + AXP_DESC_IO(AXP22X, LDO_IO1, "ldo_io1", "ips", 700, 3300, 100, AXP22X_LDO_IO1_V_OUT, 0x1f, AXP20X_GPIO1_CTRL, 0x07, AXP22X_IO_ENABLED, AXP22X_IO_DISABLED), AXP_DESC_FIXED(AXP22X, RTC_LDO, "rtc_ldo", "ips", 3000),
[PATCH 4.5 035/101] net/mlx4_en: Fix endianness bug in IPV6 csum calculation
4.5-stable review patch. If anyone has any objections, please let me know. -- From: Daniel Jurgens[ Upstream commit 82d69203df634b4dfa765c94f60ce9482bcc44d6 ] Use htons instead of unconditionally byte swapping nexthdr. On a little endian systems shifting the byte is correct behavior, but it results in incorrect csums on big endian architectures. Fixes: f8c6455bb04b ('net/mlx4_en: Extend checksum offloading by CHECKSUM COMPLETE') Signed-off-by: Daniel Jurgens Reviewed-by: Carol Soto Tested-by: Carol Soto Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx4/en_rx.c |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -704,7 +704,7 @@ static int get_fixed_ipv6_csum(__wsum hw if (ipv6h->nexthdr == IPPROTO_FRAGMENT || ipv6h->nexthdr == IPPROTO_HOPOPTS) return -1; - hw_checksum = csum_add(hw_checksum, (__force __wsum)(ipv6h->nexthdr << 8)); + hw_checksum = csum_add(hw_checksum, (__force __wsum)htons(ipv6h->nexthdr)); csum_pseudo_hdr = csum_partial(>saddr, sizeof(ipv6h->saddr) + sizeof(ipv6h->daddr), 0);
[PATCH 4.5 066/101] ARM: dts: at91: sam9x5: Fix the memory range assigned to the PMC
4.5-stable review patch. If anyone has any objections, please let me know. -- From: Boris Brezilloncommit aab0a4c83ceb344d2327194bf354820e50607af6 upstream. The memory range assigned to the PMC (Power Management Controller) was not including the PMC_PCR register which are used to control peripheral clocks. This was working fine thanks to the page granularity of ioremap(), but started to fail when we switched to syscon/regmap, because regmap is making sure that all accesses are falling into the reserved range. Signed-off-by: Boris Brezillon Reported-by: Richard Genoud Tested-by: Richard Genoud Fixes: 863a81c3be1d ("clk: at91: make use of syscon to share PMC registers in several drivers") Signed-off-by: Nicolas Ferre Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/at91sam9x5.dtsi |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) --- a/arch/arm/boot/dts/at91sam9x5.dtsi +++ b/arch/arm/boot/dts/at91sam9x5.dtsi @@ -106,7 +106,7 @@ pmc: pmc@fc00 { compatible = "atmel,at91sam9x5-pmc", "syscon"; - reg = <0xfc00 0x100>; + reg = <0xfc00 0x200>; interrupts = <1 IRQ_TYPE_LEVEL_HIGH 7>; interrupt-controller; #address-cells = <1>;
[PATCH 4.5 065/101] vfs: rename: check backing inode being equal
4.5-stable review patch. If anyone has any objections, please let me know. -- From: Miklos Szeredicommit 9409e22acdfc9153f88d9b1ed2bd2a5b34d2d3ca upstream. If a file is renamed to a hardlink of itself POSIX specifies that rename(2) should do nothing and return success. This condition is checked in vfs_rename(). However it won't detect hard links on overlayfs where these are given separate inodes on the overlayfs layer. Overlayfs itself detects this condition and returns success without doing anything, but then vfs_rename() will proceed as if this was a successful rename (detach_mounts(), d_move()). The correct thing to do is to detect this condition before even calling into overlayfs. This patch does this by calling vfs_select_inode() to get the underlying inodes. Signed-off-by: Miklos Szeredi Signed-off-by: Greg Kroah-Hartman --- fs/namei.c |6 +- 1 file changed, 5 insertions(+), 1 deletion(-) --- a/fs/namei.c +++ b/fs/namei.c @@ -4258,7 +4258,11 @@ int vfs_rename(struct inode *old_dir, st bool new_is_dir = false; unsigned max_links = new_dir->i_sb->s_max_links; - if (source == target) + /* +* Check source == target. +* On overlayfs need to look at underlying inodes. +*/ + if (vfs_select_inode(old_dentry, 0) == vfs_select_inode(new_dentry, 0)) return 0; error = may_delete(old_dir, old_dentry, is_dir);
[PATCH 4.5 070/101] atomic_open(): fix the handling of create_error
4.5-stable review patch. If anyone has any objections, please let me know. -- From: Al Virocommit 10c64cea04d3c75c306b3f990586ffb343b63287 upstream. * if we have a hashed negative dentry and either CREAT|EXCL on r/o filesystem, or CREAT|TRUNC on r/o filesystem, or CREAT|EXCL with failing may_o_create(), we should fail with EROFS or the error may_o_create() has returned, but not ENOENT. Which is what the current code ends up returning. * if we have CREAT|TRUNC hitting a regular file on a read-only filesystem, we can't fail with EROFS here. At the very least, not until we'd done follow_managed() - we might have a writable file (or a device, for that matter) bound on top of that one. Moreover, the code downstream will see that O_TRUNC and attempt to grab the write access (*after* following possible mount), so if we really should fail with EROFS, it will happen. No need to do that inside atomic_open(). The real logics is much simpler than what the current code is trying to do - if we decided to go for simple lookup, ended up with a negative dentry *and* had create_error set, fail with create_error. No matter whether we'd got that negative dentry from lookup_real() or had found it in dcache. Acked-by: Miklos Szeredi Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- fs/namei.c | 20 1 file changed, 4 insertions(+), 16 deletions(-) --- a/fs/namei.c +++ b/fs/namei.c @@ -2968,22 +2968,10 @@ no_open: dentry = lookup_real(dir, dentry, nd->flags); if (IS_ERR(dentry)) return PTR_ERR(dentry); - - if (create_error) { - int open_flag = op->open_flag; - - error = create_error; - if ((open_flag & O_EXCL)) { - if (!dentry->d_inode) - goto out; - } else if (!dentry->d_inode) { - goto out; - } else if ((open_flag & O_TRUNC) && - d_is_reg(dentry)) { - goto out; - } - /* will fail later, go on to get the right error */ - } + } + if (create_error && !dentry->d_inode) { + error = create_error; + goto out; } looked_up: path->dentry = dentry;
[PATCH 4.5 035/101] net/mlx4_en: Fix endianness bug in IPV6 csum calculation
4.5-stable review patch. If anyone has any objections, please let me know. -- From: Daniel Jurgens [ Upstream commit 82d69203df634b4dfa765c94f60ce9482bcc44d6 ] Use htons instead of unconditionally byte swapping nexthdr. On a little endian systems shifting the byte is correct behavior, but it results in incorrect csums on big endian architectures. Fixes: f8c6455bb04b ('net/mlx4_en: Extend checksum offloading by CHECKSUM COMPLETE') Signed-off-by: Daniel Jurgens Reviewed-by: Carol Soto Tested-by: Carol Soto Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx4/en_rx.c |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -704,7 +704,7 @@ static int get_fixed_ipv6_csum(__wsum hw if (ipv6h->nexthdr == IPPROTO_FRAGMENT || ipv6h->nexthdr == IPPROTO_HOPOPTS) return -1; - hw_checksum = csum_add(hw_checksum, (__force __wsum)(ipv6h->nexthdr << 8)); + hw_checksum = csum_add(hw_checksum, (__force __wsum)htons(ipv6h->nexthdr)); csum_pseudo_hdr = csum_partial(>saddr, sizeof(ipv6h->saddr) + sizeof(ipv6h->daddr), 0);
[PATCH 4.5 066/101] ARM: dts: at91: sam9x5: Fix the memory range assigned to the PMC
4.5-stable review patch. If anyone has any objections, please let me know. -- From: Boris Brezillon commit aab0a4c83ceb344d2327194bf354820e50607af6 upstream. The memory range assigned to the PMC (Power Management Controller) was not including the PMC_PCR register which are used to control peripheral clocks. This was working fine thanks to the page granularity of ioremap(), but started to fail when we switched to syscon/regmap, because regmap is making sure that all accesses are falling into the reserved range. Signed-off-by: Boris Brezillon Reported-by: Richard Genoud Tested-by: Richard Genoud Fixes: 863a81c3be1d ("clk: at91: make use of syscon to share PMC registers in several drivers") Signed-off-by: Nicolas Ferre Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/at91sam9x5.dtsi |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) --- a/arch/arm/boot/dts/at91sam9x5.dtsi +++ b/arch/arm/boot/dts/at91sam9x5.dtsi @@ -106,7 +106,7 @@ pmc: pmc@fc00 { compatible = "atmel,at91sam9x5-pmc", "syscon"; - reg = <0xfc00 0x100>; + reg = <0xfc00 0x200>; interrupts = <1 IRQ_TYPE_LEVEL_HIGH 7>; interrupt-controller; #address-cells = <1>;
[PATCH 4.5 065/101] vfs: rename: check backing inode being equal
4.5-stable review patch. If anyone has any objections, please let me know. -- From: Miklos Szeredi commit 9409e22acdfc9153f88d9b1ed2bd2a5b34d2d3ca upstream. If a file is renamed to a hardlink of itself POSIX specifies that rename(2) should do nothing and return success. This condition is checked in vfs_rename(). However it won't detect hard links on overlayfs where these are given separate inodes on the overlayfs layer. Overlayfs itself detects this condition and returns success without doing anything, but then vfs_rename() will proceed as if this was a successful rename (detach_mounts(), d_move()). The correct thing to do is to detect this condition before even calling into overlayfs. This patch does this by calling vfs_select_inode() to get the underlying inodes. Signed-off-by: Miklos Szeredi Signed-off-by: Greg Kroah-Hartman --- fs/namei.c |6 +- 1 file changed, 5 insertions(+), 1 deletion(-) --- a/fs/namei.c +++ b/fs/namei.c @@ -4258,7 +4258,11 @@ int vfs_rename(struct inode *old_dir, st bool new_is_dir = false; unsigned max_links = new_dir->i_sb->s_max_links; - if (source == target) + /* +* Check source == target. +* On overlayfs need to look at underlying inodes. +*/ + if (vfs_select_inode(old_dentry, 0) == vfs_select_inode(new_dentry, 0)) return 0; error = may_delete(old_dir, old_dentry, is_dir);
[PATCH 4.5 070/101] atomic_open(): fix the handling of create_error
4.5-stable review patch. If anyone has any objections, please let me know. -- From: Al Viro commit 10c64cea04d3c75c306b3f990586ffb343b63287 upstream. * if we have a hashed negative dentry and either CREAT|EXCL on r/o filesystem, or CREAT|TRUNC on r/o filesystem, or CREAT|EXCL with failing may_o_create(), we should fail with EROFS or the error may_o_create() has returned, but not ENOENT. Which is what the current code ends up returning. * if we have CREAT|TRUNC hitting a regular file on a read-only filesystem, we can't fail with EROFS here. At the very least, not until we'd done follow_managed() - we might have a writable file (or a device, for that matter) bound on top of that one. Moreover, the code downstream will see that O_TRUNC and attempt to grab the write access (*after* following possible mount), so if we really should fail with EROFS, it will happen. No need to do that inside atomic_open(). The real logics is much simpler than what the current code is trying to do - if we decided to go for simple lookup, ended up with a negative dentry *and* had create_error set, fail with create_error. No matter whether we'd got that negative dentry from lookup_real() or had found it in dcache. Acked-by: Miklos Szeredi Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- fs/namei.c | 20 1 file changed, 4 insertions(+), 16 deletions(-) --- a/fs/namei.c +++ b/fs/namei.c @@ -2968,22 +2968,10 @@ no_open: dentry = lookup_real(dir, dentry, nd->flags); if (IS_ERR(dentry)) return PTR_ERR(dentry); - - if (create_error) { - int open_flag = op->open_flag; - - error = create_error; - if ((open_flag & O_EXCL)) { - if (!dentry->d_inode) - goto out; - } else if (!dentry->d_inode) { - goto out; - } else if ((open_flag & O_TRUNC) && - d_is_reg(dentry)) { - goto out; - } - /* will fail later, go on to get the right error */ - } + } + if (create_error && !dentry->d_inode) { + error = create_error; + goto out; } looked_up: path->dentry = dentry;
[PATCH 4.5 063/101] perf/core: Disable the event on a truncated AUX record
4.5-stable review patch. If anyone has any objections, please let me know. -- From: Alexander Shishkincommit 9f448cd3cbcec8995935e60b27802ae56aac8cc0 upstream. When the PMU driver reports a truncated AUX record, it effectively means that there is no more usable room in the event's AUX buffer (even though there may still be some room, so that perf_aux_output_begin() doesn't take action). At this point the consumer still has to be woken up and the event has to be disabled, otherwise the event will just keep spinning between perf_aux_output_begin() and perf_aux_output_end() until its context gets unscheduled. Again, for cpu-wide events this means never, so once in this condition, they will be forever losing data. Fix this by disabling the event and waking up the consumer in case of a truncated AUX record. Reported-by: Markus Metzger Signed-off-by: Alexander Shishkin Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: vi...@deater.net Link: http://lkml.kernel.org/r/1462886313-13660-3-git-send-email-alexander.shish...@linux.intel.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- kernel/events/ring_buffer.c | 10 +- 1 file changed, 9 insertions(+), 1 deletion(-) --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -347,6 +347,7 @@ void perf_aux_output_end(struct perf_out bool truncated) { struct ring_buffer *rb = handle->rb; + bool wakeup = truncated; unsigned long aux_head; u64 flags = 0; @@ -375,9 +376,16 @@ void perf_aux_output_end(struct perf_out aux_head = rb->user_page->aux_head = local_read(>aux_head); if (aux_head - local_read(>aux_wakeup) >= rb->aux_watermark) { - perf_output_wakeup(handle); + wakeup = true; local_add(rb->aux_watermark, >aux_wakeup); } + + if (wakeup) { + if (truncated) + handle->event->pending_disable = 1; + perf_output_wakeup(handle); + } + handle->event = NULL; local_set(>aux_nest, 0);
[PATCH 4.5 063/101] perf/core: Disable the event on a truncated AUX record
4.5-stable review patch. If anyone has any objections, please let me know. -- From: Alexander Shishkin commit 9f448cd3cbcec8995935e60b27802ae56aac8cc0 upstream. When the PMU driver reports a truncated AUX record, it effectively means that there is no more usable room in the event's AUX buffer (even though there may still be some room, so that perf_aux_output_begin() doesn't take action). At this point the consumer still has to be woken up and the event has to be disabled, otherwise the event will just keep spinning between perf_aux_output_begin() and perf_aux_output_end() until its context gets unscheduled. Again, for cpu-wide events this means never, so once in this condition, they will be forever losing data. Fix this by disabling the event and waking up the consumer in case of a truncated AUX record. Reported-by: Markus Metzger Signed-off-by: Alexander Shishkin Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: vi...@deater.net Link: http://lkml.kernel.org/r/1462886313-13660-3-git-send-email-alexander.shish...@linux.intel.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- kernel/events/ring_buffer.c | 10 +- 1 file changed, 9 insertions(+), 1 deletion(-) --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -347,6 +347,7 @@ void perf_aux_output_end(struct perf_out bool truncated) { struct ring_buffer *rb = handle->rb; + bool wakeup = truncated; unsigned long aux_head; u64 flags = 0; @@ -375,9 +376,16 @@ void perf_aux_output_end(struct perf_out aux_head = rb->user_page->aux_head = local_read(>aux_head); if (aux_head - local_read(>aux_wakeup) >= rb->aux_watermark) { - perf_output_wakeup(handle); + wakeup = true; local_add(rb->aux_watermark, >aux_wakeup); } + + if (wakeup) { + if (truncated) + handle->event->pending_disable = 1; + perf_output_wakeup(handle); + } + handle->event = NULL; local_set(>aux_nest, 0);
[PATCH 4.5 072/101] tools lib traceevent: Do not reassign parg after collapse_tree()
4.5-stable review patch. If anyone has any objections, please let me know. -- From: Steven Rostedtcommit 106b816cb46ebd87408b4ed99a2e16203114daa6 upstream. At the end of process_filter(), collapse_tree() was changed to update the parg parameter, but the reassignment after the call wasn't removed. What happens is that the "current_op" gets modified and freed and parg is assigned to the new allocated argument. But after the call to collapse_tree(), parg is assigned again to the just freed "current_op", and this causes the tool to crash. The current_op variable must also be assigned to NULL in case of error, otherwise it will cause it to be free()ed twice. Signed-off-by: Steven Rostedt Acked-by: Namhyung Kim Fixes: 42d6194d133c ("tools lib traceevent: Refactor process_filter()") Link: http://lkml.kernel.org/r/20160511150936.678c1...@gandalf.local.home Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/lib/traceevent/parse-filter.c |4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) --- a/tools/lib/traceevent/parse-filter.c +++ b/tools/lib/traceevent/parse-filter.c @@ -1164,11 +1164,11 @@ process_filter(struct event_format *even current_op = current_exp; ret = collapse_tree(current_op, parg, error_str); + /* collapse_tree() may free current_op, and updates parg accordingly */ + current_op = NULL; if (ret < 0) goto fail; - *parg = current_op; - free(token); return 0;
[PATCH 4.5 073/101] get_rock_ridge_filename(): handle malformed NM entries
4.5-stable review patch. If anyone has any objections, please let me know. -- From: Al Virocommit 99d825822eade8d827a1817357cbf3f889a552d6 upstream. Payloads of NM entries are not supposed to contain NUL. When we run into such, only the part prior to the first NUL goes into the concatenation (i.e. the directory entry name being encoded by a bunch of NM entries). We do stop when the amount collected so far + the claimed amount in the current NM entry exceed 254. So far, so good, but what we return as the total length is the sum of *claimed* sizes, not the actual amount collected. And that can grow pretty large - not unlimited, since you'd need to put CE entries in between to be able to get more than the maximum that could be contained in one isofs directory entry / continuation chunk and we are stop once we'd encountered 32 CEs, but you can get about 8Kb easily. And that's what will be passed to readdir callback as the name length. 8Kb __copy_to_user() from a buffer allocated by __get_free_page() Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- fs/isofs/rock.c | 13 ++--- 1 file changed, 10 insertions(+), 3 deletions(-) --- a/fs/isofs/rock.c +++ b/fs/isofs/rock.c @@ -203,6 +203,8 @@ int get_rock_ridge_filename(struct iso_d int retnamlen = 0; int truncate = 0; int ret = 0; + char *p; + int len; if (!ISOFS_SB(inode->i_sb)->s_rock) return 0; @@ -267,12 +269,17 @@ repeat: rr->u.NM.flags); break; } - if ((strlen(retname) + rr->len - 5) >= 254) { + len = rr->len - 5; + if (retnamlen + len >= 254) { truncate = 1; break; } - strncat(retname, rr->u.NM.name, rr->len - 5); - retnamlen += rr->len - 5; + p = memchr(rr->u.NM.name, '\0', len); + if (unlikely(p)) + len = p - rr->u.NM.name; + memcpy(retname + retnamlen, rr->u.NM.name, len); + retnamlen += len; + retname[retnamlen] = '\0'; break; case SIG('R', 'E'): kfree(rs.buffer);
[PATCH 4.5 072/101] tools lib traceevent: Do not reassign parg after collapse_tree()
4.5-stable review patch. If anyone has any objections, please let me know. -- From: Steven Rostedt commit 106b816cb46ebd87408b4ed99a2e16203114daa6 upstream. At the end of process_filter(), collapse_tree() was changed to update the parg parameter, but the reassignment after the call wasn't removed. What happens is that the "current_op" gets modified and freed and parg is assigned to the new allocated argument. But after the call to collapse_tree(), parg is assigned again to the just freed "current_op", and this causes the tool to crash. The current_op variable must also be assigned to NULL in case of error, otherwise it will cause it to be free()ed twice. Signed-off-by: Steven Rostedt Acked-by: Namhyung Kim Fixes: 42d6194d133c ("tools lib traceevent: Refactor process_filter()") Link: http://lkml.kernel.org/r/20160511150936.678c1...@gandalf.local.home Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/lib/traceevent/parse-filter.c |4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) --- a/tools/lib/traceevent/parse-filter.c +++ b/tools/lib/traceevent/parse-filter.c @@ -1164,11 +1164,11 @@ process_filter(struct event_format *even current_op = current_exp; ret = collapse_tree(current_op, parg, error_str); + /* collapse_tree() may free current_op, and updates parg accordingly */ + current_op = NULL; if (ret < 0) goto fail; - *parg = current_op; - free(token); return 0;
[PATCH 4.5 073/101] get_rock_ridge_filename(): handle malformed NM entries
4.5-stable review patch. If anyone has any objections, please let me know. -- From: Al Viro commit 99d825822eade8d827a1817357cbf3f889a552d6 upstream. Payloads of NM entries are not supposed to contain NUL. When we run into such, only the part prior to the first NUL goes into the concatenation (i.e. the directory entry name being encoded by a bunch of NM entries). We do stop when the amount collected so far + the claimed amount in the current NM entry exceed 254. So far, so good, but what we return as the total length is the sum of *claimed* sizes, not the actual amount collected. And that can grow pretty large - not unlimited, since you'd need to put CE entries in between to be able to get more than the maximum that could be contained in one isofs directory entry / continuation chunk and we are stop once we'd encountered 32 CEs, but you can get about 8Kb easily. And that's what will be passed to readdir callback as the name length. 8Kb __copy_to_user() from a buffer allocated by __get_free_page() Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- fs/isofs/rock.c | 13 ++--- 1 file changed, 10 insertions(+), 3 deletions(-) --- a/fs/isofs/rock.c +++ b/fs/isofs/rock.c @@ -203,6 +203,8 @@ int get_rock_ridge_filename(struct iso_d int retnamlen = 0; int truncate = 0; int ret = 0; + char *p; + int len; if (!ISOFS_SB(inode->i_sb)->s_rock) return 0; @@ -267,12 +269,17 @@ repeat: rr->u.NM.flags); break; } - if ((strlen(retname) + rr->len - 5) >= 254) { + len = rr->len - 5; + if (retnamlen + len >= 254) { truncate = 1; break; } - strncat(retname, rr->u.NM.name, rr->len - 5); - retnamlen += rr->len - 5; + p = memchr(rr->u.NM.name, '\0', len); + if (unlikely(p)) + len = p - rr->u.NM.name; + memcpy(retname + retnamlen, rr->u.NM.name, len); + retnamlen += len; + retname[retnamlen] = '\0'; break; case SIG('R', 'E'): kfree(rs.buffer);
[PATCH 4.5 036/101] VSOCK: do not disconnect socket when peer has shutdown SEND only
4.5-stable review patch. If anyone has any objections, please let me know. -- From: Ian Campbell[ Upstream commit dedc58e067d8c379a15a8a183c5db318201295bb ] The peer may be expecting a reply having sent a request and then done a shutdown(SHUT_WR), so tearing down the whole socket at this point seems wrong and breaks for me with a client which does a SHUT_WR. Looking at other socket family's stream_recvmsg callbacks doing a shutdown here does not seem to be the norm and removing it does not seem to have had any adverse effects that I can see. I'm using Stefan's RFC virtio transport patches, I'm unsure of the impact on the vmci transport. Signed-off-by: Ian Campbell Cc: "David S. Miller" Cc: Stefan Hajnoczi Cc: Claudio Imbrenda Cc: Andy King Cc: Dmitry Torokhov Cc: Jorgen Hansen Cc: Adit Ranadive Cc: net...@vger.kernel.org Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/vmw_vsock/af_vsock.c | 21 + 1 file changed, 1 insertion(+), 20 deletions(-) --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1789,27 +1789,8 @@ vsock_stream_recvmsg(struct socket *sock else if (sk->sk_shutdown & RCV_SHUTDOWN) err = 0; - if (copied > 0) { - /* We only do these additional bookkeeping/notification steps -* if we actually copied something out of the queue pair -* instead of just peeking ahead. -*/ - - if (!(flags & MSG_PEEK)) { - /* If the other side has shutdown for sending and there -* is nothing more to read, then modify the socket -* state. -*/ - if (vsk->peer_shutdown & SEND_SHUTDOWN) { - if (vsock_stream_has_data(vsk) <= 0) { - sk->sk_state = SS_UNCONNECTED; - sock_set_flag(sk, SOCK_DONE); - sk->sk_state_change(sk); - } - } - } + if (copied > 0) err = copied; - } out: release_sock(sk);
[PATCH 4.5 036/101] VSOCK: do not disconnect socket when peer has shutdown SEND only
4.5-stable review patch. If anyone has any objections, please let me know. -- From: Ian Campbell [ Upstream commit dedc58e067d8c379a15a8a183c5db318201295bb ] The peer may be expecting a reply having sent a request and then done a shutdown(SHUT_WR), so tearing down the whole socket at this point seems wrong and breaks for me with a client which does a SHUT_WR. Looking at other socket family's stream_recvmsg callbacks doing a shutdown here does not seem to be the norm and removing it does not seem to have had any adverse effects that I can see. I'm using Stefan's RFC virtio transport patches, I'm unsure of the impact on the vmci transport. Signed-off-by: Ian Campbell Cc: "David S. Miller" Cc: Stefan Hajnoczi Cc: Claudio Imbrenda Cc: Andy King Cc: Dmitry Torokhov Cc: Jorgen Hansen Cc: Adit Ranadive Cc: net...@vger.kernel.org Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/vmw_vsock/af_vsock.c | 21 + 1 file changed, 1 insertion(+), 20 deletions(-) --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1789,27 +1789,8 @@ vsock_stream_recvmsg(struct socket *sock else if (sk->sk_shutdown & RCV_SHUTDOWN) err = 0; - if (copied > 0) { - /* We only do these additional bookkeeping/notification steps -* if we actually copied something out of the queue pair -* instead of just peeking ahead. -*/ - - if (!(flags & MSG_PEEK)) { - /* If the other side has shutdown for sending and there -* is nothing more to read, then modify the socket -* state. -*/ - if (vsk->peer_shutdown & SEND_SHUTDOWN) { - if (vsock_stream_has_data(vsk) <= 0) { - sk->sk_state = SS_UNCONNECTED; - sock_set_flag(sk, SOCK_DONE); - sk->sk_state_change(sk); - } - } - } + if (copied > 0) err = copied; - } out: release_sock(sk);
[PATCH 4.5 074/101] Input: max8997-haptic - fix NULL pointer dereference
4.5-stable review patch. If anyone has any objections, please let me know. -- From: Marek Szyprowskicommit 6ae645d5fa385f3787bf1723639cd907fe5865e7 upstream. NULL pointer derefence happens when booting with DTB because the platform data for haptic device is not set in supplied data from parent MFD device. The MFD device creates only platform data (from Device Tree) for itself, not for haptic child. Unable to handle kernel NULL pointer dereference at virtual address 009c pgd = c0004000 [009c] *pgd= Internal error: Oops: 5 [#1] PREEMPT SMP ARM (max8997_haptic_probe) from [] (platform_drv_probe+0x4c/0xb0) (platform_drv_probe) from [] (driver_probe_device+0x214/0x2c0) (driver_probe_device) from [] (__driver_attach+0xac/0xb0) (__driver_attach) from [] (bus_for_each_dev+0x68/0x9c) (bus_for_each_dev) from [] (bus_add_driver+0x1a0/0x218) (bus_add_driver) from [] (driver_register+0x78/0xf8) (driver_register) from [] (do_one_initcall+0x90/0x1d8) (do_one_initcall) from [] (kernel_init_freeable+0x15c/0x1fc) (kernel_init_freeable) from [] (kernel_init+0x8/0x114) (kernel_init) from [] (ret_from_fork+0x14/0x3c) Signed-off-by: Marek Szyprowski Fixes: 104594b01ce7 ("Input: add driver support for MAX8997-haptic") [k.kozlowski: Write commit message, add CC-stable] Signed-off-by: Krzysztof Kozlowski Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/misc/max8997_haptic.c |6 -- 1 file changed, 4 insertions(+), 2 deletions(-) --- a/drivers/input/misc/max8997_haptic.c +++ b/drivers/input/misc/max8997_haptic.c @@ -255,12 +255,14 @@ static int max8997_haptic_probe(struct p struct max8997_dev *iodev = dev_get_drvdata(pdev->dev.parent); const struct max8997_platform_data *pdata = dev_get_platdata(iodev->dev); - const struct max8997_haptic_platform_data *haptic_pdata = - pdata->haptic_pdata; + const struct max8997_haptic_platform_data *haptic_pdata = NULL; struct max8997_haptic *chip; struct input_dev *input_dev; int error; + if (pdata) + haptic_pdata = pdata->haptic_pdata; + if (!haptic_pdata) { dev_err(>dev, "no haptic platform data\n"); return -EINVAL;
[PATCH 4.5 074/101] Input: max8997-haptic - fix NULL pointer dereference
4.5-stable review patch. If anyone has any objections, please let me know. -- From: Marek Szyprowski commit 6ae645d5fa385f3787bf1723639cd907fe5865e7 upstream. NULL pointer derefence happens when booting with DTB because the platform data for haptic device is not set in supplied data from parent MFD device. The MFD device creates only platform data (from Device Tree) for itself, not for haptic child. Unable to handle kernel NULL pointer dereference at virtual address 009c pgd = c0004000 [009c] *pgd= Internal error: Oops: 5 [#1] PREEMPT SMP ARM (max8997_haptic_probe) from [] (platform_drv_probe+0x4c/0xb0) (platform_drv_probe) from [] (driver_probe_device+0x214/0x2c0) (driver_probe_device) from [] (__driver_attach+0xac/0xb0) (__driver_attach) from [] (bus_for_each_dev+0x68/0x9c) (bus_for_each_dev) from [] (bus_add_driver+0x1a0/0x218) (bus_add_driver) from [] (driver_register+0x78/0xf8) (driver_register) from [] (do_one_initcall+0x90/0x1d8) (do_one_initcall) from [] (kernel_init_freeable+0x15c/0x1fc) (kernel_init_freeable) from [] (kernel_init+0x8/0x114) (kernel_init) from [] (ret_from_fork+0x14/0x3c) Signed-off-by: Marek Szyprowski Fixes: 104594b01ce7 ("Input: add driver support for MAX8997-haptic") [k.kozlowski: Write commit message, add CC-stable] Signed-off-by: Krzysztof Kozlowski Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/misc/max8997_haptic.c |6 -- 1 file changed, 4 insertions(+), 2 deletions(-) --- a/drivers/input/misc/max8997_haptic.c +++ b/drivers/input/misc/max8997_haptic.c @@ -255,12 +255,14 @@ static int max8997_haptic_probe(struct p struct max8997_dev *iodev = dev_get_drvdata(pdev->dev.parent); const struct max8997_platform_data *pdata = dev_get_platdata(iodev->dev); - const struct max8997_haptic_platform_data *haptic_pdata = - pdata->haptic_pdata; + const struct max8997_haptic_platform_data *haptic_pdata = NULL; struct max8997_haptic *chip; struct input_dev *input_dev; int error; + if (pdata) + haptic_pdata = pdata->haptic_pdata; + if (!haptic_pdata) { dev_err(>dev, "no haptic platform data\n"); return -EINVAL;
[PATCH 4.5 037/101] net: bridge: fix old ioctl unlocked net device walk
4.5-stable review patch. If anyone has any objections, please let me know. -- From: Nikolay Aleksandrov[ Upstream commit 31ca0458a61a502adb7ed192bf9716c6d05791a5 ] get_bridge_ifindices() is used from the old "deviceless" bridge ioctl calls which aren't called with rtnl held. The comment above says that it is called with rtnl but that is not really the case. Here's a sample output from a test ASSERT_RTNL() which I put in get_bridge_ifindices and executed "brctl show": [ 957.422726] RTNL: assertion failed at net/bridge//br_ioctl.c (30) [ 957.422925] CPU: 0 PID: 1862 Comm: brctl Tainted: GW O 4.6.0-rc4+ #157 [ 957.423009] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.8.1-20150318_183358- 04/01/2014 [ 957.423009] 880058adfdf0 8138dec5 0400 [ 957.423009] 81ce8380 880058adfe58 a05ead32 0001 [ 957.423009] 7ffec1a444b0 0400 880053c19130 8940 [ 957.423009] Call Trace: [ 957.423009] [] dump_stack+0x85/0xc0 [ 957.423009] [] br_ioctl_deviceless_stub+0x212/0x2e0 [bridge] [ 957.423009] [] sock_ioctl+0x22b/0x290 [ 957.423009] [] do_vfs_ioctl+0x95/0x700 [ 957.423009] [] SyS_ioctl+0x79/0x90 [ 957.423009] [] entry_SYSCALL_64_fastpath+0x23/0xc1 Since it only reads bridge ifindices, we can use rcu to safely walk the net device list. Also remove the wrong rtnl comment above. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/bridge/br_ioctl.c |5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) --- a/net/bridge/br_ioctl.c +++ b/net/bridge/br_ioctl.c @@ -21,18 +21,19 @@ #include #include "br_private.h" -/* called with RTNL */ static int get_bridge_ifindices(struct net *net, int *indices, int num) { struct net_device *dev; int i = 0; - for_each_netdev(net, dev) { + rcu_read_lock(); + for_each_netdev_rcu(net, dev) { if (i >= num) break; if (dev->priv_flags & IFF_EBRIDGE) indices[i++] = dev->ifindex; } + rcu_read_unlock(); return i; }
[PATCH 4.5 037/101] net: bridge: fix old ioctl unlocked net device walk
4.5-stable review patch. If anyone has any objections, please let me know. -- From: Nikolay Aleksandrov [ Upstream commit 31ca0458a61a502adb7ed192bf9716c6d05791a5 ] get_bridge_ifindices() is used from the old "deviceless" bridge ioctl calls which aren't called with rtnl held. The comment above says that it is called with rtnl but that is not really the case. Here's a sample output from a test ASSERT_RTNL() which I put in get_bridge_ifindices and executed "brctl show": [ 957.422726] RTNL: assertion failed at net/bridge//br_ioctl.c (30) [ 957.422925] CPU: 0 PID: 1862 Comm: brctl Tainted: GW O 4.6.0-rc4+ #157 [ 957.423009] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.8.1-20150318_183358- 04/01/2014 [ 957.423009] 880058adfdf0 8138dec5 0400 [ 957.423009] 81ce8380 880058adfe58 a05ead32 0001 [ 957.423009] 7ffec1a444b0 0400 880053c19130 8940 [ 957.423009] Call Trace: [ 957.423009] [] dump_stack+0x85/0xc0 [ 957.423009] [] br_ioctl_deviceless_stub+0x212/0x2e0 [bridge] [ 957.423009] [] sock_ioctl+0x22b/0x290 [ 957.423009] [] do_vfs_ioctl+0x95/0x700 [ 957.423009] [] SyS_ioctl+0x79/0x90 [ 957.423009] [] entry_SYSCALL_64_fastpath+0x23/0xc1 Since it only reads bridge ifindices, we can use rcu to safely walk the net device list. Also remove the wrong rtnl comment above. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/bridge/br_ioctl.c |5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) --- a/net/bridge/br_ioctl.c +++ b/net/bridge/br_ioctl.c @@ -21,18 +21,19 @@ #include #include "br_private.h" -/* called with RTNL */ static int get_bridge_ifindices(struct net *net, int *indices, int num) { struct net_device *dev; int i = 0; - for_each_netdev(net, dev) { + rcu_read_lock(); + for_each_netdev_rcu(net, dev) { if (i >= num) break; if (dev->priv_flags & IFF_EBRIDGE) indices[i++] = dev->ifindex; } + rcu_read_unlock(); return i; }
[PATCH 4.5 086/101] btrfs: remove error message from search ioctl for nonexistent tree
4.5-stable review patch. If anyone has any objections, please let me know. -- From: David Sterbacommit 11ea474f74709fc764fb7e80306e0776f94ce8b8 upstream. Let's remove the error message that appears when the tree_id is not present. This can happen with the quota tree and has been observed in practice. The applications are supposed to handle -ENOENT and we don't need to report that in the system log as it's not a fatal error. Reported-by: Vlastimil Babka Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/ioctl.c |2 -- 1 file changed, 2 deletions(-) --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -2097,8 +2097,6 @@ static noinline int search_ioctl(struct key.offset = (u64)-1; root = btrfs_read_fs_root_no_name(info, ); if (IS_ERR(root)) { - btrfs_err(info, "could not find root %llu", - sk->tree_id); btrfs_free_path(path); return -ENOENT; }
[PATCH 4.5 031/101] ipv6/ila: fix nlsize calculation for lwtunnel
4.5-stable review patch. If anyone has any objections, please let me know. -- From: Nicolas Dichtel[ Upstream commit 79e8dc8b80bff0bc5bbb90ca5e73044bf207c8ac ] The handler 'ila_fill_encap_info' adds one attribute: ILA_ATTR_LOCATOR. Fixes: 65d7ab8de582 ("net: Identifier Locator Addressing module") CC: Tom Herbert Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ila/ila_lwt.c |3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) --- a/net/ipv6/ila/ila_lwt.c +++ b/net/ipv6/ila/ila_lwt.c @@ -120,8 +120,7 @@ nla_put_failure: static int ila_encap_nlsize(struct lwtunnel_state *lwtstate) { - /* No encapsulation overhead */ - return 0; + return nla_total_size(sizeof(u64)); /* ILA_ATTR_LOCATOR */ } static int ila_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b)
[PATCH 4.5 077/101] drm/radeon: fix PLL sharing on DCE6.1 (v2)
4.5-stable review patch. If anyone has any objections, please let me know. -- From: Lucas Stachcommit e3c00d87845ab375f90fa6e10a5e72a3a5778cd3 upstream. On DCE6.1 PPLL2 is exclusively available to UNIPHYA, so it should not be taken into consideration when looking for an already enabled PLL to be shared with other outputs. This fixes the broken VGA port (TRAVIS DP->VGA bridge) on my Richland based laptop, where the internal display is connected to UNIPHYA through a TRAVIS DP->LVDS bridge. Bug: https://bugs.freedesktop.org/show_bug.cgi?id=78987 v2: agd: add check in radeon_get_shared_nondp_ppll as well, drop extra parameter. Signed-off-by: Lucas Stach Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/atombios_crtc.c | 10 ++ 1 file changed, 10 insertions(+) --- a/drivers/gpu/drm/radeon/atombios_crtc.c +++ b/drivers/gpu/drm/radeon/atombios_crtc.c @@ -1740,6 +1740,7 @@ static u32 radeon_get_pll_use_mask(struc static int radeon_get_shared_dp_ppll(struct drm_crtc *crtc) { struct drm_device *dev = crtc->dev; + struct radeon_device *rdev = dev->dev_private; struct drm_crtc *test_crtc; struct radeon_crtc *test_radeon_crtc; @@ -1749,6 +1750,10 @@ static int radeon_get_shared_dp_ppll(str test_radeon_crtc = to_radeon_crtc(test_crtc); if (test_radeon_crtc->encoder && ENCODER_MODE_IS_DP(atombios_get_encoder_mode(test_radeon_crtc->encoder))) { + /* PPLL2 is exclusive to UNIPHYA on DCE61 */ + if (ASIC_IS_DCE61(rdev) && !ASIC_IS_DCE8(rdev) && + test_radeon_crtc->pll_id == ATOM_PPLL2) + continue; /* for DP use the same PLL for all */ if (test_radeon_crtc->pll_id != ATOM_PPLL_INVALID) return test_radeon_crtc->pll_id; @@ -1770,6 +1775,7 @@ static int radeon_get_shared_nondp_ppll( { struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); struct drm_device *dev = crtc->dev; + struct radeon_device *rdev = dev->dev_private; struct drm_crtc *test_crtc; struct radeon_crtc *test_radeon_crtc; u32 adjusted_clock, test_adjusted_clock; @@ -1785,6 +1791,10 @@ static int radeon_get_shared_nondp_ppll( test_radeon_crtc = to_radeon_crtc(test_crtc); if (test_radeon_crtc->encoder && !ENCODER_MODE_IS_DP(atombios_get_encoder_mode(test_radeon_crtc->encoder))) { + /* PPLL2 is exclusive to UNIPHYA on DCE61 */ + if (ASIC_IS_DCE61(rdev) && !ASIC_IS_DCE8(rdev) && + test_radeon_crtc->pll_id == ATOM_PPLL2) + continue; /* check if we are already driving this connector with another crtc */ if (test_radeon_crtc->connector == radeon_crtc->connector) { /* if we are, return that pll */
[PATCH 4.5 040/101] net: fix a kernel infoleak in x25 module
4.5-stable review patch. If anyone has any objections, please let me know. -- From: Kangjie Lu[ Upstream commit 79e48650320e6fba48369fccf13fd045315b19b8 ] Stack object "dte_facilities" is allocated in x25_rx_call_request(), which is supposed to be initialized in x25_negotiate_facilities. However, 5 fields (8 bytes in total) are not initialized. This object is then copied to userland via copy_to_user, thus infoleak occurs. Signed-off-by: Kangjie Lu Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/x25/x25_facilities.c |1 + 1 file changed, 1 insertion(+) --- a/net/x25/x25_facilities.c +++ b/net/x25/x25_facilities.c @@ -277,6 +277,7 @@ int x25_negotiate_facilities(struct sk_b memset(, 0, sizeof(theirs)); memcpy(new, ours, sizeof(*new)); + memset(dte, 0, sizeof(*dte)); len = x25_parse_facilities(skb, , dte, >vc_facil_mask); if (len < 0)
[PATCH 4.5 086/101] btrfs: remove error message from search ioctl for nonexistent tree
4.5-stable review patch. If anyone has any objections, please let me know. -- From: David Sterba commit 11ea474f74709fc764fb7e80306e0776f94ce8b8 upstream. Let's remove the error message that appears when the tree_id is not present. This can happen with the quota tree and has been observed in practice. The applications are supposed to handle -ENOENT and we don't need to report that in the system log as it's not a fatal error. Reported-by: Vlastimil Babka Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/ioctl.c |2 -- 1 file changed, 2 deletions(-) --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -2097,8 +2097,6 @@ static noinline int search_ioctl(struct key.offset = (u64)-1; root = btrfs_read_fs_root_no_name(info, ); if (IS_ERR(root)) { - btrfs_err(info, "could not find root %llu", - sk->tree_id); btrfs_free_path(path); return -ENOENT; }
[PATCH 4.5 031/101] ipv6/ila: fix nlsize calculation for lwtunnel
4.5-stable review patch. If anyone has any objections, please let me know. -- From: Nicolas Dichtel [ Upstream commit 79e8dc8b80bff0bc5bbb90ca5e73044bf207c8ac ] The handler 'ila_fill_encap_info' adds one attribute: ILA_ATTR_LOCATOR. Fixes: 65d7ab8de582 ("net: Identifier Locator Addressing module") CC: Tom Herbert Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ila/ila_lwt.c |3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) --- a/net/ipv6/ila/ila_lwt.c +++ b/net/ipv6/ila/ila_lwt.c @@ -120,8 +120,7 @@ nla_put_failure: static int ila_encap_nlsize(struct lwtunnel_state *lwtstate) { - /* No encapsulation overhead */ - return 0; + return nla_total_size(sizeof(u64)); /* ILA_ATTR_LOCATOR */ } static int ila_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b)
[PATCH 4.5 077/101] drm/radeon: fix PLL sharing on DCE6.1 (v2)
4.5-stable review patch. If anyone has any objections, please let me know. -- From: Lucas Stach commit e3c00d87845ab375f90fa6e10a5e72a3a5778cd3 upstream. On DCE6.1 PPLL2 is exclusively available to UNIPHYA, so it should not be taken into consideration when looking for an already enabled PLL to be shared with other outputs. This fixes the broken VGA port (TRAVIS DP->VGA bridge) on my Richland based laptop, where the internal display is connected to UNIPHYA through a TRAVIS DP->LVDS bridge. Bug: https://bugs.freedesktop.org/show_bug.cgi?id=78987 v2: agd: add check in radeon_get_shared_nondp_ppll as well, drop extra parameter. Signed-off-by: Lucas Stach Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/atombios_crtc.c | 10 ++ 1 file changed, 10 insertions(+) --- a/drivers/gpu/drm/radeon/atombios_crtc.c +++ b/drivers/gpu/drm/radeon/atombios_crtc.c @@ -1740,6 +1740,7 @@ static u32 radeon_get_pll_use_mask(struc static int radeon_get_shared_dp_ppll(struct drm_crtc *crtc) { struct drm_device *dev = crtc->dev; + struct radeon_device *rdev = dev->dev_private; struct drm_crtc *test_crtc; struct radeon_crtc *test_radeon_crtc; @@ -1749,6 +1750,10 @@ static int radeon_get_shared_dp_ppll(str test_radeon_crtc = to_radeon_crtc(test_crtc); if (test_radeon_crtc->encoder && ENCODER_MODE_IS_DP(atombios_get_encoder_mode(test_radeon_crtc->encoder))) { + /* PPLL2 is exclusive to UNIPHYA on DCE61 */ + if (ASIC_IS_DCE61(rdev) && !ASIC_IS_DCE8(rdev) && + test_radeon_crtc->pll_id == ATOM_PPLL2) + continue; /* for DP use the same PLL for all */ if (test_radeon_crtc->pll_id != ATOM_PPLL_INVALID) return test_radeon_crtc->pll_id; @@ -1770,6 +1775,7 @@ static int radeon_get_shared_nondp_ppll( { struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); struct drm_device *dev = crtc->dev; + struct radeon_device *rdev = dev->dev_private; struct drm_crtc *test_crtc; struct radeon_crtc *test_radeon_crtc; u32 adjusted_clock, test_adjusted_clock; @@ -1785,6 +1791,10 @@ static int radeon_get_shared_nondp_ppll( test_radeon_crtc = to_radeon_crtc(test_crtc); if (test_radeon_crtc->encoder && !ENCODER_MODE_IS_DP(atombios_get_encoder_mode(test_radeon_crtc->encoder))) { + /* PPLL2 is exclusive to UNIPHYA on DCE61 */ + if (ASIC_IS_DCE61(rdev) && !ASIC_IS_DCE8(rdev) && + test_radeon_crtc->pll_id == ATOM_PPLL2) + continue; /* check if we are already driving this connector with another crtc */ if (test_radeon_crtc->connector == radeon_crtc->connector) { /* if we are, return that pll */
[PATCH 4.5 040/101] net: fix a kernel infoleak in x25 module
4.5-stable review patch. If anyone has any objections, please let me know. -- From: Kangjie Lu [ Upstream commit 79e48650320e6fba48369fccf13fd045315b19b8 ] Stack object "dte_facilities" is allocated in x25_rx_call_request(), which is supposed to be initialized in x25_negotiate_facilities. However, 5 fields (8 bytes in total) are not initialized. This object is then copied to userland via copy_to_user, thus infoleak occurs. Signed-off-by: Kangjie Lu Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/x25/x25_facilities.c |1 + 1 file changed, 1 insertion(+) --- a/net/x25/x25_facilities.c +++ b/net/x25/x25_facilities.c @@ -277,6 +277,7 @@ int x25_negotiate_facilities(struct sk_b memset(, 0, sizeof(theirs)); memcpy(new, ours, sizeof(*new)); + memset(dte, 0, sizeof(*dte)); len = x25_parse_facilities(skb, , dte, >vc_facil_mask); if (len < 0)
[PATCH 4.5 087/101] btrfs: change max_inline default to 2048
4.5-stable review patch. If anyone has any objections, please let me know. -- From: David Sterbacommit f7e98a7fff8634ae655c666dc2c9fc55a48d0a73 upstream. The current practical default is ~4k on x86_64 (the logic is more complex, simplified for brevity), the inlined files land in the metadata group and thus consume space that could be needed for the real metadata. The inlining brings some usability surprises: 1) total space consumption measured on various filesystems and btrfs with DUP metadata was quite visible because of the duplicated data within metadata 2) inlined data may exhaust the metadata, which are more precious in case the entire device space is allocated to chunks (ie. balance cannot make the space more compact) 3) performance suffers a bit as the inlined blocks are duplicate and stored far away on the device. Proposed fix: set the default to 2048 This fixes namely 1), the total filesysystem space consumption will be on par with other filesystems. Partially fixes 2), more data are pushed to the data block groups. The characteristics of 3) are based on actual small file size distribution. The change is independent of the metadata blockgroup type (though it's most visible with DUP) or system page size as these parameters are not trival to find out, compared to file size. Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/ctree.h |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2252,7 +2252,7 @@ struct btrfs_ioctl_defrag_range_args { #define BTRFS_MOUNT_FREE_SPACE_TREE(1 << 26) #define BTRFS_DEFAULT_COMMIT_INTERVAL (30) -#define BTRFS_DEFAULT_MAX_INLINE (8192) +#define BTRFS_DEFAULT_MAX_INLINE (2048) #define btrfs_clear_opt(o, opt)((o) &= ~BTRFS_MOUNT_##opt) #define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt)
[PATCH 4.5 087/101] btrfs: change max_inline default to 2048
4.5-stable review patch. If anyone has any objections, please let me know. -- From: David Sterba commit f7e98a7fff8634ae655c666dc2c9fc55a48d0a73 upstream. The current practical default is ~4k on x86_64 (the logic is more complex, simplified for brevity), the inlined files land in the metadata group and thus consume space that could be needed for the real metadata. The inlining brings some usability surprises: 1) total space consumption measured on various filesystems and btrfs with DUP metadata was quite visible because of the duplicated data within metadata 2) inlined data may exhaust the metadata, which are more precious in case the entire device space is allocated to chunks (ie. balance cannot make the space more compact) 3) performance suffers a bit as the inlined blocks are duplicate and stored far away on the device. Proposed fix: set the default to 2048 This fixes namely 1), the total filesysystem space consumption will be on par with other filesystems. Partially fixes 2), more data are pushed to the data block groups. The characteristics of 3) are based on actual small file size distribution. The change is independent of the metadata blockgroup type (though it's most visible with DUP) or system page size as these parameters are not trival to find out, compared to file size. Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/ctree.h |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2252,7 +2252,7 @@ struct btrfs_ioctl_defrag_range_args { #define BTRFS_MOUNT_FREE_SPACE_TREE(1 << 26) #define BTRFS_DEFAULT_COMMIT_INTERVAL (30) -#define BTRFS_DEFAULT_MAX_INLINE (8192) +#define BTRFS_DEFAULT_MAX_INLINE (2048) #define btrfs_clear_opt(o, opt)((o) &= ~BTRFS_MOUNT_##opt) #define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt)
[PATCH 4.5 088/101] Btrfs: fix unreplayable log after snapshot delete + parent dir fsync
4.5-stable review patch. If anyone has any objections, please let me know. -- From: Filipe Mananacommit 1ec9a1ae1e30c733077c0b288c4301b66b7a81f2 upstream. If we delete a snapshot, fsync its parent directory and crash/power fail before the next transaction commit, on the next mount when we attempt to replay the log tree of the root containing the parent directory we will fail and prevent the filesystem from mounting, which is solvable by wiping out the log trees with the btrfs-zero-log tool but very inconvenient as we will lose any data and metadata fsynced before the parent directory was fsynced. For example: $ mkfs.btrfs -f /dev/sdc $ mount /dev/sdc /mnt $ mkdir /mnt/testdir $ btrfs subvolume snapshot /mnt /mnt/testdir/snap $ btrfs subvolume delete /mnt/testdir/snap $ xfs_io -c "fsync" /mnt/testdir < crash / power failure and reboot > $ mount /dev/sdc /mnt mount: mount(2) failed: No such file or directory And in dmesg/syslog we get the following message and trace: [192066.361162] BTRFS info (device dm-0): failed to delete reference to snap, inode 257 parent 257 [192066.363010] [ cut here ] [192066.365268] WARNING: CPU: 4 PID: 5130 at fs/btrfs/inode.c:3986 __btrfs_unlink_inode+0x17a/0x354 [btrfs]() [192066.367250] BTRFS: Transaction aborted (error -2) [192066.368401] Modules linked in: btrfs dm_flakey dm_mod ppdev sha256_generic xor raid6_pq hmac drbg ansi_cprng aesni_intel acpi_cpufreq tpm_tis aes_x86_64 tpm ablk_helper evdev cryptd sg parport_pc i2c_piix4 psmouse lrw parport i2c_core pcspkr gf128mul processor serio_raw glue_helper button loop autofs4 ext4 crc16 mbcache jbd2 sd_mod sr_mod cdrom ata_generic virtio_scsi ata_piix libata virtio_pci virtio_ring crc32c_intel scsi_mod e1000 virtio floppy [last unloaded: btrfs] [192066.377154] CPU: 4 PID: 5130 Comm: mount Tainted: GW 4.4.0-rc6-btrfs-next-20+ #1 [192066.378875] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS by qemu-project.org 04/01/2014 [192066.380889] 880143923670 81257570 8801439236b8 [192066.382561] 8801439236a8 8104ec07 a039dc2c fffe [192066.384191] 8801ed31d000 8801b9fc9c88 8801086875e0 880143923710 [192066.385827] Call Trace: [192066.386373] [] dump_stack+0x4e/0x79 [192066.387387] [] warn_slowpath_common+0x99/0xb2 [192066.388429] [] ? __btrfs_unlink_inode+0x17a/0x354 [btrfs] [192066.389236] [] warn_slowpath_fmt+0x48/0x50 [192066.389884] [] __btrfs_unlink_inode+0x17a/0x354 [btrfs] [192066.390621] [] ? iput+0xb0/0x266 [192066.391200] [] btrfs_unlink_inode+0x1c/0x3d [btrfs] [192066.391930] [] check_item_in_log+0x1fe/0x29b [btrfs] [192066.392715] [] replay_dir_deletes+0x167/0x1cf [btrfs] [192066.393510] [] replay_one_buffer+0x417/0x570 [btrfs] [192066.394241] [] walk_up_log_tree+0x10e/0x1dc [btrfs] [192066.394958] [] walk_log_tree+0xa5/0x190 [btrfs] [192066.395628] [] btrfs_recover_log_trees+0x239/0x32c [btrfs] [192066.396790] [] ? replay_one_extent+0x50a/0x50a [btrfs] [192066.397891] [] open_ctree+0x1d8b/0x2167 [btrfs] [192066.398897] [] btrfs_mount+0x5ef/0x729 [btrfs] [192066.399823] [] ? trace_hardirqs_on+0xd/0xf [192066.400739] [] ? lockdep_init_map+0xb9/0x1b3 [192066.401700] [] mount_fs+0x67/0x131 [192066.402482] [] vfs_kern_mount+0x6c/0xde [192066.403930] [] btrfs_mount+0x1cb/0x729 [btrfs] [192066.404831] [] ? trace_hardirqs_on+0xd/0xf [192066.405726] [] ? lockdep_init_map+0xb9/0x1b3 [192066.406621] [] mount_fs+0x67/0x131 [192066.407401] [] vfs_kern_mount+0x6c/0xde [192066.408247] [] do_mount+0x893/0x9d2 [192066.409047] [] ? strndup_user+0x3f/0x8c [192066.409842] [] SyS_mount+0x75/0xa1 [192066.410621] [] entry_SYSCALL_64_fastpath+0x12/0x6b [192066.411572] ---[ end trace 2de42126c1e0a0f0 ]--- [192066.412344] BTRFS: error (device dm-0) in __btrfs_unlink_inode:3986: errno=-2 No such entry [192066.413748] BTRFS: error (device dm-0) in btrfs_replay_log:2464: errno=-2 No such entry (Failed to recover log tree) [192066.415458] BTRFS error (device dm-0): cleaner transaction attach returned -30 [192066.444613] BTRFS: open_ctree failed This happens because when we are replaying the log and processing the directory entry pointing to the snapshot in the subvolume tree, we treat its btrfs_dir_item item as having a location with a key type matching BTRFS_INODE_ITEM_KEY, which is wrong because the type matches BTRFS_ROOT_ITEM_KEY and therefore must be processed differently, as the object id refers to a root number and not to an inode in the root containing the parent directory. So fix this by triggering a transaction commit if an fsync against the parent directory is requested after deleting a snapshot. This is the simplest approach for a rare use case. Some alternative that avoids the transaction commit would require more code to explicitly delete the snapshot at log replay time (factoring out common code
RE: [v5,1/7] QE: Add IC, SI and SIRAM document to device tree bindings.
On Tue, May 17, 2016 at 07:22AM, Scott Wood wrote: > -Original Message- > From: Scott Wood [mailto:o...@buserror.net] > Sent: Tuesday, May 17, 2016 7:22 AM > To: Qiang Zhao> Cc: robh...@kernel.org; devicet...@vger.kernel.org; linux- > ker...@vger.kernel.org; Xiaobo Xie ; Yang-Leo Li > ; linuxppc-...@lists.ozlabs.org > Subject: Re: [v5,1/7] QE: Add IC, SI and SIRAM document to device tree > bindings. > > On Wed, Mar 09, 2016 at 09:21:28AM +0800, Zhao Qiang wrote: > > Add IC, SI and SIRAM document of QE to > > Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/qe.txt > > > > Signed-off-by: Zhao Qiang > > Acked-by: Rob Herring > > --- > > changes for v2 > > - Add interrupt-controller in Required properties > > - delete address-cells and size-cells for qe-si and qe-siram Changes > > for v3 > > - Add SoC specific caompatible strings to qe-si and qe-siram Changes > > for v4 > > - NA > > Changes for v5 > > - NA > > > > .../devicetree/bindings/powerpc/fsl/cpm_qe/qe.txt | 50 > > ++ > > 1 file changed, 50 insertions(+) > > +* Serial Interface Block (SI) > > + > > +The SI manages the routing of eight TDM lines to the QE block serial > > +drivers , the MCC and the UCCs, for receive and transmit. > > + > > +Required properties: > > +- compatible : should be "fsl,t1040-qe-si". > > +- reg : Address range of SI register set. > > Is t1040 the only chip that has or will ever have this? There also be t1024 and ls1043 supporting si. I thought to add them when adding their device node. If you think it is better to add them now, I will modify. -Zhao Qiang
[PATCH 4.5 089/101] Btrfs: fix file loss on log replay after renaming a file and fsync
4.5-stable review patch. If anyone has any objections, please let me know. -- From: Filipe Mananacommit 2be63d5ce929603d4e7cedabd9e992eb34a0ff95 upstream. We have two cases where we end up deleting a file at log replay time when we should not. For this to happen the file must have been renamed and a directory inode must have been fsynced/logged. Two examples that exercise these two cases are listed below. Case 1) $ mkfs.btrfs -f /dev/sdb $ mount /dev/sdb /mnt $ mkdir -p /mnt/a/b $ mkdir /mnt/c $ touch /mnt/a/b/foo $ sync $ mv /mnt/a/b/foo /mnt/c/ # Create file bar just to make sure the fsync on directory a/ does # something and it's not a no-op. $ touch /mnt/a/bar $ xfs_io -c "fsync" /mnt/a < power fail / crash > The next time the filesystem is mounted, the log replay procedure deletes file foo. Case 2) $ mkfs.btrfs -f /dev/sdb $ mount /dev/sdb /mnt $ mkdir /mnt/a $ mkdir /mnt/b $ mkdir /mnt/c $ touch /mnt/a/foo $ ln /mnt/a/foo /mnt/b/foo_link $ touch /mnt/b/bar $ sync $ unlink /mnt/b/foo_link $ mv /mnt/b/bar /mnt/c/ $ xfs_io -c "fsync" /mnt/a/foo < power fail / crash > The next time the filesystem is mounted, the log replay procedure deletes file bar. The reason why the files are deleted is because when we log inodes other then the fsync target inode, we ignore their last_unlink_trans value and leave the log without enough information to later replay the rename operations. So we need to look at the last_unlink_trans values and fallback to a transaction commit if they are greater than the id of the last committed transaction. So fix this by looking at the last_unlink_trans values and fallback to transaction commits when needed. Also, when logging other inodes (for case 1 we logged descendants of the fsync target inode while for case 2 we logged ascendants) we need to care about concurrent tasks updating the last_unlink_trans of inodes we are logging (which was already an existing problem in check_parent_dirs_for_sync()). Since we can not acquire their inode mutex (vfs' struct inode ->i_mutex), as that causes deadlocks with other concurrent operations that acquire the i_mutex of 2 inodes (other fsyncs or renames for example), we need to serialize on the log_mutex of the inode we are logging. A task setting a new value for an inode's last_unlink_trans must acquire the inode's log_mutex and it must do this update before doing the actual unlink operation (which is already the case except when deleting a snapshot). Conversely the task logging the inode must first log the inode and then check the inode's last_unlink_trans value while holding its log_mutex, as if its value is not greater then the id of the last committed transaction it means it logged a safe state of the inode's items, while if its value is not smaller then the id of the last committed transaction it means the inode state it has logged might not be safe (the concurrent task might have just updated last_unlink_trans but hasn't done yet the unlink operation) and therefore a transaction commit must be done. Test cases for xfstests follow in separate patches. Signed-off-by: Filipe Manana Signed-off-by: Chris Mason Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/ioctl.c|4 +-- fs/btrfs/tree-log.c | 67 2 files changed, 59 insertions(+), 12 deletions(-) --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -2475,6 +2475,8 @@ static noinline int btrfs_ioctl_snap_des trans->block_rsv = _rsv; trans->bytes_reserved = block_rsv.size; + btrfs_record_snapshot_destroy(trans, dir); + ret = btrfs_unlink_subvol(trans, root, dir, dest->root_key.objectid, dentry->d_name.name, @@ -2526,8 +2528,6 @@ static noinline int btrfs_ioctl_snap_des out_end_trans: trans->block_rsv = NULL; trans->bytes_reserved = 0; - if (!err) - btrfs_record_snapshot_destroy(trans, dir); ret = btrfs_end_transaction(trans, root); if (ret && !err) err = ret; --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -4909,6 +4909,42 @@ out_unlock: } /* + * Check if we must fallback to a transaction commit when logging an inode. + * This must be called after logging the inode and is used only in the context + * when fsyncing an inode requires the need to log some other inode - in which + * case we can't lock the i_mutex of each other inode we need to log as that + * can lead to deadlocks with concurrent fsync against other inodes (as we can + * log inodes up or down in the hierarchy) or rename operations for example. So + * we take the log_mutex of the inode after we have logged it and then check for + * its last_unlink_trans value - this is safe because any task setting + * last_unlink_trans must
[PATCH 4.5 058/101] spi: spi-ti-qspi: Fix FLEN and WLEN settings if bits_per_word is overridden
4.5-stable review patch. If anyone has any objections, please let me know. -- From: Ben Hutchingscommit ea1b60fb085839a9544cb3a0069992991beabb7f upstream. Each transfer can specify 8, 16 or 32 bits per word independently of the default for the device being addressed. However, currently we calculate the number of words in the frame assuming that the word size is the device default. If multiple transfers in the same message have differing bits_per_word, we bitwise-or the different values in the WLEN register field. Fix both of these. Also rename 'frame_length' to 'frame_len_words' to make clear that it's not a byte count like spi_message::frame_length. Signed-off-by: Ben Hutchings Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-ti-qspi.c | 15 +-- 1 file changed, 9 insertions(+), 6 deletions(-) --- a/drivers/spi/spi-ti-qspi.c +++ b/drivers/spi/spi-ti-qspi.c @@ -94,6 +94,7 @@ struct ti_qspi { #define QSPI_FLEN(n) ((n - 1) << 0) #define QSPI_WLEN_MAX_BITS 128 #define QSPI_WLEN_MAX_BYTES16 +#define QSPI_WLEN_MASK QSPI_WLEN(QSPI_WLEN_MAX_BITS) /* STATUS REGISTER */ #define BUSY 0x01 @@ -373,7 +374,7 @@ static int ti_qspi_start_transfer_one(st struct spi_device *spi = m->spi; struct spi_transfer *t; int status = 0, ret; - int frame_length; + unsigned int frame_len_words; /* setup device control reg */ qspi->dc = 0; @@ -385,21 +386,23 @@ static int ti_qspi_start_transfer_one(st if (spi->mode & SPI_CS_HIGH) qspi->dc |= QSPI_CSPOL(spi->chip_select); - frame_length = (m->frame_length << 3) / spi->bits_per_word; - - frame_length = clamp(frame_length, 0, QSPI_FRAME); + frame_len_words = 0; + list_for_each_entry(t, >transfers, transfer_list) + frame_len_words += t->len / (t->bits_per_word >> 3); + frame_len_words = min_t(unsigned int, frame_len_words, QSPI_FRAME); /* setup command reg */ qspi->cmd = 0; qspi->cmd |= QSPI_EN_CS(spi->chip_select); - qspi->cmd |= QSPI_FLEN(frame_length); + qspi->cmd |= QSPI_FLEN(frame_len_words); ti_qspi_write(qspi, qspi->dc, QSPI_SPI_DC_REG); mutex_lock(>list_lock); list_for_each_entry(t, >transfers, transfer_list) { - qspi->cmd |= QSPI_WLEN(t->bits_per_word); + qspi->cmd = ((qspi->cmd & ~QSPI_WLEN_MASK) | +QSPI_WLEN(t->bits_per_word)); ret = qspi_transfer_msg(qspi, t); if (ret) {