Re: [PATCH] selftest: fix kselftest-merge depend on 'RUNTIME_TESTING_MENU'

2018-02-22 Thread Zong Li
2018-02-23 3:57 GMT+08:00 Anders Roxell :
> On 22 February 2018 at 12:53, Zong Li  wrote:
>> Since the 'commit d3deafaa8b5c ("lib/: make RUNTIME_TESTS a menuconfig
>> to ease disabling it all")', the make kselftest-merge cannot merge the
>> config dependencies of kselftest to the existing .config file.
>>
>> These config dependencies of kselftest need to enable the
>> 'CONFIG_RUNTIME_TESTING_MENU=y' at the same time.
>
> Is this patch needed when patch sha 'f29c79906064 ("lib/Kconfig.debug: enable
> RUNTIME_TESTING_MENU")' find its way into the kernel ?
> I think it's in linux-next now.

Thanks. The patch sha 'f29c79906064 ("lib/Kconfig.debug: enable
RUNTIME_TESTING_MENU")' does fix make kselftest-merge in the default
situation, but I think kselftest-merge should resolve the dependencies
without that prerequisite: it should enable 'RUNTIME_TESTING_MENU'
itself when merging the config of some selftests if
RUNTIME_TESTING_MENU is not set.

Best Regards,
Zong Li


[PATCH bpf-next] bpf: NULL pointer check is not needed in BPF_CGROUP_RUN_PROG_INET_SOCK

2018-02-22 Thread Yafang Shao
sk is already allocated in inet_create/inet6_create, hence when
BPF_CGROUP_RUN_PROG_INET_SOCK is executed sk will never be NULL.

The logic is as below:
sk = sk_alloc();
if (!sk)
goto out;
BPF_CGROUP_RUN_PROG_INET_SOCK(sk);

Signed-off-by: Yafang Shao 
---
 include/linux/bpf-cgroup.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index a7f16e0..8a45666 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -96,7 +96,7 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 
major, u32 minor,
 #define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) \
 ({\
int __ret = 0; \
-   if (cgroup_bpf_enabled && sk) {\
+   if (cgroup_bpf_enabled) {  \
__ret = __cgroup_bpf_run_filter_sk(sk, \
 BPF_CGROUP_INET_SOCK_CREATE); \
}  \
--
1.8.3.1



[PATCH net-next] net/ncsi: Add generic netlink family

2018-02-22 Thread Samuel Mendoza-Jonas
Add a generic netlink family for NCSI. This supports two commands;
NCSI_CMD_PKG_INFO which returns information on packages and their
associated channels, and NCSI_CMD_SET_INTERFACE which allows a specific
package or package/channel combination to be set as the preferred
choice.

Signed-off-by: Samuel Mendoza-Jonas 
---
 include/uapi/linux/ncsi.h | 113 +
 net/ncsi/Makefile |   2 +-
 net/ncsi/internal.h   |   3 +
 net/ncsi/ncsi-manage.c|  30 +++-
 net/ncsi/ncsi-netlink.c   | 394 ++
 net/ncsi/ncsi-netlink.h   |  20 +++
 6 files changed, 557 insertions(+), 5 deletions(-)
 create mode 100644 include/uapi/linux/ncsi.h
 create mode 100644 net/ncsi/ncsi-netlink.c
 create mode 100644 net/ncsi/ncsi-netlink.h

diff --git a/include/uapi/linux/ncsi.h b/include/uapi/linux/ncsi.h
new file mode 100644
index ..aecab3fb92df
--- /dev/null
+++ b/include/uapi/linux/ncsi.h
@@ -0,0 +1,113 @@
+/*
+ * Copyright Samuel Mendoza-Jonas, IBM Corporation 2018.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef __UAPI_NCSI_NETLINK_H__
+#define __UAPI_NCSI_NETLINK_H__
+
+/**
+ * enum ncsi_nl_commands - supported NCSI commands
+ *
+ * @NCSI_CMD_UNSPEC: unspecified command to catch errors
+ * @NCSI_CMD_SET_INTERFACE: set preferred package and channel combination.
+ * Requires NCSI_ATTR_IFINDEX and the preferred NCSI_ATTR_PACKAGE_ID and
+ * optionally the preferred NCSI_ATTR_CHANNEL_ID. If neither IDs are
+ * specified the setting is cleared.
+ * @NCSI_CMD_PKG_INFO: list package and channel attributes. Requires
+ * NCSI_ATTR_IFINDEX. If NCSI_ATTR_PACKAGE_ID is specified returns the
+ * specific package and its channels - otherwise a dump request returns
+ * all packages and their associated channels.
+ * @NCSI_CMD_MAX: highest command number
+ */
+enum ncsi_nl_commands {
+   NCSI_CMD_UNSPEC,
+   NCSI_CMD_SET_INTERFACE,
+   NCSI_CMD_PKG_INFO,
+
+   __NCSI_CMD_AFTER_LAST,
+   NCSI_CMD_MAX = __NCSI_CMD_AFTER_LAST - 1
+};
+
+/**
+ * enum ncsi_nl_attrs - General NCSI netlink attributes
+ *
+ * @NCSI_ATTR_UNSPEC: unspecified attributes to catch errors
+ * @NCSI_ATTR_IFINDEX: ifindex of network device using NCSI
+ * @NCSI_ATTR_PACKAGE_LIST: nested array of NCSI_PKG_ATTR attributes
+ * @NCSI_ATTR_PACKAGE_ID: package ID
+ * @NCSI_ATTR_CHANNEL_ID: channel ID
+ * @NCSI_ATTR_MAX: highest attribute number
+ */
+enum ncsi_nl_attrs {
+   NCSI_ATTR_UNSPEC,
+   NCSI_ATTR_IFINDEX,
+   NCSI_ATTR_PACKAGE_LIST,
+   NCSI_ATTR_PACKAGE_ID,
+   NCSI_ATTR_CHANNEL_ID,
+
+   __NCSI_ATTR_AFTER_LAST,
+   NCSI_ATTR_MAX = __NCSI_ATTR_AFTER_LAST - 1
+};
+
+/**
+ * enum ncsi_nl_pkg_attrs - NCSI netlink package-specific attributes
+ *
+ * @NCSI_PKG_ATTR_UNSPEC: unspecified attributes to catch errors
+ * @NCSI_PKG_ATTR: nested array of package attributes
+ * @NCSI_PKG_ATTR_ID: package ID
+ * @NCSI_PKG_ATTR_FORCED: flag signifying a package has been set as preferred
+ * @NCSI_PKG_ATTR_CHANNEL_LIST: nested array of NCSI_CHANNEL_ATTR attributes
+ * @NCSI_PKG_ATTR_MAX: highest attribute number
+ */
+enum ncsi_nl_pkg_attrs {
+   NCSI_PKG_ATTR_UNSPEC,
+   NCSI_PKG_ATTR,
+   NCSI_PKG_ATTR_ID,
+   NCSI_PKG_ATTR_FORCED,
+   NCSI_PKG_ATTR_CHANNEL_LIST,
+
+   __NCSI_PKG_ATTR_AFTER_LAST,
+   NCSI_PKG_ATTR_MAX = __NCSI_PKG_ATTR_AFTER_LAST - 1
+};
+
+/**
+ * enum ncsi_nl_channel_attrs - NCSI netlink channel-specific attributes
+ *
+ * @NCSI_CHANNEL_ATTR_UNSPEC: unspecified attributes to catch errors
+ * @NCSI_CHANNEL_ATTR: nested array of channel attributes
+ * @NCSI_CHANNEL_ATTR_ID: channel ID
+ * @NCSI_CHANNEL_ATTR_VERSION_MAJOR: channel major version number
+ * @NCSI_CHANNEL_ATTR_VERSION_MINOR: channel minor version number
+ * @NCSI_CHANNEL_ATTR_VERSION_STR: channel version string
+ * @NCSI_CHANNEL_ATTR_LINK_STATE: channel link state flags
+ * @NCSI_CHANNEL_ATTR_ACTIVE: channels with this flag are in
+ * NCSI_CHANNEL_ACTIVE state
+ * @NCSI_CHANNEL_ATTR_FORCED: flag signifying a channel has been set as
+ * preferred
+ * @NCSI_CHANNEL_ATTR_VLAN_LIST: nested array of NCSI_CHANNEL_ATTR_VLAN_IDs
+ * @NCSI_CHANNEL_ATTR_VLAN_ID: VLAN ID being filtered on this channel
+ * @NCSI_CHANNEL_ATTR_MAX: highest attribute number
+ */
+enum ncsi_nl_channel_attrs {
+   NCSI_CHANNEL_ATTR_UNSPEC,
+   NCSI_CHANNEL_ATTR,
+   NCSI_CHANNEL_ATTR_ID,
+   NCSI_CHANNEL_ATTR_VERSION_MAJOR,
+   NCSI_CHANNEL_ATTR_VERSION_MINOR,
+   NCSI_CHANNEL_ATTR_VERSION_STR,
+   NCSI_CHANNEL_ATTR_LINK_STATE,
+   NCSI_CHANNEL_ATTR_ACTIVE,
+   NCSI_CHANNEL_ATTR_FORCED,
+   NCSI_CHANNEL_ATTR_VLAN_LIST,
+   NCSI_CHANNEL_ATTR_VLAN_ID,
+
+   

[PATCH v2 net] net_sched: gen_estimator: fix broken estimators based on percpu stats

2018-02-22 Thread Eric Dumazet
From: Eric Dumazet 

pfifo_fast got percpu stats lately, uncovering a bug I introduced last
year in linux-4.10.

I missed the fact that we have to clear our temporary storage
before calling __gnet_stats_copy_basic() in the case of percpu stats.

Without this fix, rate estimators (tc qd replace dev xxx root est 1sec
4sec pfifo_fast) are utterly broken.

Fixes: 1c0d32fde5bd ("net_sched: gen_estimator: complete rewrite of rate 
estimators")
Signed-off-by: Eric Dumazet 
---
v2: Perform the zeroing in est_fetch_counters() instead of caller(s)

 net/core/gen_estimator.c |1 +
 1 file changed, 1 insertion(+)

diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
index 
0a3f88f08727f1f1217560407ff539c8a8c17496..98fd12721221e4aa26e4d11be9de6c0305fb6dd9
 100644
--- a/net/core/gen_estimator.c
+++ b/net/core/gen_estimator.c
@@ -66,6 +66,7 @@ struct net_rate_estimator {
 static void est_fetch_counters(struct net_rate_estimator *e,
   struct gnet_stats_basic_packed *b)
 {
+   memset(b, 0, sizeof(*b));
if (e->stats_lock)
spin_lock(e->stats_lock);
 



Re: [PATCH net] net_sched: gen_estimator: fix broken estimators based on percpu stats

2018-02-22 Thread Eric Dumazet
On Thu, 2018-02-22 at 19:36 -0800, Eric Dumazet wrote:
> From: Eric Dumazet 
> 
> pfifo_fast got percpu stats lately, uncovering a bug I introduced last
> year in linux-4.10.
> 
> I missed the fact that we have to clear our temporary storage
> before calling __gnet_stats_copy_basic() in the case of percpu stats.
> 
> Without this fix, rate estimators (tc qd replace dev xxx root est 1sec
> 4sec pfifo_fast) are utterly broken.
> 
> Fixes: 1c0d32fde5bd ("net_sched: gen_estimator: complete rewrite of rate 
> estimators")
> Signed-off-by: Eric Dumazet 
> ---
>  net/core/gen_estimator.c |2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
> index 
> 0a3f88f08727f1f1217560407ff539c8a8c17496..f13ea3c1fa3eddc6be172df9eda8828da76045e7
>  100644
> --- a/net/core/gen_estimator.c
> +++ b/net/core/gen_estimator.c
> @@ -79,7 +79,7 @@ static void est_fetch_counters(struct net_rate_estimator *e,
>  static void est_timer(struct timer_list *t)
>  {
>   struct net_rate_estimator *est = from_timer(est, t, timer);
> - struct gnet_stats_basic_packed b;
> + struct gnet_stats_basic_packed b = {0};
>   u64 rate, brate;
>  
>   est_fetch_counters(est, &b);

Oh I sent the wrong version of the patch, sorry.

Will send a V2.



[PATCH net] net_sched: gen_estimator: fix broken estimators based on percpu stats

2018-02-22 Thread Eric Dumazet
From: Eric Dumazet 

pfifo_fast got percpu stats lately, uncovering a bug I introduced last
year in linux-4.10.

I missed the fact that we have to clear our temporary storage
before calling __gnet_stats_copy_basic() in the case of percpu stats.

Without this fix, rate estimators (tc qd replace dev xxx root est 1sec
4sec pfifo_fast) are utterly broken.

Fixes: 1c0d32fde5bd ("net_sched: gen_estimator: complete rewrite of rate 
estimators")
Signed-off-by: Eric Dumazet 
---
 net/core/gen_estimator.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
index 
0a3f88f08727f1f1217560407ff539c8a8c17496..f13ea3c1fa3eddc6be172df9eda8828da76045e7
 100644
--- a/net/core/gen_estimator.c
+++ b/net/core/gen_estimator.c
@@ -79,7 +79,7 @@ static void est_fetch_counters(struct net_rate_estimator *e,
 static void est_timer(struct timer_list *t)
 {
struct net_rate_estimator *est = from_timer(est, t, timer);
-   struct gnet_stats_basic_packed b;
+   struct gnet_stats_basic_packed b = {0};
u64 rate, brate;
 
est_fetch_counters(est, &b);



Re: [PATCH V7 2/4] sctp: Add ip option support

2018-02-22 Thread Marcelo Ricardo Leitner
On Thu, Feb 22, 2018 at 06:08:05PM -0500, Paul Moore wrote:
> On Wed, Feb 21, 2018 at 3:45 PM, Paul Moore  wrote:
> > On February 21, 2018 9:33:51 AM Marcelo Ricardo Leitner 
> >  wrote:
> >> On Tue, Feb 20, 2018 at 07:15:27PM +, Richard Haines wrote:
> >>> Add ip option support to allow LSM security modules to utilise CIPSO/IPv4
> >>> and CALIPSO/IPv6 services.
> >>>
> >>> Signed-off-by: Richard Haines 
> >>
> >> LGTM too, thanks!
> >>
> >> Acked-by: Marcelo Ricardo Leitner 
> >
> > I agree, thanks everyone for all the work, review, and patience behind this 
> > patchset!  I'll work on merging this into selinux/next and I'll send a note 
> > when it's done.
> 
> I just merged the four patches (1,3,4 from the v6 patchset, 2 from the
> v7 patchset) in selinux/next and did a quick sanity test on the kernel
> (booted, no basic SELinux regressions).  Additional testing help is
> always appreciated ...

I'll try it early next week.

Any ideas on when this is going to appear on Dave's net-next tree?
We have a lot of SCTP changes to be posted on this cycle and would be
nice if we could avoid merge conflicts.

> 
> * git://git.kernel.org/pub/scm/linux/kernel/git/pcmoore/selinux.git
> * https://git.kernel.org/pub/scm/linux/kernel/git/pcmoore/selinux.git
> 
> -- 
> paul moore
> www.paul-moore.com


Re: [pull request][for-next 0/7] Mellanox, mlx5 shared code updates 2018-02-21

2018-02-22 Thread Doug Ledford
On Thu, 2018-02-22 at 14:39 -0500, David Miller wrote:
> From: Saeed Mahameed 
> Date: Wed, 21 Feb 2018 12:13:47 -0800
> 
> > This series includes shared code updates for mlx5 core driver for both
> > netdev and rdma subsystems.  This series should be pulled to both
> > trees so we can continue netdev and rdma specific submissions separately.
> > 
> > For more information please see tag log below.
> > 
> > P.S. We expect two more shared code pull requests.
> > 
> > The series doesn't cause any conflict with the latest mlx5 net fixes
> > series.
> > 
> > Please pull and let me know if there's any issue,
> 
> Looks good to me, pulled into net-next, thanks.

Thanks, pulled into rdma-next.

-- 
Doug Ledford 
GPG KeyID: B826A3330E572FDD
Key fingerprint = AE6B 1BDA 122B 23B4 265B  1274 B826 A333 0E57 2FDD

signature.asc
Description: This is a digitally signed message part


Re: [PATCH net v3 2/2] tuntap: correctly add the missing xdp flush

2018-02-22 Thread Jason Wang



On 2018年02月23日 01:46, Jesper Dangaard Brouer wrote:

On Thu, 22 Feb 2018 17:36:46 +0800
Jason Wang  wrote:


Commit 762c330d670e ("tuntap: add missing xdp flush") tries to fix the
devmap stall caused by missed xdp flush by counting the pending xdp
redirected packets and flush when it exceeds NAPI_POLL_WEIGHT or
MSG_MORE is clear. This may lead to BUG() since xdp_do_flush() was
called in the process context with preemption enabled. Simply
disabling preemption may silence the warning but is not enough, since
the process may move between different CPUs during a batch, which causes
xdp_do_flush() to miss some CPU where the process ran
previously. Considering the fallout, that commit was reverted. To fix
the issue correctly, we can simply call xdp_do_flush() immediately
after xdp_do_redirect(), a side effect is that this removes any
possibility of batching which could be addressed in the future.

Reported-by: Christoffer Dall 
Fixes: 762c330d670e ("tuntap: add missing xdp flush")
Signed-off-by: Jason Wang 
---
  drivers/net/tun.c | 1 +
  1 file changed, 1 insertion(+)

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 2823a4a..a363ea2 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1662,6 +1662,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct 
*tun,
get_page(alloc_frag->page);
alloc_frag->offset += buflen;
err = xdp_do_redirect(tun->dev, &xdp, xdp_prog);
+   xdp_do_flush_map();
if (err)
goto err_redirect;
rcu_read_unlock();

As you have noticed, xdp_do_redirect() + xdp_do_flush_map() rely
heavily on being executed in softirq/napi_schedule context.
In particular, the map infra (devmap[1] + cpumap) depends on the enqueue
and flush operations happening on the same CPU (e.g. devmap stores which
devices need flushing in a this_cpu_ptr bitmap [1]).

What context is tun_build_skb() invoked under?

Even when you call xdp_do_redirect and xdp_do_flush_map right after
each-other, are we sure we cannot be preempted here?


Ok, I missed the fact that we can be preempted here with preemptible RCU.
Let me disable preemption here and post a V4.


Thanks




[1] https://github.com/torvalds/linux/blob/master/kernel/bpf/devmap.c#L209-L215




pull-request: bpf 2018-02-22

2018-02-22 Thread Alexei Starovoitov
Hi David,

The following pull-request contains BPF updates for your *net* tree.

The main changes are:

1) two urgent fixes for bpf_tail_call logic for x64 and arm64 JITs, from Daniel.

2) cond_resched points in percpu array alloc/free paths, from Eric.

3) lockdep and other minor fixes, from Yonghong, Arnd, Anders, Li.

Please consider pulling these changes from:

  git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf.git

Thanks a lot!



The following changes since commit cfd092f2db8b4b6727e1c03ef68a7842e1023573:

  amd-xgbe: Restore PCI interrupt enablement setting on resume (2018-02-21 
15:39:54 -0500)

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf.git 

for you to fetch changes up to 16338a9b3ac30740d49f5dfed81bac0ffa53b9c7:

  bpf, arm64: fix out of bounds access in tail call (2018-02-22 16:06:28 -0800)


Anders Roxell (2):
  selftests/bpf: tcpbpf_kern: use in6_* macros from glibc
  selftests/bpf: update gitignore with test_libbpf_open

Arnd Bergmann (1):
  bpf: clean up unused-variable warning

Daniel Borkmann (2):
  bpf, x64: implement retpoline for tail call
  bpf, arm64: fix out of bounds access in tail call

Eric Dumazet (1):
  bpf: add schedule points in percpu arrays management

Li Zhijian (1):
  selftests/bpf/test_maps: exit child process without error in ENOMEM case

Yonghong Song (1):
  bpf: fix rcu lockdep warning for lpm_trie map_free callback

 arch/arm64/net/bpf_jit_comp.c  |  5 ++--
 arch/x86/include/asm/nospec-branch.h   | 37 ++
 arch/x86/net/bpf_jit_comp.c|  9 ---
 kernel/bpf/arraymap.c  |  5 +++-
 kernel/bpf/lpm_trie.c  |  3 +--
 net/core/filter.c  |  6 +
 tools/testing/selftests/bpf/.gitignore |  1 +
 tools/testing/selftests/bpf/test_maps.c|  2 ++
 tools/testing/selftests/bpf/test_tcpbpf_kern.c |  1 -
 tools/testing/selftests/bpf/test_verifier.c| 26 ++
 10 files changed, 80 insertions(+), 15 deletions(-)


Re: [PATCH bpf] bpf, arm64: fix out of bounds access in tail call

2018-02-22 Thread Alexei Starovoitov
On Fri, Feb 23, 2018 at 01:03:43AM +0100, Daniel Borkmann wrote:
> I recently noticed a crash on arm64 when feeding a bogus index
> into BPF tail call helper. The crash would not occur when the
> interpreter is used, but only in case of JIT. Output looks as
> follows:
> 
>   [  347.007486] Unable to handle kernel paging request at virtual address 
> fffb850e96492510
>   [...]
>   [  347.043065] [fffb850e96492510] address between user and kernel address 
> ranges
>   [  347.050205] Internal error: Oops: 9604 [#1] SMP
>   [...]
>   [  347.190829] x13:  x12: 
>   [  347.196128] x11: fffc047ebe782800 x10: 808fd7d0fd10
>   [  347.201427] x9 :  x8 : 
>   [  347.206726] x7 :  x6 : 001c99173800
>   [  347.212025] x5 : 0018 x4 : ba5a
>   [  347.217325] x3 : 000329c4 x2 : 808fd7cf0500
>   [  347.222625] x1 : 808fd7d0fc00 x0 : 808fd7cf0500
>   [  347.227926] Process test_verifier (pid: 4548, stack limit = 
> 0x7467fa61)
>   [  347.235221] Call trace:
>   [  347.237656]  0x02f3a4fc
>   [  347.240784]  bpf_test_run+0x78/0xf8
>   [  347.244260]  bpf_prog_test_run_skb+0x148/0x230
>   [  347.248694]  SyS_bpf+0x77c/0x1110
>   [  347.251999]  el0_svc_naked+0x30/0x34
>   [  347.255564] Code: 9100075a d280220a 8b0a002a d37df04b (f86b694b)
>   [...]
> 
> In this case the index used in BPF r3 is the same as in r1
> at the time of the call, meaning we fed a pointer as index;
> here, it had the value 0x808fd7cf0500 which sits in x2.
> 
> While I found tail calls to be working in general (also for
> hitting the error cases), I noticed the following in the code
> emission:
> 
>   # bpftool p d j i 988
>   [...]
>   38:   ldr w10, [x1,x10]
>   3c:   cmp w2, w10
>   40:   b.ge0x007c  <-- signed cmp
>   44:   mov x10, #0x20  // #32
>   48:   cmp x26, x10
>   4c:   b.gt0x007c
>   50:   add x26, x26, #0x1
>   54:   mov x10, #0x110 // #272
>   58:   add x10, x1, x10
>   5c:   lsl x11, x2, #3
>   60:   ldr x11, [x10,x11]  <-- faulting insn (f86b694b)
>   64:   cbz x11, 0x007c
>   [...]
> 
> Meaning, the tests passed because commit ddb55992b04d ("arm64:
> bpf: implement bpf_tail_call() helper") was using signed compares
> instead of unsigned which as a result had the test wrongly passing.
> 
> Change this but also the tail call count test both into unsigned
> and cap the index as u32. Latter we did as well in 90caccdd8cc0
> ("bpf: fix bpf_tail_call() x64 JIT") and is needed in addition here,
> too. Tested on HiSilicon Hi1616.
> 
> Result after patch:
> 
>   # bpftool p d j i 268
>   [...]
>   38: ldr w10, [x1,x10]
>   3c: add w2, w2, #0x0
>   40: cmp w2, w10
>   44: b.cs0x0080
>   48: mov x10, #0x20  // #32
>   4c: cmp x26, x10
>   50: b.hi0x0080
>   54: add x26, x26, #0x1
>   58: mov x10, #0x110 // #272
>   5c: add x10, x1, x10
>   60: lsl x11, x2, #3
>   64: ldr x11, [x10,x11]
>   68: cbz x11, 0x0080
>   [...]
> 
> Fixes: ddb55992b04d ("arm64: bpf: implement bpf_tail_call() helper")
> Signed-off-by: Daniel Borkmann 

ouch. nice catch!
Tested on arm64 hw and applied to bpf tree, Thanks Daniel!



Re: [for-next 7/7] IB/mlx5: Implement fragmented completion queue (CQ)

2018-02-22 Thread Santosh Shilimkar

Hi Saeed

On 2/21/2018 12:13 PM, Saeed Mahameed wrote:

From: Yonatan Cohen 

The current implementation of create CQ requires contiguous
memory, such requirement is problematic once the memory is
fragmented or the system is low in memory, it causes for
failures in dma_zalloc_coherent().

This patch implements new scheme of fragmented CQ to overcome
this issue by introducing new type: 'struct mlx5_frag_buf_ctrl'
to allocate fragmented buffers, rather than contiguous ones.

Base the Completion Queues (CQs) on this new fragmented buffer.

It fixes following crashes:
kworker/29:0: page allocation failure: order:6, mode:0x80d0
CPU: 29 PID: 8374 Comm: kworker/29:0 Tainted: G OE 3.10.0
Workqueue: ib_cm cm_work_handler [ib_cm]
Call Trace:
[<>] dump_stack+0x19/0x1b
[<>] warn_alloc_failed+0x110/0x180
[<>] __alloc_pages_slowpath+0x6b7/0x725
[<>] __alloc_pages_nodemask+0x405/0x420
[<>] dma_generic_alloc_coherent+0x8f/0x140
[<>] x86_swiotlb_alloc_coherent+0x21/0x50
[<>] mlx5_dma_zalloc_coherent_node+0xad/0x110 [mlx5_core]
[<>] ? mlx5_db_alloc_node+0x69/0x1b0 [mlx5_core]
[<>] mlx5_buf_alloc_node+0x3e/0xa0 [mlx5_core]
[<>] mlx5_buf_alloc+0x14/0x20 [mlx5_core]
[<>] create_cq_kernel+0x90/0x1f0 [mlx5_ib]
[<>] mlx5_ib_create_cq+0x3b0/0x4e0 [mlx5_ib]

Signed-off-by: Yonatan Cohen 
Reviewed-by: Tariq Toukan 
Signed-off-by: Leon Romanovsky 
Signed-off-by: Saeed Mahameed 
---

Jason mentioned this patch to me off-list. We were
seeing a similar issue with SRQs & QPs, so I am wondering whether
you have any plans to make a similar change for other resources
too, so that they don't rely on higher-order page allocation
for icm tables.

Regards,
Santosh


[PATCH bpf] bpf, arm64: fix out of bounds access in tail call

2018-02-22 Thread Daniel Borkmann
I recently noticed a crash on arm64 when feeding a bogus index
into BPF tail call helper. The crash would not occur when the
interpreter is used, but only in case of JIT. Output looks as
follows:

  [  347.007486] Unable to handle kernel paging request at virtual address 
fffb850e96492510
  [...]
  [  347.043065] [fffb850e96492510] address between user and kernel address 
ranges
  [  347.050205] Internal error: Oops: 9604 [#1] SMP
  [...]
  [  347.190829] x13:  x12: 
  [  347.196128] x11: fffc047ebe782800 x10: 808fd7d0fd10
  [  347.201427] x9 :  x8 : 
  [  347.206726] x7 :  x6 : 001c99173800
  [  347.212025] x5 : 0018 x4 : ba5a
  [  347.217325] x3 : 000329c4 x2 : 808fd7cf0500
  [  347.222625] x1 : 808fd7d0fc00 x0 : 808fd7cf0500
  [  347.227926] Process test_verifier (pid: 4548, stack limit = 
0x7467fa61)
  [  347.235221] Call trace:
  [  347.237656]  0x02f3a4fc
  [  347.240784]  bpf_test_run+0x78/0xf8
  [  347.244260]  bpf_prog_test_run_skb+0x148/0x230
  [  347.248694]  SyS_bpf+0x77c/0x1110
  [  347.251999]  el0_svc_naked+0x30/0x34
  [  347.255564] Code: 9100075a d280220a 8b0a002a d37df04b (f86b694b)
  [...]

In this case the index used in BPF r3 is the same as in r1
at the time of the call, meaning we fed a pointer as index;
here, it had the value 0x808fd7cf0500 which sits in x2.

While I found tail calls to be working in general (also for
hitting the error cases), I noticed the following in the code
emission:

  # bpftool p d j i 988
  [...]
  38:   ldr w10, [x1,x10]
  3c:   cmp w2, w10
  40:   b.ge0x007c  <-- signed cmp
  44:   mov x10, #0x20  // #32
  48:   cmp x26, x10
  4c:   b.gt0x007c
  50:   add x26, x26, #0x1
  54:   mov x10, #0x110 // #272
  58:   add x10, x1, x10
  5c:   lsl x11, x2, #3
  60:   ldr x11, [x10,x11]  <-- faulting insn (f86b694b)
  64:   cbz x11, 0x007c
  [...]

Meaning, the tests passed because commit ddb55992b04d ("arm64:
bpf: implement bpf_tail_call() helper") was using signed compares
instead of unsigned which as a result had the test wrongly passing.

Change this but also the tail call count test both into unsigned
and cap the index as u32. Latter we did as well in 90caccdd8cc0
("bpf: fix bpf_tail_call() x64 JIT") and is needed in addition here,
too. Tested on HiSilicon Hi1616.

Result after patch:

  # bpftool p d j i 268
  [...]
  38:   ldr w10, [x1,x10]
  3c:   add w2, w2, #0x0
  40:   cmp w2, w10
  44:   b.cs0x0080
  48:   mov x10, #0x20  // #32
  4c:   cmp x26, x10
  50:   b.hi0x0080
  54:   add x26, x26, #0x1
  58:   mov x10, #0x110 // #272
  5c:   add x10, x1, x10
  60:   lsl x11, x2, #3
  64:   ldr x11, [x10,x11]
  68:   cbz x11, 0x0080
  [...]

Fixes: ddb55992b04d ("arm64: bpf: implement bpf_tail_call() helper")
Signed-off-by: Daniel Borkmann 
---
 arch/arm64/net/bpf_jit_comp.c   |  5 +++--
 tools/testing/selftests/bpf/test_verifier.c | 26 ++
 2 files changed, 29 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index 1d4f1da..a933504 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -250,8 +250,9 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)
off = offsetof(struct bpf_array, map.max_entries);
emit_a64_mov_i64(tmp, off, ctx);
emit(A64_LDR32(tmp, r2, tmp), ctx);
+   emit(A64_MOV(0, r3, r3), ctx);
emit(A64_CMP(0, r3, tmp), ctx);
-   emit(A64_B_(A64_COND_GE, jmp_offset), ctx);
+   emit(A64_B_(A64_COND_CS, jmp_offset), ctx);
 
/* if (tail_call_cnt > MAX_TAIL_CALL_CNT)
 * goto out;
@@ -259,7 +260,7 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)
 */
emit_a64_mov_i64(tmp, MAX_TAIL_CALL_CNT, ctx);
emit(A64_CMP(1, tcc, tmp), ctx);
-   emit(A64_B_(A64_COND_GT, jmp_offset), ctx);
+   emit(A64_B_(A64_COND_HI, jmp_offset), ctx);
emit(A64_ADD_I(1, tcc, tcc, 1), ctx);
 
/* prog = array->ptrs[index];
diff --git a/tools/testing/selftests/bpf/test_verifier.c 
b/tools/testing/selftests/bpf/test_verifier.c
index c0f16e9..c73592f 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -2587,6 +2587,32 @@ static struct bpf_test tests[] = {
.result = ACCEPT,
},
{
+   "runtime/jit: pass negative index to tail_call",
+   .insns = {
+   BPF_MOV64_IMM(BPF_REG_3, -1),
+   BPF_LD_MAP_FD(BPF_REG_2, 0),
+   BPF_RAW_INSN(BPF_JMP | 

Re: [PATCH net-next] rds: rds_msg_zcopy should return error of null rm->data.op_mmp_znotifier

2018-02-22 Thread Santosh Shilimkar

On 2/22/2018 1:40 PM, Sowmini Varadhan wrote:

If either or both of MSG_ZEROCOPY and SOCK_ZEROCOPY have not been
specified, the rm->data.op_mmp_znotifier allocation will be skipped.
In this case, it is invalid to pass down a cmsghdr with
RDS_CMSG_ZCOPY_COOKIE, so return EINVAL from rds_msg_zcopy for this
case.

Reported-by: syzbot+f893ae7bb2f6456df...@syzkaller.appspotmail.com
Fixes: 0cebaccef3ac ("rds: zerocopy Tx support.")
Signed-off-by: Sowmini Varadhan 
---


Acked-by: Santosh Shilimkar 


Re: [PATCH net-next v2 1/1] net: Allow a rule to track originating protocol

2018-02-22 Thread David Ahern
On 2/22/18 10:20 AM, David Ahern wrote:
>> This breaks my scripts:
>> # ip -4 rule show
>> 0:  from all lookup local
>> 32766:  from all lookup main
>> 32767:  from all lookup default
>>
>> # ip -4 rule del pref 0
>> RTNETLINK answers: No such file or directory
>>
>> Using iproute 4.15 in Fedora 27:
>> # ip -V
>> ip utility, iproute2-ss180129
>>
>> Problem is iproute sets protocol to RTPROT_BOOT while rules are
>> installed with RTPROT_KERNEL.
>>
>> Maybe add FRA_PROTOCOL?
>>
>> Thanks!
> 
> ugh. Another iproute2 bug that the kernel has to deal with. iproute2 has
> been using rtm for the ancillary header for rules when it should have
> been fib_rule_hdr. That bug allowed someone to set the protocol field to
> RTPROT_BOOT which was complete nonsense for rules until Donald's recent
> patch.
> 
> That means all FIB rules need to default to RTPROT_BOOT. I hate to
> inherit that for the l3mdev rule, but looking at the iproute2 code I
> don't see any options.
> 
> Donald: send a patch that changes the protocol for kernel installed
> rules to RTPROT_BOOT.
> 

After more thinking, Donald is going to move the protocol to an
FRA_PROTOCOL attribute as you suggested. That avoids breaking legacy
iproute2 and allows us to keep a sane default protocol value.


[PATCH v8 0/3] netdev: octeon-ethernet: Add Cavium Octeon III support.

2018-02-22 Thread David Daney
We are adding the Cavium OCTEON-III network driver.  Since interacting
with the input and output queues is done via special CPU local memory,
we also need to add support to the MIPS/Octeon architecture code.  The
four-patch set adding this prerequisite code has been split out into a
separate patch set sent to the mips-linux list.

A separate pull request was recently done by Steven Hill for the
firmware required by the driver.

Changes in v8:

o Fixed locking in bgx port functions as noted by davem.

o Corrected SPDX-License-Identifier tags.

o Split driver from prerequisite patches.

Changes in v7:

o There was no v7, we go to v8 to synchronize version numbers with
prerequisites.

Changes in v6:

o Added back cleanup patch for previous generation SoC "staging"
  driver, as Greg K-H acked it.

o Moved FPA driver to drivers/net/ethernet/cavium/octeon as it is
  currently only used by the octeon3-ethernet driver.

o Many code formatting fixes as noted by davem.

Changes in v5:

o Removed cleanup patch for previous generation SoC "staging" driver,
  as it will be sent as a follow-on.

o Fixed kernel doc formatting in all patches.

o Removed redundant licensing text boilerplate.

o Reviewed-by: header added to 2/7.

o Rewrote locking code in 3/7 to eliminate inline asm.

Changes in v4:

o Use phy_print_status() instead of open coding the equivalent.

o Print warning on phy mode mismatch.

o Improve dt-bindings and add Acked-by.

Changes in v3:

o Fix PKI (RX path) initialization to work with little endian kernel.

Changes in v2:

o Cleanup and use of standard bindings in the device tree bindings
  document.

o Added (hopefully) clarifying comments about several OCTEON
  architectural peculiarities.

o Removed unused testing code from the driver.

o Removed some module parameters that already default to the proper
  values.

o KConfig cleanup, including testing on x86_64, arm64 and mips.

o Fixed breakage to the driver for previous generation of OCTEON SoCs (in
  the staging directory still).

o Verified bisectability of the patch set.

Carlos Munoz (2):
  dt-bindings: Add Cavium Octeon Common Ethernet Interface.
  netdev: octeon-ethernet: Add Cavium Octeon III support.

David Daney (1):
  MAINTAINERS: Add entry for
drivers/net/ethernet/cavium/octeon/octeon3-*

 .../devicetree/bindings/net/cavium-bgx.txt |   61 +
 MAINTAINERS|6 +
 drivers/net/ethernet/cavium/Kconfig|   59 +-
 drivers/net/ethernet/cavium/octeon/Makefile|7 +
 .../net/ethernet/cavium/octeon/octeon3-bgx-nexus.c |  417 
 .../net/ethernet/cavium/octeon/octeon3-bgx-port.c  | 2003 +++
 drivers/net/ethernet/cavium/octeon/octeon3-core.c  | 2079 
 drivers/net/ethernet/cavium/octeon/octeon3-fpa.c   |  358 
 drivers/net/ethernet/cavium/octeon/octeon3-pki.c   |  823 
 drivers/net/ethernet/cavium/octeon/octeon3-pko.c   | 1688 
 drivers/net/ethernet/cavium/octeon/octeon3-sso.c   |  301 +++
 drivers/net/ethernet/cavium/octeon/octeon3.h   |  430 
 12 files changed, 8222 insertions(+), 10 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/net/cavium-bgx.txt
 create mode 100644 drivers/net/ethernet/cavium/octeon/octeon3-bgx-nexus.c
 create mode 100644 drivers/net/ethernet/cavium/octeon/octeon3-bgx-port.c
 create mode 100644 drivers/net/ethernet/cavium/octeon/octeon3-core.c
 create mode 100644 drivers/net/ethernet/cavium/octeon/octeon3-fpa.c
 create mode 100644 drivers/net/ethernet/cavium/octeon/octeon3-pki.c
 create mode 100644 drivers/net/ethernet/cavium/octeon/octeon3-pko.c
 create mode 100644 drivers/net/ethernet/cavium/octeon/octeon3-sso.c
 create mode 100644 drivers/net/ethernet/cavium/octeon/octeon3.h

-- 
2.14.3



Re: [PATCH bpf v2] bpf, x64: implement retpoline for tail call

2018-02-22 Thread Alexei Starovoitov
On Thu, Feb 22, 2018 at 03:12:53PM +0100, Daniel Borkmann wrote:
> Implement a retpoline [0] for the BPF tail call JIT'ing that converts
> the indirect jump via jmp %rax that is used to make the long jump into
> another JITed BPF image. Since this is subject to speculative execution,
> we need to control the transient instruction sequence here as well
> when CONFIG_RETPOLINE is set, and direct it into a pause + lfence loop.
> The latter aligns also with what gcc / clang emits (e.g. [1]).
> 
> JIT dump after patch:
> 
>   # bpftool p d x i 1
>0: (18) r2 = map[id:1]
>2: (b7) r3 = 0
>3: (85) call bpf_tail_call#12
>4: (b7) r0 = 2
>5: (95) exit
> 
> With CONFIG_RETPOLINE:
> 
>   # bpftool p d j i 1
>   [...]
>   33: cmp%edx,0x24(%rsi)
>   36: jbe0x0072  |*
>   38: mov0x24(%rbp),%eax
>   3e: cmp$0x20,%eax
>   41: ja 0x0072  |
>   43: add$0x1,%eax
>   46: mov%eax,0x24(%rbp)
>   4c: mov0x90(%rsi,%rdx,8),%rax
>   54: test   %rax,%rax
>   57: je 0x0072  |
>   59: mov0x28(%rax),%rax
>   5d: add$0x25,%rax
>   61: callq  0x006d  |+
>   66: pause  |
>   68: lfence |
>   6b: jmp0x0066  |
>   6d: mov%rax,(%rsp) |
>   71: retq   |
>   72: mov$0x2,%eax
>   [...]
> 
>   * relative fall-through jumps in error case
>   + retpoline for indirect jump
> 
> Without CONFIG_RETPOLINE:
> 
>   # bpftool p d j i 1
>   [...]
>   33: cmp%edx,0x24(%rsi)
>   36: jbe0x0063  |*
>   38: mov0x24(%rbp),%eax
>   3e: cmp$0x20,%eax
>   41: ja 0x0063  |
>   43: add$0x1,%eax
>   46: mov%eax,0x24(%rbp)
>   4c: mov0x90(%rsi,%rdx,8),%rax
>   54: test   %rax,%rax
>   57: je 0x0063  |
>   59: mov0x28(%rax),%rax
>   5d: add$0x25,%rax
>   61: jmpq   *%rax   |-
>   63: mov$0x2,%eax
>   [...]
> 
>   * relative fall-through jumps in error case
>   - plain indirect jump as before
> 
>   [0] https://support.google.com/faqs/answer/7625886
>   [1] 
> https://github.com/gcc-mirror/gcc/commit/a31e654fa107be968b802786d747e962c2fcdb2b
> 
> Signed-off-by: Daniel Borkmann 
> ---
>  v1 -> v2:
>   - Moved into nospec-branch.h as suggested by Eric, thanks!

Applied to bpf tree, Thanks Daniel.



[PATCH v8 1/3] dt-bindings: Add Cavium Octeon Common Ethernet Interface.

2018-02-22 Thread David Daney
From: Carlos Munoz 

Add bindings for Common Ethernet Interface (BGX) block.

Acked-by: Rob Herring 
Signed-off-by: Carlos Munoz 
Signed-off-by: Steven J. Hill 
Signed-off-by: David Daney 
---
 .../devicetree/bindings/net/cavium-bgx.txt | 61 ++
 1 file changed, 61 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/net/cavium-bgx.txt

diff --git a/Documentation/devicetree/bindings/net/cavium-bgx.txt 
b/Documentation/devicetree/bindings/net/cavium-bgx.txt
new file mode 100644
index ..830c5f08
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/cavium-bgx.txt
@@ -0,0 +1,61 @@
+* Common Ethernet Interface (BGX) block
+
+Properties:
+
+- compatible: "cavium,octeon-7890-bgx": Compatibility with all cn7xxx SOCs.
+
+- reg: The base address of the BGX block.
+
+- #address-cells: Must be <1>.
+
+- #size-cells: Must be <0>.  BGX addresses have no size component.
+
+A BGX block has several children, each representing an Ethernet
+interface.
+
+
+* Ethernet Interface (BGX port) connects to PKI/PKO
+
+Properties:
+
+- compatible: "cavium,octeon-7890-bgx-port": Compatibility with all
+ cn7xxx SOCs.
+
+ "cavium,octeon-7360-xcv": Compatibility with cn73xx SOCs
+ for RGMII.
+
+- reg: The index of the interface within the BGX block.
+
+Optional properties:
+
+- local-mac-address: Mac address for the interface.
+
+- phy-handle: phandle to the phy node connected to the interface.
+
+- phy-mode: described in ethernet.txt.
+
+- fixed-link: described in fixed-link.txt.
+
+Example:
+
+   ethernet-mac-nexus@11800e000 {
+   compatible = "cavium,octeon-7890-bgx";
+   reg = <0x00011800 0xe000 0x 0x0100>;
+   #address-cells = <1>;
+   #size-cells = <0>;
+
+   ethernet@0 {
+   compatible = "cavium,octeon-7360-xcv";
+   reg = <0>;
+   local-mac-address = [ 00 01 23 45 67 89 ];
+   phy-handle = <>;
+   phy-mode = "rgmii-rxid"
+   };
+   ethernet@1 {
+   compatible = "cavium,octeon-7890-bgx-port";
+   reg = <1>;
+   local-mac-address = [ 00 01 23 45 67 8a ];
+   phy-handle = <>;
+   phy-mode = "sgmii"
+   };
+   };
-- 
2.14.3



[PATCH v8 3/3] MAINTAINERS: Add entry for drivers/net/ethernet/cavium/octeon/octeon3-*

2018-02-22 Thread David Daney
Signed-off-by: David Daney 
---
 MAINTAINERS | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 9a7f76eadae9..b36371ae590f 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3259,6 +3259,12 @@ W:   http://www.cavium.com
 S: Supported
 F: drivers/mmc/host/cavium*
 
+CAVIUM OCTEON-III NETWORK DRIVER
+M: Steven J. Hill 
+L: netdev@vger.kernel.org
+S: Supported
+F: drivers/net/ethernet/cavium/octeon/octeon3-*
+
 CAVIUM OCTEON-TX CRYPTO DRIVER
 M: George Cherian 
 L: linux-cry...@vger.kernel.org
-- 
2.14.3



[PATCH v8 0/4] Prerequisites for Cavium OCTEON-III network driver.

2018-02-22 Thread David Daney
We want to add the Cavium OCTEON-III network driver.  But since
interacting with the input and output queues is done via special CPU
local memory, we also need to add support to the MIPS/Octeon
architecture code.  Aren't SoCs nice in this way?  These are the
prerequisite patches that are needed before the network driver can be
merged.

Changes in v8:

o Rebased to v4.16-rc2

Changes in v7:

o Splitting of the patch set only.  These 4 mips patches are unchanged
  from the previous posting.

Changes in v6:

o Added back cleanup patch for previous generation SoC "staging"
  driver, as Greg K-H acked it.

o Moved FPA driver to drivers/net/ethernet/cavium/octeon as it is
  currently only used by the octeon3-ethernet driver.

o Many code formatting fixes as noted by davem.

Changes in v5:

o Removed cleanup patch for previous generation SoC "staging" driver,
  as it will be sent as a follow-on.

o Fixed kernel doc formatting in all patches.

o Removed redundant licensing text boilerplate.

o Reviewed-by: header added to 2/7.

o Rewrote locking code in 3/7 to eliminate inline asm.

Changes in v4:

o Use phy_print_status() instead of open coding the equivalent.

o Print warning on phy mode mismatch.

o Improve dt-bindings and add Acked-by.

Changes in v3:

o Fix PKI (RX path) initialization to work with little endian kernel.

Changes in v2:

o Cleanup and use of standard bindings in the device tree bindings
  document.

o Added (hopefully) clarifying comments about several OCTEON
  architectural peculiarities.

o Removed unused testing code from the driver.

o Removed some module parameters that already default to the proper
  values.

o KConfig cleanup, including testing on x86_64, arm64 and mips.

o Fixed breakage to the driver for previous generation of OCTEON SoCs
  (in the staging directory still).

o Verified bisectability of the patch set.

Carlos Munoz (2):
  MIPS: Octeon: Enable LMTDMA/LMTST operations.
  MIPS: Octeon: Add a global resource manager.

David Daney (2):
  MIPS: Octeon: Automatically provision CVMSEG space.
  staging: octeon: Remove USE_ASYNC_IOBDMA macro.

 arch/mips/cavium-octeon/Kconfig|  27 +-
 arch/mips/cavium-octeon/Makefile   |   1 +
 arch/mips/cavium-octeon/resource-mgr.c | 351 +
 arch/mips/cavium-octeon/setup.c|  22 +-
 .../asm/mach-cavium-octeon/kernel-entry-init.h |  20 +-
 arch/mips/include/asm/mipsregs.h   |   2 +
 arch/mips/include/asm/octeon/octeon.h  |  32 +-
 arch/mips/include/asm/processor.h  |   2 +-
 arch/mips/kernel/octeon_switch.S   |   2 -
 arch/mips/mm/tlbex.c   |  29 +-
 drivers/staging/octeon/ethernet-defines.h  |   6 -
 drivers/staging/octeon/ethernet-rx.c   |  25 +-
 drivers/staging/octeon/ethernet-tx.c   |  85 ++---
 13 files changed, 472 insertions(+), 132 deletions(-)
 create mode 100644 arch/mips/cavium-octeon/resource-mgr.c

-- 
2.14.3



[PATCH v8 2/4] MIPS: Octeon: Automatically provision CVMSEG space.

2018-02-22 Thread David Daney
Remove CONFIG_CAVIUM_OCTEON_CVMSEG_SIZE and automatically calculate
the amount of CVMSEG space needed.

1st 128-bytes: Used by IOBDMA
2nd 128-bytes: Reserved by kernel for scratch/TLS emulation.
3rd 128-bytes: OCTEON-III LMTLINE

New config variable CONFIG_CAVIUM_OCTEON_EXTRA_CVMSEG provisions
additional lines, defaults to zero.

Signed-off-by: David Daney 
Signed-off-by: Carlos Munoz 
---
 arch/mips/cavium-octeon/Kconfig| 27 
 arch/mips/cavium-octeon/setup.c| 16 ++--
 .../asm/mach-cavium-octeon/kernel-entry-init.h | 20 +--
 arch/mips/include/asm/mipsregs.h   |  2 ++
 arch/mips/include/asm/octeon/octeon.h  |  2 ++
 arch/mips/include/asm/processor.h  |  2 +-
 arch/mips/kernel/octeon_switch.S   |  2 --
 arch/mips/mm/tlbex.c   | 29 ++
 drivers/staging/octeon/ethernet-defines.h  |  2 +-
 9 files changed, 50 insertions(+), 52 deletions(-)

diff --git a/arch/mips/cavium-octeon/Kconfig b/arch/mips/cavium-octeon/Kconfig
index b5eee1a57d6c..a283b73b7fc6 100644
--- a/arch/mips/cavium-octeon/Kconfig
+++ b/arch/mips/cavium-octeon/Kconfig
@@ -11,21 +11,26 @@ config CAVIUM_CN63XXP1
  non-CN63XXP1 hardware, so it is recommended to select "n"
  unless it is known the workarounds are needed.
 
-config CAVIUM_OCTEON_CVMSEG_SIZE
-   int "Number of L1 cache lines reserved for CVMSEG memory"
-   range 0 54
-   default 1
-   help
- CVMSEG LM is a segment that accesses portions of the dcache as a
- local memory; the larger CVMSEG is, the smaller the cache is.
- This selects the size of CVMSEG LM, which is in cache blocks. The
- legally range is from zero to 54 cache blocks (i.e. CVMSEG LM is
- between zero and 6192 bytes).
-
 endif # CPU_CAVIUM_OCTEON
 
 if CAVIUM_OCTEON_SOC
 
+config CAVIUM_OCTEON_EXTRA_CVMSEG
+   int "Number of extra L1 cache lines reserved for CVMSEG memory"
+   range 0 50
+   default 0
+   help
+ CVMSEG LM is a segment that accesses portions of the dcache
+ as a local memory; the larger CVMSEG is, the smaller the
+ cache is.  The kernel uses two or three blocks (one for TLB
+ exception handlers, one for driver IOBDMA operations, and on
+ models that need it, one for LMTDMA operations). This
+ selects an optional extra number of CVMSEG lines for use by
+ other software.
+
+ Normally no extra lines are required, and this parameter
+ should be set to zero.
+
 config CAVIUM_OCTEON_LOCK_L2
bool "Lock often used kernel code in the L2"
default "y"
diff --git a/arch/mips/cavium-octeon/setup.c b/arch/mips/cavium-octeon/setup.c
index 99e6a68bc652..51c4d3c3cada 100644
--- a/arch/mips/cavium-octeon/setup.c
+++ b/arch/mips/cavium-octeon/setup.c
@@ -68,6 +68,12 @@ extern void pci_console_init(const char *arg);
 static unsigned long long max_memory = ULLONG_MAX;
 static unsigned long long reserve_low_mem;
 
+/*
+ * modified in kernel-entry-init.h, must have an initial value to keep
+ * it from being clobbered when bss is zeroed.
+ */
+u32 octeon_cvmseg_lines = 2;
+
 DEFINE_SEMAPHORE(octeon_bootbus_sem);
 EXPORT_SYMBOL(octeon_bootbus_sem);
 
@@ -604,11 +610,7 @@ void octeon_user_io_init(void)
 
/* R/W If set, CVMSEG is available for loads/stores in
 * kernel/debug mode. */
-#if CONFIG_CAVIUM_OCTEON_CVMSEG_SIZE > 0
cvmmemctl.s.cvmsegenak = 1;
-#else
-   cvmmemctl.s.cvmsegenak = 0;
-#endif
if (OCTEON_IS_OCTEON3()) {
/* Enable LMTDMA */
cvmmemctl.s.lmtena = 1;
@@ -626,9 +628,9 @@ void octeon_user_io_init(void)
 
/* Setup of CVMSEG is done in kernel-entry-init.h */
if (smp_processor_id() == 0)
-   pr_notice("CVMSEG size: %d cache lines (%d bytes)\n",
- CONFIG_CAVIUM_OCTEON_CVMSEG_SIZE,
- CONFIG_CAVIUM_OCTEON_CVMSEG_SIZE * 128);
+   pr_notice("CVMSEG size: %u cache lines (%u bytes)\n",
+ octeon_cvmseg_lines,
+ octeon_cvmseg_lines * 128);
 
if (octeon_has_feature(OCTEON_FEATURE_FAU)) {
union cvmx_iob_fau_timeout fau_timeout;
diff --git a/arch/mips/include/asm/mach-cavium-octeon/kernel-entry-init.h 
b/arch/mips/include/asm/mach-cavium-octeon/kernel-entry-init.h
index c38b38ce5a3d..cdcca60978a2 100644
--- a/arch/mips/include/asm/mach-cavium-octeon/kernel-entry-init.h
+++ b/arch/mips/include/asm/mach-cavium-octeon/kernel-entry-init.h
@@ -26,11 +26,18 @@
# a3 = address of boot descriptor block
.set push
.set arch=octeon
+   mfc0    v1, CP0_PRID_REG
+   andi    v1, 0xff00
+   li      v0, 0x9500  # cn78XX or later
+   subu    v1, v1, v0
+ 

[PATCH v8 3/4] staging: octeon: Remove USE_ASYNC_IOBDMA macro.

2018-02-22 Thread David Daney
Previous patch sets USE_ASYNC_IOBDMA to 1 unconditionally.  Remove
USE_ASYNC_IOBDMA from all if statements.  Remove dead code caused by
the change.

Acked-by: Greg Kroah-Hartman 
Signed-off-by: David Daney 
---
 drivers/staging/octeon/ethernet-defines.h |  6 ---
 drivers/staging/octeon/ethernet-rx.c  | 25 -
 drivers/staging/octeon/ethernet-tx.c  | 85 ++-
 3 files changed, 37 insertions(+), 79 deletions(-)

diff --git a/drivers/staging/octeon/ethernet-defines.h 
b/drivers/staging/octeon/ethernet-defines.h
index 33c71f86890b..15db928c4712 100644
--- a/drivers/staging/octeon/ethernet-defines.h
+++ b/drivers/staging/octeon/ethernet-defines.h
@@ -7,10 +7,6 @@
 
 /*
  * A few defines are used to control the operation of this driver:
- *  USE_ASYNC_IOBDMA
- *  Use asynchronous IO access to hardware. This uses Octeon's asynchronous
- *  IOBDMAs to issue IO accesses without stalling. Set this to zero
- *  to disable this. Note that IOBDMAs require CVMSEG.
  *  REUSE_SKBUFFS_WITHOUT_FREE
  *  Allows the TX path to free an skbuff into the FPA hardware pool. This
  *  can significantly improve performance for forwarding and bridging, but
@@ -29,8 +25,6 @@
 #define REUSE_SKBUFFS_WITHOUT_FREE  1
 #endif
 
-#define USE_ASYNC_IOBDMA   1
-
 /* Maximum number of SKBs to try to free per xmit packet. */
 #define MAX_OUT_QUEUE_DEPTH 1000
 
diff --git a/drivers/staging/octeon/ethernet-rx.c 
b/drivers/staging/octeon/ethernet-rx.c
index 5e271245273c..c1ae60ce11f5 100644
--- a/drivers/staging/octeon/ethernet-rx.c
+++ b/drivers/staging/octeon/ethernet-rx.c
@@ -198,11 +198,9 @@ static int cvm_oct_poll(struct oct_rx_group *rx_group, int 
budget)
/* Prefetch cvm_oct_device since we know we need it soon */
prefetch(cvm_oct_device);
 
-   if (USE_ASYNC_IOBDMA) {
-   /* Save scratch in case userspace is using it */
-   CVMX_SYNCIOBDMA;
-   old_scratch = cvmx_scratch_read64(CVMX_SCR_SCRATCH);
-   }
+   /* Save scratch in case userspace is using it */
+   CVMX_SYNCIOBDMA;
+   old_scratch = cvmx_scratch_read64(CVMX_SCR_SCRATCH);
 
/* Only allow work for our group (and preserve priorities) */
if (OCTEON_IS_MODEL(OCTEON_CN68XX)) {
@@ -217,10 +215,8 @@ static int cvm_oct_poll(struct oct_rx_group *rx_group, int 
budget)
   BIT(rx_group->group));
}
 
-   if (USE_ASYNC_IOBDMA) {
-   cvmx_pow_work_request_async(CVMX_SCR_SCRATCH, CVMX_POW_NO_WAIT);
-   did_work_request = 1;
-   }
+   cvmx_pow_work_request_async(CVMX_SCR_SCRATCH, CVMX_POW_NO_WAIT);
+   did_work_request = 1;
 
while (rx_count < budget) {
struct sk_buff *skb = NULL;
@@ -229,7 +225,7 @@ static int cvm_oct_poll(struct oct_rx_group *rx_group, int 
budget)
cvmx_wqe_t *work;
int port;
 
-   if (USE_ASYNC_IOBDMA && did_work_request)
+   if (did_work_request)
work = cvmx_pow_work_response_async(CVMX_SCR_SCRATCH);
else
work = cvmx_pow_work_request_sync(CVMX_POW_NO_WAIT);
@@ -257,7 +253,7 @@ static int cvm_oct_poll(struct oct_rx_group *rx_group, int 
budget)
sizeof(void *));
prefetch(pskb);
 
-   if (USE_ASYNC_IOBDMA && rx_count < (budget - 1)) {
+   if (rx_count < (budget - 1)) {
cvmx_pow_work_request_async_nocheck(CVMX_SCR_SCRATCH,
CVMX_POW_NO_WAIT);
did_work_request = 1;
@@ -400,10 +396,9 @@ static int cvm_oct_poll(struct oct_rx_group *rx_group, int 
budget)
cvmx_write_csr(CVMX_POW_PP_GRP_MSKX(coreid), old_group_mask);
}
 
-   if (USE_ASYNC_IOBDMA) {
-   /* Restore the scratch area */
-   cvmx_scratch_write64(CVMX_SCR_SCRATCH, old_scratch);
-   }
+   /* Restore the scratch area */
+   cvmx_scratch_write64(CVMX_SCR_SCRATCH, old_scratch);
+
cvm_oct_rx_refill_pool(0);
 
return rx_count;
diff --git a/drivers/staging/octeon/ethernet-tx.c 
b/drivers/staging/octeon/ethernet-tx.c
index df3441b815bb..2aa5fcb7ee32 100644
--- a/drivers/staging/octeon/ethernet-tx.c
+++ b/drivers/staging/octeon/ethernet-tx.c
@@ -176,23 +176,18 @@ int cvm_oct_xmit(struct sk_buff *skb, struct net_device 
*dev)
qos = 0;
}
 
-   if (USE_ASYNC_IOBDMA) {
-   /* Save scratch in case userspace is using it */
-   CVMX_SYNCIOBDMA;
-   old_scratch = cvmx_scratch_read64(CVMX_SCR_SCRATCH);
-   old_scratch2 = cvmx_scratch_read64(CVMX_SCR_SCRATCH + 8);
-
-   /*
-* Fetch and increment the number of packets to be
-* freed.
-*/
-

[PATCH v8 1/4] MIPS: Octeon: Enable LMTDMA/LMTST operations.

2018-02-22 Thread David Daney
From: Carlos Munoz 

LMTDMA/LMTST operations move data between cores and I/O devices:

* LMTST operations can send an address and a variable length
  (up to 128 bytes) of data to an I/O device.
* LMTDMA operations can send an address and a variable length
  (up to 128 bytes) of data to the I/O device and then return a
  variable length (up to 128 bytes) response from the I/O device.

For both LMTST and LMTDMA, the data sent to the device is first stored
in the CVMSEG core local memory cache line indexed by
CVMMEMCTL[LMTLINE], the data is then atomically transmitted to the
device with a store to the CVMSEG LMTDMA trigger location.

Reviewed-by: James Hogan 
Signed-off-by: Carlos Munoz 
Signed-off-by: Steven J. Hill 
Signed-off-by: David Daney 
---
 arch/mips/cavium-octeon/setup.c   |  6 ++
 arch/mips/include/asm/octeon/octeon.h | 12 ++--
 2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/arch/mips/cavium-octeon/setup.c b/arch/mips/cavium-octeon/setup.c
index a8034d0dcade..99e6a68bc652 100644
--- a/arch/mips/cavium-octeon/setup.c
+++ b/arch/mips/cavium-octeon/setup.c
@@ -609,6 +609,12 @@ void octeon_user_io_init(void)
 #else
cvmmemctl.s.cvmsegenak = 0;
 #endif
+   if (OCTEON_IS_OCTEON3()) {
+   /* Enable LMTDMA */
+   cvmmemctl.s.lmtena = 1;
+   /* Scratch line to use for LMT operation */
+   cvmmemctl.s.lmtline = 2;
+   }
/* R/W If set, CVMSEG is available for loads/stores in
 * supervisor mode. */
cvmmemctl.s.cvmsegenas = 0;
diff --git a/arch/mips/include/asm/octeon/octeon.h 
b/arch/mips/include/asm/octeon/octeon.h
index c99c4b6a79f4..92a17d67c1fa 100644
--- a/arch/mips/include/asm/octeon/octeon.h
+++ b/arch/mips/include/asm/octeon/octeon.h
@@ -179,7 +179,15 @@ union octeon_cvmemctl {
/* RO 1 = BIST fail, 0 = BIST pass */
__BITFIELD_FIELD(uint64_t wbfbist:1,
/* Reserved */
-   __BITFIELD_FIELD(uint64_t reserved:17,
+   __BITFIELD_FIELD(uint64_t reserved_52_57:6,
+   /* When set, LMTDMA/LMTST operations are permitted */
+   __BITFIELD_FIELD(uint64_t lmtena:1,
+   /* Selects the CVMSEG LM cacheline used by LMTDMA
+* LMTST and wide atomic store operations.
+*/
+   __BITFIELD_FIELD(uint64_t lmtline:6,
+   /* Reserved */
+   __BITFIELD_FIELD(uint64_t reserved_41_44:4,
/* OCTEON II - TLB replacement policy: 0 = bitmask LRU; 1 = NLU.
 * This field selects between the TLB replacement policies:
 * bitmask LRU or NLU. Bitmask LRU maintains a mask of
@@ -275,7 +283,7 @@ union octeon_cvmemctl {
/* R/W Size of local memory in cache blocks, 54 (6912
 * bytes) is max legal value. */
__BITFIELD_FIELD(uint64_t lmemsz:6,
-   ;)
+   ;
} s;
 };
 
-- 
2.14.3



Re: [PATCH V7 2/4] sctp: Add ip option support

2018-02-22 Thread Paul Moore
On Wed, Feb 21, 2018 at 3:45 PM, Paul Moore  wrote:
> On February 21, 2018 9:33:51 AM Marcelo Ricardo Leitner 
>  wrote:
>> On Tue, Feb 20, 2018 at 07:15:27PM +, Richard Haines wrote:
>>> Add ip option support to allow LSM security modules to utilise CIPSO/IPv4
>>> and CALIPSO/IPv6 services.
>>>
>>> Signed-off-by: Richard Haines 
>>
>> LGTM too, thanks!
>>
>> Acked-by: Marcelo Ricardo Leitner 
>
> I agree, thanks everyone for all the work, review, and patience behind this 
> patchset!  I'll work on merging this into selinux/next and I'll send a note 
> when it's done.

I just merged the four patches (1,3,4 from the v6 patchset, 2 from the
v7 patchset) in selinux/next and did a quick sanity test on the kernel
(booted, no basic SELinux regressions).  Additional testing help is
always appreciated ...

* git://git.kernel.org/pub/scm/linux/kernel/git/pcmoore/selinux.git
* https://git.kernel.org/pub/scm/linux/kernel/git/pcmoore/selinux.git

-- 
paul moore
www.paul-moore.com


[PATCH v8 4/4] MIPS: Octeon: Add a global resource manager.

2018-02-22 Thread David Daney
From: Carlos Munoz 

Add a global resource manager to manage tagged pointers within
bootmem allocated memory. This is used by various functional
blocks in the Octeon core like the FPA, Ethernet nexus, etc.

Signed-off-by: Carlos Munoz 
Signed-off-by: Steven J. Hill 
Signed-off-by: David Daney 
---
 arch/mips/cavium-octeon/Makefile   |   1 +
 arch/mips/cavium-octeon/resource-mgr.c | 351 +
 arch/mips/include/asm/octeon/octeon.h  |  18 ++
 3 files changed, 370 insertions(+)
 create mode 100644 arch/mips/cavium-octeon/resource-mgr.c

diff --git a/arch/mips/cavium-octeon/Makefile b/arch/mips/cavium-octeon/Makefile
index 7c02e542959a..28c0bb75d1a4 100644
--- a/arch/mips/cavium-octeon/Makefile
+++ b/arch/mips/cavium-octeon/Makefile
@@ -10,6 +10,7 @@
 #
 
 obj-y := cpu.o setup.o octeon-platform.o octeon-irq.o csrc-octeon.o
+obj-y += resource-mgr.o
 obj-y += dma-octeon.o
 obj-y += octeon-memcpy.o
 obj-y += executive/
diff --git a/arch/mips/cavium-octeon/resource-mgr.c 
b/arch/mips/cavium-octeon/resource-mgr.c
new file mode 100644
index ..74efda5420ff
--- /dev/null
+++ b/arch/mips/cavium-octeon/resource-mgr.c
@@ -0,0 +1,351 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Resource manager for Octeon.
+ *
+ * Copyright (C) 2017 Cavium, Inc.
+ */
+#include 
+
+#include 
+#include 
+
+#define RESOURCE_MGR_BLOCK_NAME"cvmx-global-resources"
+#define MAX_RESOURCES  128
+#define INST_AVAILABLE -88
+#define OWNER  0xbadc0de
+
+struct global_resource_entry {
+   struct global_resource_tag tag;
+   u64 phys_addr;
+   u64 size;
+};
+
+struct global_resources {
+#ifdef __LITTLE_ENDIAN_BITFIELD
+   u32 rlock;
+   u32 pad;
+#else
+   u32 pad;
+   u32 rlock;
+#endif
+   u64 entry_cnt;
+   struct global_resource_entry resource_entry[];
+};
+
+static struct global_resources *res_mgr_info;
+
+
+/*
+ * The resource manager interacts with software running outside of the
+ * Linux kernel, which necessitates locking to maintain data structure
+ * consistency.  These custom locking functions implement the locking
+ * protocol, and cannot be replaced by kernel locking functions that
+ * may use different in-memory structures.
+ */
+
+static void res_mgr_lock(void)
+{
+   while (cmpxchg(&res_mgr_info->rlock, 0, 1))
+   ; /* Loop while not zero */
+   rmb();
+}
+
+static void res_mgr_unlock(void)
+{
+   /* Wait until all resource operations finish before unlocking. */
+   wmb();
+   WRITE_ONCE(res_mgr_info->rlock, 0);
+   /* Force a write buffer flush. */
+   wmb();
+}
+
+static int res_mgr_find_resource(struct global_resource_tag tag)
+{
+   struct global_resource_entry *res_entry;
+   int i;
+
+   for (i = 0; i < res_mgr_info->entry_cnt; i++) {
+   res_entry = &res_mgr_info->resource_entry[i];
+   if (res_entry->tag.lo == tag.lo && res_entry->tag.hi == tag.hi)
+   return i;
+   }
+   return -1;
+}
+
+/**
+ * res_mgr_create_resource() - Create a resource.
+ * @tag: Identifies the resource.
+ * @inst_cnt: Number of resource instances to create.
+ *
+ * Returns 0 if the resource was created successfully.
+ * Returns < 0 for error codes.
+ */
+int res_mgr_create_resource(struct global_resource_tag tag, int inst_cnt)
+{
+   struct global_resource_entry *res_entry;
+   u64 size;
+   u64 *res_addr;
+   int res_index, i, rc = 0;
+
+   res_mgr_lock();
+
+   /* Make sure resource doesn't already exist. */
+   res_index = res_mgr_find_resource(tag);
+   if (res_index >= 0) {
+   rc = -EEXIST;
+   goto err;
+   }
+
+   if (res_mgr_info->entry_cnt >= MAX_RESOURCES) {
+   pr_err("Resource max limit reached, not created\n");
+   rc = -ENOSPC;
+   goto err;
+   }
+
+   /*
+* Each instance is kept in an array of u64s. The first array element
+* holds the number of allocated instances.
+*/
+   size = sizeof(u64) * (inst_cnt + 1);
+   res_addr = cvmx_bootmem_alloc_range(size, CVMX_CACHE_LINE_SIZE, 0, 0);
+   if (!res_addr) {
+   pr_err("Failed to allocate resource. not created\n");
+   rc = -ENOMEM;
+   goto err;
+   }
+
+   /* Initialize the newly created resource. */
+   *res_addr = inst_cnt;
+   for (i = 1; i <= inst_cnt; i++)
+   res_addr[i] = INST_AVAILABLE;
+
+   res_index = res_mgr_info->entry_cnt;
+   res_entry = &res_mgr_info->resource_entry[res_index];
+   res_entry->tag = tag;
+   res_entry->phys_addr = virt_to_phys(res_addr);
+   res_entry->size = size;
+   res_mgr_info->entry_cnt++;
+
+err:
+   res_mgr_unlock();
+
+   return rc;
+}
+EXPORT_SYMBOL(res_mgr_create_resource);
+
+/**
+ * 

Re: [PATCH iproute2 net-next] ss: print skmeminfo for packet sockets

2018-02-22 Thread David Ahern
On 2/21/18 10:37 PM, Roopa Prabhu wrote:
> From: Roopa Prabhu 
> 
> before:
> $ss --packet -p -m
> p_raw0  0*:eth0
>   users:(("lldpd",pid=2240,fd=11))
> 
> after:
> $ss --packet -p -m
> p_raw0  0*:eth0
>   users:(("lldpd",pid=2240,fd=11))
>   skmem:(r0,rb266240,t0,tb266240,f0,w0,o320,bl0,d0)
> 
> Signed-off-by: Roopa Prabhu 
> ---
>  misc/ss.c | 3 +++
>  1 file changed, 3 insertions(+)

Applied to iproute2-next.


Re: [PATCH iproute2-next v1] rdma: Add batch command support

2018-02-22 Thread David Ahern
On 2/22/18 12:28 AM, Leon Romanovsky wrote:
> From: Leon Romanovsky 
> 
> Implement an option (-b) to execute RDMAtool commands
> from supplied file. This follows the same model as
> in use for ip and devlink tools, by expecting
> every new command to be on new line.
> 
> These commands are expected to be without any -*
> (e.g. -d, -j, e.t.c) global flags, which should be
> called externally.
> 
> Signed-off-by: Leon Romanovsky 
> ---
> 
> Changelog v0->v1:
>   * Used ARRAY_SIZE instead of hardcoded value as an input to makeargs()
> 
> David,
> 
> This patch is based on iproute2.git because iproute2-next doesn't
> have latest restrack code. The patch itself is completely independent
> from that code and is supposed to go to -next, but it has conflicts
> (manual page and help line).
> 
> Can you please merge iproute2 master into iproute2-next prior to
> applying this patch?
> 

Done. And applied to iproute2-next



Re: [PATCH net-next] rds: rds_msg_zcopy should return error of null rm->data.op_mmp_znotifier

2018-02-22 Thread Willem de Bruijn
On Thu, Feb 22, 2018 at 4:40 PM, Sowmini Varadhan
 wrote:
> if either or both of MSG_ZEROCOPY and SOCK_ZEROCOPY have not been
> specified, the rm->data.op_mmp_znotifier allocation will be skipped.
> In this case, it is invalid to pass down a cmsghdr with
> RDS_CMSG_ZCOPY_COOKIE, so return EINVAL from rds_msg_zcopy for this
> case.
>
> Reported-by: syzbot+f893ae7bb2f6456df...@syzkaller.appspotmail.com
> Fixes: 0cebaccef3ac ("rds: zerocopy Tx support.")
> Signed-off-by: Sowmini Varadhan 

Acked-by: Willem de Bruijn 


Re: [PATCH] netlink: put module reference if dump start fails

2018-02-22 Thread Jason A. Donenfeld
Thanks!

Jason


RE: [PATCH v2 net-next 1/2] lan743x: Add main source files for new lan743x driver

2018-02-22 Thread Bryan.Whitehead
> On Thu, Feb 22, 2018 at 10:45:34PM +0100, Andrew Lunn wrote:
> > > Also I'm allocating interrupt resources on interface up, and freeing
> > > resources on interface down. So if there is an up, down, up sequence
> > > then the driver will allocate resources twice. In order for devm to
> > > work properly, should I move all resource allocation into the probe
> function?
> >
> > Hi Bryan
> >
> > It is better to fail early if the resource is not available, so yes, i
> > would register the interrupt handler in probe.
> 
> And we maintainers don't always agree with each other :-)
> 
> Doing irq handling in open/close without devm_ is also O.K.
> 
>Andrew

Thanks Andrew, and Florian,

Moving irq allocation and free, to probe and remove, will require a bit of 
refactoring and possibly introduce new issues. For now I will keep IRQ handling 
in open/close without devm_.

Other resource allocations are already in probe/remove so I will apply your 
suggestions in the next patch revision.

Thanks,
Bryan


Re: [PATCH v2 net-next 1/2] lan743x: Add main source files for new lan743x driver

2018-02-22 Thread Florian Fainelli
On 02/21/2018 11:06 AM, Bryan Whitehead wrote:
> Add main source files for new lan743x driver.
> 
> Signed-off-by: Bryan Whitehead 
> ---

> +lan743x-objs := lan743x_main.o

Should we assume that you have additional object files you would like to
contribute at a later point? If that is the case, this is fine, if this
is going to be only file of this driver, consider renaming it so you
don't even have to have this lan743x-objs line at all.

> diff --git a/drivers/net/ethernet/microchip/lan743x_main.c 
> b/drivers/net/ethernet/microchip/lan743x_main.c
> new file mode 100644
> index 000..3de39e1
> --- /dev/null
> +++ b/drivers/net/ethernet/microchip/lan743x_main.c
> @@ -0,0 +1,2757 @@
> +/*
> + * Copyright (C) 2018 Microchip Technology

You should consider the SPDX license tags to reduce the license
boilerplate standard disclaimer.

> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License
> + * as published by the Free Software Foundation; either version 2
> + * of the License, or (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, see .
> + */
> +
> +#include "lan743x_main.h"

This is not ideal, having all your header dependencies resolved by a
main header file usually leads to unnecessary inclusions when fewer are
needed.

> +
> +static void lan743x_pci_cleanup(struct lan743x_adapter *adapter)
> +{
> + struct lan743x_pci *pci = &adapter->pci;
> +
> + if (pci->init_flags & INIT_FLAG_PCI_REGIONS_REQUESTED) {

There is a pattern throughout the driver of maintaining flags to track
what was initialized and what was not, do you really need that, or can
you check for specific book keeping private information. Maintaining
flags is error prone and requires you to keep adding new ones, that does
not really scale.

[snip]

> +static void lan743x_intr_software_isr(void *context)
> +{
> + struct lan743x_adapter *adapter = context;
> + struct lan743x_intr *intr = &adapter->intr;
> + u32 int_sts;
> +
> + int_sts = lan743x_csr_read(adapter, INT_STS);
> + if (int_sts & INT_BIT_SW_GP_) {
> + lan743x_csr_write(adapter, INT_STS, INT_BIT_SW_GP_);
> + intr->software_isr_flag = 1;
> + }
> +}
> +
> +static void lan743x_tx_isr(void *context, u32 int_sts, u32 flags)
> +{
> + struct lan743x_tx *tx = context;
> + struct lan743x_adapter *adapter = tx->adapter;
> + int enable_flag = 1;

This is inherently a boolean type.

> + u32 int_en = 0;
> +
> + int_en = lan743x_csr_read(adapter, INT_EN_SET);
> + if (flags & LAN743X_VECTOR_FLAG_SOURCE_ENABLE_CLEAR) {
> + lan743x_csr_write(adapter, INT_EN_CLR,
> +   INT_BIT_DMA_TX_(tx->channel_number));
> + }
> + if (int_sts & INT_BIT_DMA_TX_(tx->channel_number)) {
> + u32 ioc_bit = DMAC_INT_BIT_TX_IOC_(tx->channel_number);
> + u32 dmac_int_sts;
> + u32 dmac_int_en;
> +
> + if (flags & LAN743X_VECTOR_FLAG_SOURCE_STATUS_READ)
> + dmac_int_sts = lan743x_csr_read(adapter, DMAC_INT_STS);
> + else
> + dmac_int_sts = ioc_bit;
> + if (flags & LAN743X_VECTOR_FLAG_SOURCE_ENABLE_CHECK)
> + dmac_int_en = lan743x_csr_read(adapter,
> +DMAC_INT_EN_SET);
> + else
> + dmac_int_en = ioc_bit;
> +
> + dmac_int_en &= ioc_bit;
> + dmac_int_sts &= dmac_int_en;
> + if (dmac_int_sts & ioc_bit) {
> + tasklet_schedule(&tx->tx_isr_bottom_half);
> + enable_flag = 0;/* tasklet will re-enable later */
> + }

Consider migrating your TX buffer reclamation to a NAPI context. If you
have one TX queue and one RX, the same NAPI context can be re-used, if
you have separate RX/TX queues, you may create a NAPI context per RX/TX
pair, or you may create separate NAPI contexts per TX queues and RX queues.

> + }
> + if (enable_flag)
> + /* enable isr */
> + lan743x_csr_write(adapter, INT_EN_SET,
> +   INT_BIT_DMA_TX_(tx->channel_number));
> +}
> +
> +static void lan743x_rx_isr(void *context, u32 int_sts, u32 flags)
> +{
> + struct lan743x_rx *rx = context;
> + struct lan743x_adapter *adapter = rx->adapter;
> + int enable_flag = 1;
> +
> + if (flags & LAN743X_VECTOR_FLAG_SOURCE_ENABLE_CLEAR) {
> + lan743x_csr_write(adapter, INT_EN_CLR,
> +

Re: [RFC PATCH V2] virtio_pci: Add SR-IOV support

2018-02-22 Thread Rustad, Mark D
> On Feb 22, 2018, at 10:26 AM, Christoph Hellwig  wrote:
> 
> Can we move this into common code as a a generic_sriov_configure
> helper?  Nothing is really virtio specific, and it seems like
> some other drivers could also use it, e.g. ena or nvme.

That seems like a good idea to me, especially if PCI developers concur.

-- 
Mark Rustad, Networking Division, Intel Corporation



[PATCH net-next] rds: rds_msg_zcopy should return error of null rm->data.op_mmp_znotifier

2018-02-22 Thread Sowmini Varadhan
if either or both of MSG_ZEROCOPY and SOCK_ZEROCOPY have not been
specified, the rm->data.op_mmp_znotifier allocation will be skipped.
In this case, it is invalid to pass down a cmsghdr with
RDS_CMSG_ZCOPY_COOKIE, so return EINVAL from rds_msg_zcopy for this
case.

Reported-by: syzbot+f893ae7bb2f6456df...@syzkaller.appspotmail.com
Fixes: 0cebaccef3ac ("rds: zerocopy Tx support.")
Signed-off-by: Sowmini Varadhan 
---
 net/rds/send.c |3 ++-
 1 files changed, 2 insertions(+), 1 deletions(-)

diff --git a/net/rds/send.c b/net/rds/send.c
index 028ab59..c848cbb 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -939,7 +939,8 @@ static int rds_cmsg_zcopy(struct rds_sock *rs, struct 
rds_message *rm,
 {
u32 *cookie;
 
-   if (cmsg->cmsg_len < CMSG_LEN(sizeof(*cookie)))
+   if (cmsg->cmsg_len < CMSG_LEN(sizeof(*cookie)) ||
+   !rm->data.op_mmp_znotifier)
return -EINVAL;
cookie = CMSG_DATA(cmsg);
rm->data.op_mmp_znotifier->z_cookie = *cookie;
-- 
1.7.1



[PATCH 2/2] arm: mvebu: 370-rd: Enable PHY interrupt handling

2018-02-22 Thread Andrew Lunn
The Ethernet switch has an embedded interrupt controller. Interrupts
from the embedded PHYs are part of this interrupt controller.
Explicitly list the MDIO bus the embedded PHYs are on, and wire up the
interrupts.

Signed-off-by: Andrew Lunn 
---
 arch/arm/boot/dts/armada-370-rd.dts | 32 
 1 file changed, 32 insertions(+)

diff --git a/arch/arm/boot/dts/armada-370-rd.dts 
b/arch/arm/boot/dts/armada-370-rd.dts
index 8b2fa9a49967..c28afb242393 100644
--- a/arch/arm/boot/dts/armada-370-rd.dts
+++ b/arch/arm/boot/dts/armada-370-rd.dts
@@ -56,6 +56,7 @@
 
 /dts-v1/;
 #include 
+#include 
 #include 
 #include "armada-370.dtsi"
 
@@ -243,6 +244,8 @@
#address-cells = <1>;
#size-cells = <0>;
reg = <0x10>;
+   interrupt-controller;
+   #interrupt-cells = <2>;
 
ports {
#address-cells = <1>;
@@ -278,6 +281,35 @@
};
};
};
+
+   mdio {
+   #address-cells = <1>;
+   #size-cells = <0>;
+
+   switchphy0: switchphy@0 {
+   reg = <0>;
+   interrupt-parent = <>;
+   interrupts = <0 IRQ_TYPE_LEVEL_HIGH>;
+   };
+
+   switchphy1: switchphy@1 {
+   reg = <1>;
+   interrupt-parent = <>;
+   interrupts = <1 IRQ_TYPE_LEVEL_HIGH>;
+   };
+
+   switchphy2: switchphy@2 {
+   reg = <2>;
+   interrupt-parent = <>;
+   interrupts = <2 IRQ_TYPE_LEVEL_HIGH>;
+   };
+
+   switchphy3: switchphy@3 {
+   reg = <3>;
+   interrupt-parent = <>;
+   interrupts = <3 IRQ_TYPE_LEVEL_HIGH>;
+   };
+   };
};
 };
 
-- 
2.15.1



[PATCH 0/2] mv88e6xxx: Poll when no interrupt defined

2018-02-22 Thread Andrew Lunn
Not all boards using the mv88e6xxx switches have the interrupt output
connected to a GPIO. On these boards phylib has to poll the PHYs,
rather than use interrupts. Have the driver poll the interrupt status
register, which is more efficient than having phylib do it. And it
enables other switch interrupts to be serviced.

The Armada 370RD is such a board without an interrupt GPIO. Now that
interrupts work, wire up the PHYs to make use of them.

Gregory: Are you O.K. for the second patch to go through netdev?

Andrew Lunn (2):
  net: dsa: mv88e6xxx: Poll when no interrupt defined
  arm: mvebu: 370-rd: Enable PHY interrupt handling

 arch/arm/boot/dts/armada-370-rd.dts |  32 
 drivers/net/dsa/mv88e6xxx/chip.c| 146 +---
 drivers/net/dsa/mv88e6xxx/chip.h|   3 +
 3 files changed, 138 insertions(+), 43 deletions(-)

-- 
2.15.1



[PATCH 1/2] net: dsa: mv88e6xxx: Poll when no interrupt defined

2018-02-22 Thread Andrew Lunn
Not all boards have the interrupt output from the switch connected to
a GPIO line. In such cases, phylib has to poll the internal PHYs,
rather than receive an interrupt when there is a change in the link
state. phylib polls once per second, and per PHY reads around 4
words. With a switch typically having 4 internal PHYs, this means 16
MDIO transactions per second.

Rather than performing this phylib level polling, have the driver poll
the interrupt status register. If the status register indicates an
interrupt condition, process the interrupts in the same way as if a
GPIO was used.

Polling 10 times a second places less load on the MDIO bus. But rather
than taking on average 0.5s to detect a link change, it takes less
than 0.05s. Additionally, other interrupts, such as the watchdog, ATU
and VTU violations will be reported.

Signed-off-by: Andrew Lunn 
---
 drivers/net/dsa/mv88e6xxx/chip.c | 146 +++
 drivers/net/dsa/mv88e6xxx/chip.h |   3 +
 2 files changed, 106 insertions(+), 43 deletions(-)

diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index e1b5c5c66fce..24486f96dd39 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -253,9 +253,8 @@ static void mv88e6xxx_g1_irq_unmask(struct irq_data *d)
chip->g1_irq.masked &= ~(1 << n);
 }
 
-static irqreturn_t mv88e6xxx_g1_irq_thread_fn(int irq, void *dev_id)
+static irqreturn_t mv88e6xxx_g1_irq_thread_work(struct mv88e6xxx_chip *chip)
 {
-   struct mv88e6xxx_chip *chip = dev_id;
unsigned int nhandled = 0;
unsigned int sub_irq;
unsigned int n;
@@ -280,6 +279,13 @@ static irqreturn_t mv88e6xxx_g1_irq_thread_fn(int irq, 
void *dev_id)
return (nhandled > 0 ? IRQ_HANDLED : IRQ_NONE);
 }
 
+static irqreturn_t mv88e6xxx_g1_irq_thread_fn(int irq, void *dev_id)
+{
+   struct mv88e6xxx_chip *chip = dev_id;
+
+   return mv88e6xxx_g1_irq_thread_work(chip);
+}
+
 static void mv88e6xxx_g1_irq_bus_lock(struct irq_data *d)
 {
struct mv88e6xxx_chip *chip = irq_data_get_irq_chip_data(d);
@@ -335,7 +341,7 @@ static const struct irq_domain_ops 
mv88e6xxx_g1_irq_domain_ops = {
.xlate  = irq_domain_xlate_twocell,
 };
 
-static void mv88e6xxx_g1_irq_free(struct mv88e6xxx_chip *chip)
+static void mv88e6xxx_g1_irq_free_common(struct mv88e6xxx_chip *chip)
 {
int irq, virq;
u16 mask;
@@ -344,8 +350,6 @@ static void mv88e6xxx_g1_irq_free(struct mv88e6xxx_chip 
*chip)
mask &= ~GENMASK(chip->g1_irq.nirqs, 0);
mv88e6xxx_g1_write(chip, MV88E6XXX_G1_CTL1, mask);
 
-   free_irq(chip->irq, chip);
-
for (irq = 0; irq < chip->g1_irq.nirqs; irq++) {
virq = irq_find_mapping(chip->g1_irq.domain, irq);
irq_dispose_mapping(virq);
@@ -354,7 +358,14 @@ static void mv88e6xxx_g1_irq_free(struct mv88e6xxx_chip 
*chip)
irq_domain_remove(chip->g1_irq.domain);
 }
 
-static int mv88e6xxx_g1_irq_setup(struct mv88e6xxx_chip *chip)
+static void mv88e6xxx_g1_irq_free(struct mv88e6xxx_chip *chip)
+{
+   mv88e6xxx_g1_irq_free(chip);
+
+   free_irq(chip->irq, chip);
+}
+
+static int mv88e6xxx_g1_irq_setup_common(struct mv88e6xxx_chip *chip)
 {
int err, irq, virq;
u16 reg, mask;
@@ -387,13 +398,6 @@ static int mv88e6xxx_g1_irq_setup(struct mv88e6xxx_chip 
*chip)
if (err)
goto out_disable;
 
-   err = request_threaded_irq(chip->irq, NULL,
-  mv88e6xxx_g1_irq_thread_fn,
-  IRQF_ONESHOT | IRQF_TRIGGER_FALLING,
-  dev_name(chip->dev), chip);
-   if (err)
-   goto out_disable;
-
return 0;
 
 out_disable:
@@ -411,6 +415,62 @@ static int mv88e6xxx_g1_irq_setup(struct mv88e6xxx_chip 
*chip)
return err;
 }
 
+static int mv88e6xxx_g1_irq_setup(struct mv88e6xxx_chip *chip)
+{
+   int err;
+
+   err = mv88e6xxx_g1_irq_setup_common(chip);
+   if (err)
+   return err;
+
+   err = request_threaded_irq(chip->irq, NULL,
+  mv88e6xxx_g1_irq_thread_fn,
+  IRQF_ONESHOT | IRQF_TRIGGER_FALLING,
+  dev_name(chip->dev), chip);
+   if (err)
+   mv88e6xxx_g1_irq_free_common(chip);
+
+   return err;
+}
+
+static void mv88e6xxx_irq_poll(struct kthread_work *work)
+{
+   struct mv88e6xxx_chip *chip = container_of(work,
+  struct mv88e6xxx_chip,
+  irq_poll_work.work);
+   mv88e6xxx_g1_irq_thread_work(chip);
+
+   kthread_queue_delayed_work(chip->kworker, &chip->irq_poll_work,
+  msecs_to_jiffies(100));
+}
+
+static int mv88e6xxx_irq_poll_setup(struct mv88e6xxx_chip *chip)
+{
+   int err;
+
+   err = 

Re: [patch net-next] mlxsw: spectrum_switchdev: Allow port enslavement to a VLAN-unaware bridge

2018-02-22 Thread David Ahern
On 2/22/18 1:55 PM, Ido Schimmel wrote:
> On Thu, Feb 22, 2018 at 12:27:35PM -0700, David Ahern wrote:
>> Ido:
>>
>> IPv4 works at boot; IPv6 requires the mcast snooping disable. For this
>> vlan-unaware bridges can that be set automatically?
> 
> Can you please try the following patch?
> 
...
> 
> It should fix your problem.

it does.

> 
> The real problem that I can then address in net-next is the fact that
> the Linux bridge tries to be smart and only resorts to flooding
> unregistered multicast packets in case its querier is disabled and in
> case it didn't detect any other querier in the network. This isn't
> currently reflected to underlying drivers. Only mcast snooping on/off.
> 
> Anyway, it's not related to the patch in question. You'd get the same
> behavior with VLAN-aware bridges.
> 
>> And then, what are the options for lldp?
> 
> Didn't understand the question. Can you clarify?
> 

nm. mental lapse.


Re: [PATCH v2 net-next 1/2] lan743x: Add main source files for new lan743x driver

2018-02-22 Thread Andrew Lunn
On Thu, Feb 22, 2018 at 10:45:34PM +0100, Andrew Lunn wrote:
> > Also I'm allocating interrupt resources on interface up, and freeing 
> > resources
> > on interface down. So if there is an up, down, up sequence then the driver
> > will allocate resources twice. In order for devm to work properly, should I
> > move all resource allocation into the probe function?
> 
> Hi Bryan
> 
> It is better to fail early if the resource is not available, so yes, i
> would register the interrupt handler in probe.

And we maintainers don't always agree with each other :-)

Doing irq handling in open/close without devm_ is also O.K.

 Andrew


Re: [PATCH v2 net-next 1/2] lan743x: Add main source files for new lan743x driver

2018-02-22 Thread Andrew Lunn
> Also I'm allocating interrupt resources on interface up, and freeing resources
> on interface down. So if there is an up, down, up sequence then the driver
> will allocate resources twice. In order for devm to work properly, should I
> move all resource allocation into the probe function?

Hi Bryan

It is better to fail early if the resource is not available, so yes, i
would register the interrupt handler in probe.

  Andrew


Re: [PATCH v2 net-next 1/2] lan743x: Add main source files for new lan743x driver

2018-02-22 Thread Florian Fainelli
On 02/22/2018 01:31 PM, bryan.whiteh...@microchip.com wrote:
>>> +static void lan743x_intr_unregister_isr(struct lan743x_adapter *adapter,
>>> +   int vector_index)
>>> +{
>>> +   struct lan743x_vector *vector = >intr.vector_list
>>> +   [vector_index];
>>> +
>>> +   devm_free_irq(>pci.pdev->dev, vector->irq, vector);
>>
>> Hi Bryan
>>
>> The point of devm_ is that you don't need to free resources you have
>> allocated using devm_. The core will release them when the device is
>> removed.
> 
> Hi Andrew,
> 
> When I remove the call devm_free_irq, I get a segmentation fault on close
> in pci_disable_msix. Did I do something else wrong?
> 
> Also I'm allocating interrupt resources on interface up, and freeing resources
> on interface down. So if there is an up, down, up sequence then the driver
> will allocate resources twice. In order for devm to work properly, should I
> move all resource allocation into the probe function?

No, most network drivers request their interrupt line in the open
function and free it in the close function. Because you are balancing
each devm_request_irq() with a devm_free_irq(), just don't use devm_*
functions, just the normal request_irq() and free_irq() functions.
-- 
Florian


RE: [PATCH v2 net-next 1/2] lan743x: Add main source files for new lan743x driver

2018-02-22 Thread Bryan.Whitehead
> > +static void lan743x_intr_unregister_isr(struct lan743x_adapter *adapter,
> > +   int vector_index)
> > +{
> > +   struct lan743x_vector *vector = >intr.vector_list
> > +   [vector_index];
> > +
> > +   devm_free_irq(>pci.pdev->dev, vector->irq, vector);
> 
> Hi Bryan
> 
> The point of devm_ is that you don't need to free resources you have
> allocated using devm_. The core will release them when the device is
> removed.

Hi Andrew,

When I remove the call devm_free_irq, I get a segmentation fault on close
in pci_disable_msix. Did I do something else wrong?

Also I'm allocating interrupt resources on interface up, and freeing resources
on interface down. So if there is an up, down, up sequence then the driver
will allocate resources twice. In order for devm to work properly, should I
move all resource allocation into the probe function?

Bryan


Re: [RFC PATCH v3 0/3] Enable virtio_net to act as a backup for a passthru device

2018-02-22 Thread Alexander Duyck
On Thu, Feb 22, 2018 at 12:11 AM, Jiri Pirko  wrote:
> Wed, Feb 21, 2018 at 09:57:09PM CET, alexander.du...@gmail.com wrote:
>>On Wed, Feb 21, 2018 at 11:38 AM, Jiri Pirko  wrote:
>>> Wed, Feb 21, 2018 at 06:56:35PM CET, alexander.du...@gmail.com wrote:
On Wed, Feb 21, 2018 at 8:58 AM, Jiri Pirko  wrote:
> Wed, Feb 21, 2018 at 05:49:49PM CET, alexander.du...@gmail.com wrote:
>>On Wed, Feb 21, 2018 at 8:11 AM, Jiri Pirko  wrote:
>>> Wed, Feb 21, 2018 at 04:56:48PM CET, alexander.du...@gmail.com wrote:
On Wed, Feb 21, 2018 at 1:51 AM, Jiri Pirko  wrote:
> Tue, Feb 20, 2018 at 11:33:56PM CET, kubak...@wp.pl wrote:
>>On Tue, 20 Feb 2018 21:14:10 +0100, Jiri Pirko wrote:
>>> Yeah, I can see it now :( I guess that the ship has sailed and we 
>>> are
>>> stuck with this ugly thing forever...
>>>
>>> Could you at least make some common code that is shared in between
>>> netvsc and virtio_net so this is handled in exactly the same way in 
>>> both?
>>
>>IMHO netvsc is a vendor specific driver which made a mistake on what
>>behaviour it provides (or tried to align itself with Windows SR-IOV).
>>Let's not make a far, far more commonly deployed and important driver
>>(virtio) bug-compatible with netvsc.
>
> Yeah. netvsc solution is a dangerous precedent here and in my 
> opinion
> it was a huge mistake to merge it. I personally would vote to unmerge 
> it
> and make the solution based on team/bond.
>
>
>>
>>To Jiri's initial comments, I feel the same way, in fact I've talked 
>>to
>>the NetworkManager guys to get auto-bonding based on MACs handled in
>>user space.  I think it may very well get done in next versions of NM,
>>but isn't done yet.  Stephen also raised the point that not everybody 
>>is
>>using NM.
>
> Can be done in NM, networkd or other network management tools.
> Even easier to do this in teamd and let them all benefit.
>
> Actually, I took a stab to implement this in teamd. Took me like an 
> hour
> and half.
>
> You can just run teamd with config option "kidnap" like this:
> # teamd/teamd -c '{"kidnap": true }'
>
> Whenever teamd sees another netdev to appear with the same mac as his,
> or whenever teamd sees another netdev to change mac to his,
> it enslaves it.
>
> Here's the patch (quick and dirty):
>
> Subject: [patch teamd] teamd: introduce kidnap feature
>
> Signed-off-by: Jiri Pirko 

So this doesn't really address the original problem we were trying to
solve. You asked earlier why the netdev name mattered and it mostly
has to do with configuration. Specifically what our patch is
attempting to resolve is the issue of how to allow a cloud provider to
upgrade their customer to SR-IOV support and live migration without
requiring them to reconfigure their guest. So the general idea with
our patch is to take a VM that is running with virtio_net only and
allow it to instead spawn a virtio_bypass master using the same netdev
name as the original virtio, and then have the virtio_net and VF come
up and be enslaved by the bypass interface. Doing it this way we can
allow for multi-vendor SR-IOV live migration support using a guest
that was originally configured for virtio only.

The problem with your solution is we already have teaming and bonding
as you said. There is already a write-up from Red Hat on how to do it
(https://access.redhat.com/documentation/en-us/red_hat_virtualization/4.1/html/virtual_machine_management_guide/sect-migrating_virtual_machines_between_hosts).
That is all well and good as long as you are willing to keep around
two VM images, one for virtio, and one for SR-IOV with live migration.
>>>
>>> You don't need 2 images. You need only one. The one with the team setup.
>>> That's it. If another netdev with the same mac appears, teamd will
>>> enslave it and run traffic on it. If not, ok, you'll go only through
>>> virtio_net.
>>
>>Isn't that going to cause the routing table to get messed up when we
>>rearrange the netdevs? We don't want to have a significant disruption
>> in traffic when we are adding/removing the VF. It seems like we would
>>need to invalidate any entries that were configured for the virtio_net
>>and reestablish them on the new team interface. Part of the criteria
>>we have been working with is that we should be able to transition from

Re: [PATCH v2 iproute2-next 2/3] ip: Display ip rule protocol used

2018-02-22 Thread David Ahern
On 2/21/18 7:12 PM, Donald Sharp wrote:
> diff --git a/ip/iprule.c b/ip/iprule.c
> index 00a6c26a..39008768 100644
> --- a/ip/iprule.c
> +++ b/ip/iprule.c
> @@ -47,6 +47,7 @@ static void usage(void)
>   "[ iif STRING ] [ oif STRING ] [ pref NUMBER ] [ 
> l3mdev ]\n"
>   "[ uidrange NUMBER-NUMBER ]\n"
>   "ACTION := [ table TABLE_ID ]\n"
> + "  [ protocol RPROTO ]\n"

Drop the 'R'; it makes it harder to read. Just 'PROTO' is fine.


>   "  [ nat ADDRESS ]\n"
>   "  [ realms [SRCREALM/]DSTREALM ]\n"
>   "  [ goto NUMBER ]\n"
> @@ -71,6 +72,8 @@ static struct
>   struct fib_rule_uid_range range;
>   inet_prefix src;
>   inet_prefix dst;
> + int protocol;
> + int protocolmask;
>  } filter;
>  
>  static inline int frh_get_table(struct fib_rule_hdr *frh, struct rtattr **tb)
> @@ -338,6 +341,10 @@ int print_rule(const struct sockaddr_nl *who, struct 
> nlmsghdr *n, void *arg)
>   rtnl_rtntype_n2a(frh->action,
>b1, sizeof(b1)));
>  
> + if (frh->proto != RTPROT_UNSPEC)
> + fprintf(fp, " proto %s ",
> + rtnl_rtprot_n2a(frh->proto, b1, sizeof(b1)));
> +
>   fprintf(fp, "\n");
>   fflush(fp);
>   return 0;
> @@ -391,6 +398,9 @@ static int flush_rule(const struct sockaddr_nl *who, 
> struct nlmsghdr *n,
>  
>   parse_rtattr(tb, FRA_MAX, RTM_RTA(frh), len);
>  
> + if ((filter.protocol^frh->proto))
> + return 0;
> +
>   if (tb[FRA_PRIORITY]) {
>   n->nlmsg_type = RTM_DELRULE;
>   n->nlmsg_flags = NLM_F_REQUEST;
> @@ -415,12 +425,6 @@ static int iprule_list_flush_or_save(int argc, char 
> **argv, int action)
>   if (af == AF_UNSPEC)
>   af = AF_INET;
>  
> - if (action != IPRULE_LIST && argc > 0) {
> - fprintf(stderr, "\"ip rule %s\" does not take any arguments.\n",
> - action == IPRULE_SAVE ? "save" : "flush");
> - return -1;
> - }
> -
>   switch (action) {
>   case IPRULE_SAVE:
>   if (save_rule_prep())
> @@ -508,7 +512,18 @@ static int iprule_list_flush_or_save(int argc, char 
> **argv, int action)
>   NEXT_ARG();
>   if (get_prefix(, *argv, af))
>   invarg("from value is invalid\n", *argv);
> - } else {
> + } else if (matches(*argv, "protocol") == 0) {
> + __u32 prot;
> + NEXT_ARG();
> + filter.protocolmask = -1;
> + if (rtnl_rtprot_a2n(, *argv)) {
> + if (strcmp(*argv, "all") != 0)
> + invarg("invalid \"protocol\"\n", *argv);
> + prot = 0;
> + filter.protocolmask = 0;
> + }
> + filter.protocol = prot;
> + } else{
>   if (matches(*argv, "dst") == 0 ||
>   matches(*argv, "to") == 0) {
>   NEXT_ARG();
> diff --git a/man/man8/ip-rule.8 b/man/man8/ip-rule.8
> index a5c47981..98b2573d 100644
> --- a/man/man8/ip-rule.8
> +++ b/man/man8/ip-rule.8
> @@ -50,6 +50,8 @@ ip-rule \- routing policy database management
>  .IR ACTION " := [ "
>  .B  table
>  .IR TABLE_ID " ] [ "
> +.B  protocol
> +.IR RPROTO " ] [ "

same here and others in this file





Re: [patch net-next] mlxsw: spectrum_switchdev: Allow port enslavement to a VLAN-unaware bridge

2018-02-22 Thread Ido Schimmel
On Thu, Feb 22, 2018 at 12:27:35PM -0700, David Ahern wrote:
> Ido:
> 
> IPv4 works at boot; IPv6 requires the mcast snooping disable. For this
> vlan-unaware bridges can that be set automatically?

Can you please try the following patch?

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c
index bbd238e50f05..54262af4e98f 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c
@@ -112,11 +112,11 @@ static const int 
mlxsw_sp_sfgc_bc_packet_types[MLXSW_REG_SFGC_TYPE_MAX] = {
[MLXSW_REG_SFGC_TYPE_UNREGISTERED_MULTICAST_NON_IP] = 1,
[MLXSW_REG_SFGC_TYPE_IPV4_LINK_LOCAL]   = 1,
[MLXSW_REG_SFGC_TYPE_IPV6_ALL_HOST] = 1,
+   [MLXSW_REG_SFGC_TYPE_UNREGISTERED_MULTICAST_IPV6]   = 1,
 };
 
 static const int mlxsw_sp_sfgc_mc_packet_types[MLXSW_REG_SFGC_TYPE_MAX] = {
[MLXSW_REG_SFGC_TYPE_UNREGISTERED_MULTICAST_IPV4]   = 1,
-   [MLXSW_REG_SFGC_TYPE_UNREGISTERED_MULTICAST_IPV6]   = 1,
 };
 
 static const int *mlxsw_sp_packet_type_sfgc_types[] = {

It should fix your problem.

The real problem that I can then address in net-next is the fact that
the Linux bridge tries to be smart and only resorts to flooding
unregistered multicast packets in case its querier is disabled and in
case it didn't detect any other querier in the network. This isn't
currently reflected to underlying drivers. Only mcast snooping on/off.

Anyway, it's not related to the patch in question. You'd get the same
behavior with VLAN-aware bridges.

> And then, what are the options for lldp?

Didn't understand the question. Can you clarify?


Re: [PATCH] dsa: ptp; mark dummy helpers as 'inline'

2018-02-22 Thread David Miller
From: Arnd Bergmann 
Date: Thu, 22 Feb 2018 12:44:40 +0100

> Declaring a static function in a header leads to a warning every
> time that header gets included without the function being used:
> 
> In file included from drivers/net/dsa/mv88e6xxx/chip.c:42:
> drivers/net/dsa/mv88e6xxx/ptp.h:92:13: error: 'mv88e6xxx_hwtstamp_work' 
> defined but not used [-Werror=unused-function]
>  static long mv88e6xxx_hwtstamp_work(struct ptp_clock_info *ptp)
> In file included from drivers/net/dsa/mv88e6xxx/chip.c:38:
> drivers/net/dsa/mv88e6xxx/global2.h:355:12: error: 'mv88e6xxx_g2_wait' 
> defined but not used [-Werror=unused-function]
>  static int mv88e6xxx_g2_wait(struct mv88e6xxx_chip *chip, int reg, u16 mask)
> ^
> drivers/net/dsa/mv88e6xxx/global2.h:350:12: error: 'mv88e6xxx_g2_update' 
> defined but not used [-Werror=unused-function]
>  static int mv88e6xxx_g2_update(struct mv88e6xxx_chip *chip, int reg, u16 
> update)
> ^~~
> drivers/net/dsa/mv88e6xxx/global2.h:345:12: error: 'mv88e6xxx_g2_write' 
> defined but not used [-Werror=unused-function]
>  static int mv88e6xxx_g2_write(struct mv88e6xxx_chip *chip, int reg, u16 val)
> ^~
> drivers/net/dsa/mv88e6xxx/global2.h:340:12: error: 'mv88e6xxx_g2_read' 
> defined but not used [-Werror=unused-function]
>  static int mv88e6xxx_g2_read(struct mv88e6xxx_chip *chip, int reg, u16 *val)
> 
> This marks all such functions in dsa inline to make sure we don't warn
> about them.
> 
> Fixes: c6fe0ad2c349 ("net: dsa: mv88e6xxx: add rx/tx timestamping support")
> Fixes: 0d632c3d6fe3 ("net: dsa: mv88e6xxx: add accessors for PTP/TAI 
> registers")
> Signed-off-by: Arnd Bergmann 

Applied, thanks Arnd.


[PATCH net-next] r8169: simplify and improve check for dash

2018-02-22 Thread Heiner Kallweit
r8168_check_dash() returns false anyway for all chip versions not
supporting dash. So we can simplify the check conditions.

In addition change the check functions to return bool instead of int,
because they actually return a bool value.

Signed-off-by: Heiner Kallweit 
---
 drivers/net/ethernet/realtek/r8169.c | 39 +---
 1 file changed, 9 insertions(+), 30 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8169.c 
b/drivers/net/ethernet/realtek/r8169.c
index 91a03d575..96db3283e 100644
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169.c
@@ -1472,19 +1472,19 @@ static void rtl8168_driver_stop(struct rtl8169_private 
*tp)
}
 }
 
-static int r8168dp_check_dash(struct rtl8169_private *tp)
+static bool r8168dp_check_dash(struct rtl8169_private *tp)
 {
u16 reg = rtl8168_get_ocp_reg(tp);
 
-   return (ocp_read(tp, 0x0f, reg) & 0x8000) ? 1 : 0;
+   return !!(ocp_read(tp, 0x0f, reg) & 0x8000);
 }
 
-static int r8168ep_check_dash(struct rtl8169_private *tp)
+static bool r8168ep_check_dash(struct rtl8169_private *tp)
 {
-   return (ocp_read(tp, 0x0f, 0x128) & 0x0001) ? 1 : 0;
+   return !!(ocp_read(tp, 0x0f, 0x128) & 0x0001);
 }
 
-static int r8168_check_dash(struct rtl8169_private *tp)
+static bool r8168_check_dash(struct rtl8169_private *tp)
 {
switch (tp->mac_version) {
case RTL_GIGA_MAC_VER_27:
@@ -1496,7 +1496,7 @@ static int r8168_check_dash(struct rtl8169_private *tp)
case RTL_GIGA_MAC_VER_51:
return r8168ep_check_dash(tp);
default:
-   return 0;
+   return false;
}
 }
 
@@ -4982,15 +4982,8 @@ static void r8168_pll_power_down(struct rtl8169_private 
*tp)
 {
void __iomem *ioaddr = tp->mmio_addr;
 
-   if ((tp->mac_version == RTL_GIGA_MAC_VER_27 ||
-tp->mac_version == RTL_GIGA_MAC_VER_28 ||
-tp->mac_version == RTL_GIGA_MAC_VER_31 ||
-tp->mac_version == RTL_GIGA_MAC_VER_49 ||
-tp->mac_version == RTL_GIGA_MAC_VER_50 ||
-tp->mac_version == RTL_GIGA_MAC_VER_51) &&
-   r8168_check_dash(tp)) {
+   if (r8168_check_dash(tp))
return;
-   }
 
if ((tp->mac_version == RTL_GIGA_MAC_VER_23 ||
 tp->mac_version == RTL_GIGA_MAC_VER_24) &&
@@ -8202,15 +8195,8 @@ static void rtl_remove_one(struct pci_dev *pdev)
struct net_device *dev = pci_get_drvdata(pdev);
struct rtl8169_private *tp = netdev_priv(dev);
 
-   if ((tp->mac_version == RTL_GIGA_MAC_VER_27 ||
-tp->mac_version == RTL_GIGA_MAC_VER_28 ||
-tp->mac_version == RTL_GIGA_MAC_VER_31 ||
-tp->mac_version == RTL_GIGA_MAC_VER_49 ||
-tp->mac_version == RTL_GIGA_MAC_VER_50 ||
-tp->mac_version == RTL_GIGA_MAC_VER_51) &&
-   r8168_check_dash(tp)) {
+   if (r8168_check_dash(tp))
rtl8168_driver_stop(tp);
-   }
 
netif_napi_del(>napi);
 
@@ -8640,15 +8626,8 @@ static int rtl_init_one(struct pci_dev *pdev, const 
struct pci_device_id *ent)
   rtl_chip_infos[chipset].jumbo_tx_csum ? "ok" : "ko");
}
 
-   if ((tp->mac_version == RTL_GIGA_MAC_VER_27 ||
-tp->mac_version == RTL_GIGA_MAC_VER_28 ||
-tp->mac_version == RTL_GIGA_MAC_VER_31 ||
-tp->mac_version == RTL_GIGA_MAC_VER_49 ||
-tp->mac_version == RTL_GIGA_MAC_VER_50 ||
-tp->mac_version == RTL_GIGA_MAC_VER_51) &&
-   r8168_check_dash(tp)) {
+   if (r8168_check_dash(tp))
rtl8168_driver_start(tp);
-   }
 
netif_carrier_off(dev);
 
-- 
2.16.2



Re: [PATCH net] net: aquantia: Fix error handling in aq_pci_probe()

2018-02-22 Thread David Miller
From: Dan Carpenter 
Date: Thu, 22 Feb 2018 12:11:55 +0300

> We should check "self->aq_hw" for allocation failure, and also we should
> free it on the error paths.
> 
> Fixes: 23ee07ad3c2f ("net: aquantia: Cleanup pci functions module")
> Signed-off-by: Dan Carpenter 

Applied, thanks Dan.


Re: [PATCH] bpf: add schedule points in percpu arrays management

2018-02-22 Thread Daniel Borkmann
[ +Dennis for mm/pcpu ]

On 02/22/2018 05:33 PM, Eric Dumazet wrote:
> From: Eric Dumazet 
> 
> syzbot managed to trigger RCU detected stalls in
> bpf_array_free_percpu()
> 
> It takes time to allocate a huge percpu map, but even more time to free
> it.
> 
> Since we run in process context, use cond_resched() to yield cpu if
> needed.
> 
> Fixes: a10423b87a7e ("bpf: introduce BPF_MAP_TYPE_PERCPU_ARRAY map")
> Signed-off-by: Eric Dumazet 
> Reported-by: syzbot 

Applied to bpf tree, thanks Eric!

> ---
>  kernel/bpf/arraymap.c |5 -
>  1 file changed, 4 insertions(+), 1 deletion(-)
> diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
> index 
> a364c408f25a54a8175c92b6004a5e7e15f198cb..14750e7c5ee4872e4a7426e960bea7ae001e6623
>  100644
> --- a/kernel/bpf/arraymap.c
> +++ b/kernel/bpf/arraymap.c
> @@ -26,8 +26,10 @@ static void bpf_array_free_percpu(struct bpf_array *array)
>  {
>   int i;
>  
> - for (i = 0; i < array->map.max_entries; i++)
> + for (i = 0; i < array->map.max_entries; i++) {
>   free_percpu(array->pptrs[i]);
> + cond_resched();
> + }
>  }
>  
>  static int bpf_array_alloc_percpu(struct bpf_array *array)
> @@ -43,6 +45,7 @@ static int bpf_array_alloc_percpu(struct bpf_array *array)
>   return -ENOMEM;
>   }
>   array->pptrs[i] = ptr;
> + cond_resched();
>   }
>  
>   return 0;
> 



Re: [PATCH bpf v2] bpf: fix rcu lockdep warning for lpm_trie map_free callback

2018-02-22 Thread Daniel Borkmann
On 02/22/2018 07:10 PM, Yonghong Song wrote:
> Commit 9a3efb6b661f ("bpf: fix memory leak in lpm_trie map_free callback 
> function")
> fixed a memory leak and removed unnecessary locks in map_free callback 
> function.
> Unfortunately, it introduced a lockdep warning. When lockdep checking is 
> turned on,
> running tools/testing/selftests/bpf/test_lpm_map will have:
> 
>   [   98.294321] =
>   [   98.294807] WARNING: suspicious RCU usage
>   [   98.295359] 4.16.0-rc2+ #193 Not tainted
>   [   98.295907] -
>   [   98.296486] /home/yhs/work/bpf/kernel/bpf/lpm_trie.c:572 suspicious 
> rcu_dereference_check() usage!
>   [   98.297657]
>   [   98.297657] other info that might help us debug this:
>   [   98.297657]
>   [   98.298663]
>   [   98.298663] rcu_scheduler_active = 2, debug_locks = 1
>   [   98.299536] 2 locks held by kworker/2:1/54:
>   [   98.300152]  #0:  ((wq_completion)"events"){+.+.}, at: 
> [<196bc1f0>] process_one_work+0x157/0x5c0
>   [   98.301381]  #1:  ((work_completion)(>work)){+.+.}, at: 
> [<196bc1f0>] process_one_work+0x157/0x5c0
> 
> Since actual trie tree removal happens only after no other
> accesses to the tree are possible, replacing
>   rcu_dereference_protected(*slot, lockdep_is_held(&trie->lock))
> with
>   rcu_dereference_protected(*slot, 1)
> fixed the issue.
> 
> Fixes: 9a3efb6b661f ("bpf: fix memory leak in lpm_trie map_free callback 
> function")
> Reported-by: Eric Dumazet 
> Suggested-by: Eric Dumazet 
> Signed-off-by: Yonghong Song 

Applied to bpf tree, thanks everyone!


Re: [PATCH net-next 0/3] nfp: build and FW initramfs updates

2018-02-22 Thread David Miller
From: Jakub Kicinski 
Date: Wed, 21 Feb 2018 19:50:04 -0800

> This set brings empty makefiles to allow building single object files
> (useful for build-testing), Kbuild does not cater to this use case
> too well.  There are two ethernet drivers right now which suffer
> from this (nfp, aquantia), both are fixed.
> 
> Dirk adds an uncommon FW image name to the list of firmware files
> module may request.

Series applied, thanks for following up on this Jakub.


[PATCH net-next] r8169: disable WOL per default

2018-02-22 Thread Heiner Kallweit
Currently, if BIOS enables WOL in the chip, settings are inconsistent
because the device isn't marked as wakeup-enabled (if not done
explicitly via userspace tools). This causes issues with suspend/
resume because mdio_bus_phy_may_suspend() checks whether device is
wakeup-enabled. In detail MDIO bus access in phy_suspend() can fail
because the MDIO bus is disabled.

In the history of the driver we find two competing approaches:
8f9d5138035d "r8169: remember WOL preferences on driver load" prefers
to preserve what the BIOS may have set, whilst bde135a672bf
"r8169: only enable PCI wakeups when WOL is active" disabled PCI
wakeup per default to work around a bug on one platform.

Seems like nobody complained after the latter patch about non-working
WOL, which makes me think that nobody uses WOL w/o configuring it
explicitly.

My opinion:
The vast majority of users don't use WOL even if the BIOS enables it in
the chip. And having WOL being active keeps the PHY(s) from powering
down if being idle.
If somebody needs WOL, he can enable it during boot, e.g. by
configuring systemd.link/WakeOnLan.

Therefore, to make WOL consistent again, disable it per default.

Signed-off-by: Heiner Kallweit 
---
 drivers/net/ethernet/realtek/r8169.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8169.c 
b/drivers/net/ethernet/realtek/r8169.c
index c16b97a56..91a03d575 100644
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169.c
@@ -8512,11 +8512,12 @@ static int rtl_init_one(struct pci_dev *pdev, const 
struct pci_device_id *ent)
tp->txd_version = rtl_chip_infos[chipset].txd_version;
 
RTL_W8(Cfg9346, Cfg9346_Unlock);
-   RTL_W8(Config1, RTL_R8(Config1) | PMEnable);
-   RTL_W8(Config5, RTL_R8(Config5) & (BWF | MWF | UWF | LanWake | 
PMEStatus));
tp->features |= rtl_try_msi(tp, cfg);
RTL_W8(Cfg9346, Cfg9346_Lock);
 
+   /* override BIOS settings, use userspace tools to enable WOL */
+   __rtl8169_set_wol(tp, 0);
+
if (rtl_tbi_enabled(tp)) {
tp->set_speed = rtl8169_set_speed_tbi;
tp->get_link_ksettings = rtl8169_get_link_ksettings_tbi;
-- 
2.16.2



Re: pull-request: mac80211-next 2018-02-22

2018-02-22 Thread Johannes Berg
On Thu, 2018-02-22 at 15:19 -0500, David Miller wrote:
> From: Johannes Berg 
> Date: Thu, 22 Feb 2018 21:16:18 +0100
> 
> > Wireless is slow ... but we're preparing for HE (802.11ax),
> > so I guess soon we'll have a big chunk of work coming :-)
> 
> I wondered where you guys have been hiding :-)

Yeah, I don't like this development model much, but the spec isn't
finished yet and every time I look at an area I end up changing it
*again* which isn't fun to do upstream :-)

(Was just doing the HE sniffer stuff in radiotap these days ... uh,
yeah, I've more or less rewritten it twice already - hopefully no more)

johannes


Re: pull-request: mac80211-next 2018-02-22

2018-02-22 Thread David Miller
From: Johannes Berg 
Date: Thu, 22 Feb 2018 21:16:18 +0100

> Wireless is slow ... but we're preparing for HE (802.11ax),
> so I guess soon we'll have a big chunk of work coming :-)

I wondered where you guys have been hiding :-)

> Please pull and let me know if there's any problem.

Pulled, thank you!


Re: pull-request: mac80211 2018-02-22

2018-02-22 Thread David Miller
From: Johannes Berg 
Date: Thu, 22 Feb 2018 21:08:39 +0100

> A bunch of fixes, including the nla_put_string() issue
> just in from Kees. Otherwise nothing really super urgent
> or interesting.
> 
> Please pull and let me know if there's any problem.

Pulled.

Thanks for taking care of that NLA_STRING thing so fast.


pull-request: mac80211-next 2018-02-22

2018-02-22 Thread Johannes Berg
Hi Dave,

Wireless is slow ... but we're preparing for HE (802.11ax),
so I guess soon we'll have a big chunk of work coming :-)

Please pull and let me know if there's any problem.

Thanks,
johannes



The following changes since commit 91e6dd8284256ef62b43b78da6e7684e4f06ac2f:

  ipmr: Fix ptrdiff_t print formatting (2018-01-30 09:20:25 -0500)

are available in the Git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211-next.git 
tags/mac80211-next-for-davem-2018-02-22

for you to fetch changes up to 94ba92713f8329c96e0a8e2880b3c1a785d1c95c:

  mac80211: Call mgd_prep_tx before transmitting deauthentication (2018-02-22 
21:13:04 +0100)


Various updates across wireless.

One thing to note: I've included a new ethertype
that wireless uses (ETH_P_PREAUTH) in if_ether.h.


Ben Greear (1):
  mac80211: Add txq flags to debugfs

Benjamin Beichler (3):
  mac80211_hwsim: add permanent mac address option for new radios
  mac80211_hwsim: add nl_err_msg in hwsim_new_radio in netlink case
  mac80211_hwsim: add generation count for netlink dump operation

Colin Ian King (1):
  mac80211: remove redundant initialization to pointer 'hdr'

Denis Kenzior (1):
  uapi: Add 802.11 Preauthentication to if_ether

Ilan Peer (1):
  mac80211: Call mgd_prep_tx before transmitting deauthentication

Johannes Berg (2):
  nl80211: remove unnecessary genlmsg_cancel() calls
  mac80211: support reporting A-MPDU EOF bit value/known

Sara Sharon (1):
  mac80211: add get TID helper

Srinivas Dasari (4):
  cfg80211/nl80211: Optional authentication offload to userspace
  nl80211: Allow SAE Authentication for NL80211_CMD_CONNECT
  nl80211: Fix external_auth check for offloaded authentication
  ieee80211: Increase PMK maximum length to 64 bytes

Sunil Dutt (1):
  nl80211: Introduce scan flags to emphasize requested scan behavior

Venkateswara Naralasetty (2):
  cfg80211: send ack_signal to user in probe client response
  mac80211: Add tx ack signal support in sta info

tami...@codeaurora.org (2):
  cfg80211: Add support to notify station's opmode change to userspace
  mac80211: Add support to notify ht/vht opmode modification.

 drivers/net/wireless/ath/wil6210/cfg80211.c |   3 +-
 drivers/net/wireless/mac80211_hwsim.c   |  81 ---
 drivers/net/wireless/mac80211_hwsim.h   |   9 +-
 include/linux/ieee80211.h   |  14 +-
 include/net/cfg80211.h  | 104 +-
 include/net/ieee80211_radiotap.h|   2 +
 include/net/mac80211.h  |  19 +++
 include/uapi/linux/if_ether.h   |   1 +
 include/uapi/linux/nl80211.h|  90 +++-
 net/mac80211/debugfs.c  |   1 +
 net/mac80211/debugfs_sta.c  |  10 +-
 net/mac80211/iface.c|   3 +-
 net/mac80211/michael.c  |   2 +-
 net/mac80211/mlme.c |  18 ++-
 net/mac80211/rc80211_minstrel_ht.c  |   2 +-
 net/mac80211/rx.c   |  24 +++-
 net/mac80211/sta_info.c |   6 +
 net/mac80211/sta_info.h |   2 +
 net/mac80211/status.c   |  11 +-
 net/mac80211/tx.c   |  11 +-
 net/mac80211/vht.c  |   9 ++
 net/mac80211/wpa.c  |   8 +-
 net/wireless/nl80211.c  | 203 +++-
 net/wireless/rdev-ops.h |  15 ++
 net/wireless/trace.h|  23 
 25 files changed, 584 insertions(+), 87 deletions(-)


pull-request: mac80211 2018-02-22

2018-02-22 Thread Johannes Berg
Hi Dave,

A bunch of fixes, including the nla_put_string() issue
just in from Kees. Otherwise nothing really super urgent
or interesting.

Please pull and let me know if there's any problem.

Thanks,
johannes



The following changes since commit ba804bb4b72e57374b5f567b783aa0298fba0ce6:

  Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net (2018-01-26 
09:03:16 -0800)

are available in the Git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211.git 
tags/mac80211-for-davem-2018-02-22

for you to fetch changes up to 657308f73e674e86b60509a430a46e569bf02846:

  regulatory: add NUL to request alpha2 (2018-02-22 20:57:48 +0100)


Various fixes across the tree, the shortlog basically says it all:

  cfg80211: fix cfg80211_beacon_dup
  -> old bug in this code

  cfg80211: clear wep keys after disconnection
  -> certain ways of disconnecting left the keys

  mac80211: round IEEE80211_TX_STATUS_HEADROOM up to multiple of 4
  -> alignment issues with using 14 bytes

  mac80211: Do not disconnect on invalid operating class
  -> if the AP has a bogus operating class, let it be

  mac80211: Fix sending ADDBA response for an ongoing session
  -> don't send the same frame twice

  cfg80211: use only 1Mbps for basic rates in mesh
  -> interop issue with old versions of our code

  mac80211_hwsim: don't use WQ_MEM_RECLAIM
  -> it causes splats because it flushes work on a non-reclaim WQ

  regulatory: add NUL to request alpha2
  -> nla_put_string() issue from Kees

  mac80211: mesh: fix wrong mesh TTL offset calculation
  -> protocol issue

  mac80211: fix a possible leak of station stats
  -> error path might leak memory

  mac80211: fix calling sleeping function in atomic context
  -> percpu allocations need to be made with gfp flags


Arnd Bergmann (1):
  cfg80211: fix cfg80211_beacon_dup

Avraham Stern (1):
  cfg80211: clear wep keys after disconnection

Felix Fietkau (1):
  mac80211: round IEEE80211_TX_STATUS_HEADROOM up to multiple of 4

Ilan Peer (2):
  mac80211: Do not disconnect on invalid operating class
  mac80211: Fix sending ADDBA response for an ongoing session

Johannes Berg (3):
  cfg80211: use only 1Mbps for basic rates in mesh
  mac80211_hwsim: don't use WQ_MEM_RECLAIM
  regulatory: add NUL to request alpha2

Peter Oh (1):
  mac80211: mesh: fix wrong mesh TTL offset calculation

Sara Sharon (2):
  mac80211: fix a possible leak of station stats
  mac80211: fix calling sleeping function in atomic context

 drivers/net/wireless/mac80211_hwsim.c |  2 +-
 include/net/mac80211.h|  2 +-
 include/net/regulatory.h  |  2 +-
 net/mac80211/agg-rx.c |  4 +---
 net/mac80211/cfg.c|  2 +-
 net/mac80211/ieee80211_i.h|  2 +-
 net/mac80211/mesh.c   | 17 ++---
 net/mac80211/spectmgmt.c  |  7 +++
 net/mac80211/sta_info.c   |  3 ++-
 net/wireless/mesh.c   | 25 ++---
 net/wireless/sme.c|  2 ++
 11 files changed, 41 insertions(+), 27 deletions(-)


Re: [PATCH net-next] ibmvnic: Split counters for scrq/pools/napi

2018-02-22 Thread David Miller
From: Nathan Fontenot 
Date: Wed, 21 Feb 2018 21:33:56 -0600

> The approach of one counter to rule them all when tracking the number
> of active sub-crqs, pools, and napi has problems handling some failover
> scenarios. This is due to the split in initializing the sub crqs,
> pools and napi in different places and the placement of updating
> the active counts.
> 
> This patch simplifies this by having a counter for tx and rx
> sub-crqs, pools, and napi.
> 
> Signed-off-by: Nathan Fontenot 

Applied, thanks Nathan.


Re: [PATCH] selftest: fix kselftest-merge depend on 'RUNTIME_TESTING_MENU'

2018-02-22 Thread Anders Roxell
On 22 February 2018 at 12:53, Zong Li  wrote:
> Since the 'commit d3deafaa8b5c ("lib/: make RUNTIME_TESTS a menuconfig
> to ease disabling it all")', the make kselftest-merge cannot merge the
> config dependencies of kselftest to the existing .config file.
>
> These config dependencies of kselftest need to enable the
> 'CONFIG_RUNTIME_TESTING_MENU=y' at the same time.

Is this patch needed when patch sha 'f29c79906064 ("lib/Kconfig.debug: enable
RUNTIME_TESTING_MENU")' find its way into the kernel ?
I think it's in linux-next now.

Cheers,
Anders

>
> Signed-off-by: Zong Li 
> Cc: Greentime Hu 
> ---
>  tools/testing/selftests/bpf/config | 1 +
>  tools/testing/selftests/firmware/config| 1 +
>  tools/testing/selftests/kmod/config| 1 +
>  tools/testing/selftests/lib/config | 1 +
>  tools/testing/selftests/net/config | 1 +
>  tools/testing/selftests/static_keys/config | 1 +
>  tools/testing/selftests/sysctl/config  | 1 +
>  tools/testing/selftests/user/config| 1 +
>  8 files changed, 8 insertions(+)
>
> diff --git a/tools/testing/selftests/bpf/config 
> b/tools/testing/selftests/bpf/config
> index 983dd25d49f4..d93b82144b19 100644
> --- a/tools/testing/selftests/bpf/config
> +++ b/tools/testing/selftests/bpf/config
> @@ -1,3 +1,4 @@
> +CONFIG_RUNTIME_TESTING_MENU=y
>  CONFIG_BPF=y
>  CONFIG_BPF_SYSCALL=y
>  CONFIG_NET_CLS_BPF=m
> diff --git a/tools/testing/selftests/firmware/config 
> b/tools/testing/selftests/firmware/config
> index c8137f70e291..01d7445ef007 100644
> --- a/tools/testing/selftests/firmware/config
> +++ b/tools/testing/selftests/firmware/config
> @@ -1 +1,2 @@
> +CONFIG_RUNTIME_TESTING_MENU=y
>  CONFIG_TEST_FIRMWARE=y
> diff --git a/tools/testing/selftests/kmod/config 
> b/tools/testing/selftests/kmod/config
> index 259f4fd6b5e2..37070985e428 100644
> --- a/tools/testing/selftests/kmod/config
> +++ b/tools/testing/selftests/kmod/config
> @@ -1,3 +1,4 @@
> +CONFIG_RUNTIME_TESTING_MENU=y
>  CONFIG_TEST_KMOD=m
>  CONFIG_TEST_LKM=m
>  CONFIG_XFS_FS=m
> diff --git a/tools/testing/selftests/lib/config 
> b/tools/testing/selftests/lib/config
> index 126933bcc950..d1fe14c2c8cb 100644
> --- a/tools/testing/selftests/lib/config
> +++ b/tools/testing/selftests/lib/config
> @@ -1,3 +1,4 @@
> +CONFIG_RUNTIME_TESTING_MENU=y
>  CONFIG_TEST_PRINTF=m
>  CONFIG_TEST_BITMAP=m
>  CONFIG_PRIME_NUMBERS=m
> diff --git a/tools/testing/selftests/net/config 
> b/tools/testing/selftests/net/config
> index 7177bea1fdfa..847a99873128 100644
> --- a/tools/testing/selftests/net/config
> +++ b/tools/testing/selftests/net/config
> @@ -1,3 +1,4 @@
> +CONFIG_RUNTIME_TESTING_MENU=y
>  CONFIG_USER_NS=y
>  CONFIG_BPF_SYSCALL=y
>  CONFIG_TEST_BPF=m
> diff --git a/tools/testing/selftests/static_keys/config 
> b/tools/testing/selftests/static_keys/config
> index d538fb774b96..732d17f6b9a1 100644
> --- a/tools/testing/selftests/static_keys/config
> +++ b/tools/testing/selftests/static_keys/config
> @@ -1 +1,2 @@
> +CONFIG_RUNTIME_TESTING_MENU=y
>  CONFIG_TEST_STATIC_KEYS=m
> diff --git a/tools/testing/selftests/sysctl/config 
> b/tools/testing/selftests/sysctl/config
> index 6ca14800d755..772ce8c3c0d9 100644
> --- a/tools/testing/selftests/sysctl/config
> +++ b/tools/testing/selftests/sysctl/config
> @@ -1 +1,2 @@
> +CONFIG_RUNTIME_TESTING_MENU=y
>  CONFIG_TEST_SYSCTL=y
> diff --git a/tools/testing/selftests/user/config 
> b/tools/testing/selftests/user/config
> index 784ed8416324..f9f491fa4ae8 100644
> --- a/tools/testing/selftests/user/config
> +++ b/tools/testing/selftests/user/config
> @@ -1 +1,2 @@
> +CONFIG_RUNTIME_TESTING_MENU=y
>  CONFIG_TEST_USER_COPY=m
> --
> 2.16.1
>


Re: nla_put_string() vs NLA_STRING

2018-02-22 Thread Johannes Berg
On Tue, 2018-02-20 at 22:00 -0800, Kees Cook wrote:

> It seems that in at least one case[1], nla_put_string() is being used
> on an NLA_STRING, which lacks a NULL terminator, which leads to
> silliness when nla_put_string() uses strlen() to figure out the size:

Fun! I'm not a big fan of the whole NLA_STRING thing with or without
NUL terminator anyway, it's a bit confusing at times :-)

> This is a problem at least here:
> 
> struct regulatory_request {
> ...
> char alpha2[2];
> ...
> 
> static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
> ...
> [NL80211_ATTR_REG_ALPHA2] = { .type = NLA_STRING, .len = 2 },
> ...

Yeah, this is clearly stupid. We already fixed one of these, see commit
a5fe8e7695dc ("regulatory: add NUL to alpha2"). I'll fix up the second
one too.

> So, this specific problem needs fixing (in at least two places calling
> nla_put_string(msg, NL80211_ATTR_REG_ALPHA2, ...)). While I suspect
> it's only ever written an extra byte from the following variable in
> the structure which is an enum nl80211_dfs_regions, 

Only one, since the other has alpha2[3] already :-)

And in that case, yes, on little endian and only if the dfs region is
non-zero, though the dfs region was added later so dunno what else
there was - but certainly this struct would have always contained some
enum value that had zero-bytes.

> I worry there
> might be a lot more of these (though I'd hope unterminated strings are
> uncommon for internal representation).

Generally they are, I'd argue.

> And more generally, it seems
> like only the NLA _input_ functions actually check nla_policy details.
> It seems that the output functions should do the same too, yes?

It doesn't really work that way - there's no real guarantee that the
policy is symmetric on input/output.

johannes


Re: [PATCH] selftest: fix kselftest-merge depend on 'RUNTIME_TESTING_MENU'

2018-02-22 Thread Luis R. Rodriguez
On Thu, Feb 22, 2018 at 07:53:07PM +0800, Zong Li wrote:
> Since the 'commit d3deafaa8b5c ("lib/: make RUNTIME_TESTS a menuconfig
> to ease disabling it all")', the make kselftest-merge cannot merge the
> config dependencies of kselftest to the existing .config file.
> 
> These config dependencies of kselftest need to enable the
> 'CONFIG_RUNTIME_TESTING_MENU=y' at the same time.
> 
> Signed-off-by: Zong Li 
> Cc: Greentime Hu 

Please add respective Fixes: tag with the sha1sum, and commit name.

  Luis


Re: [PATCH v2 net-next] net: dsa: mv88e6xxx: scratch registers and external MDIO pins

2018-02-22 Thread David Miller
From: Andrew Lunn 
Date: Thu, 22 Feb 2018 01:51:49 +0100

> MV88E6352 and later switches support GPIO control through the "Scratch
> & Misc" global2 register. Two of the pins controlled this way on the
> mv88e6390 family are the external MDIO pins. They can either be used
> as part of the MII interface for port 0, GPIOs, or MDIO. Add a
> function to configure them for MDIO, if possible, and call it when
> registering the external MDIO bus.
> 
> Suggested-by: Russell King 
> Signed-off-by: Andrew Lunn 
> ---
> v2: Make stub function static inline, as reported by 0-day.

Applied, thanks Andrew.


Re: [PATCH net] ibmvnic: Fix early release of login buffer

2018-02-22 Thread David Miller
From: Thomas Falcon 
Date: Wed, 21 Feb 2018 18:18:30 -0600

> The login buffer is released before the driver can perform
> sanity checks between resources the driver requested and what
> firmware will provide. Don't release the login buffer until
> the sanity check is performed.
> 
> Fixes: 34f0f4e3f488 ("ibmvnic: Fix login buffer memory leaks")
> Signed-off-by: Thomas Falcon 

Applied.


Re: [PATCH net-next] ibmvnic: Fix TX descriptor tracking

2018-02-22 Thread David Miller
From: Thomas Falcon 
Date: Wed, 21 Feb 2018 18:21:10 -0600

> With the recent change, transmissions that only needed
> one descriptor were being missed. The result is that such
> packets were tracked as outstanding transmissions but never
> removed when its completion notification was received.
> 
> Fixes: ffc385b95adb ("ibmvnic: Keep track of supplementary TX descriptors")
> Signed-off-by: Thomas Falcon 

Applied.


Re: [PATCH] net/smc9194: Remove bogus CONFIG_MAC reference

2018-02-22 Thread David Miller
From: Finn Thain 
Date: Thu, 22 Feb 2018 09:24:59 +1100 (AEDT)

> AFAIK the only version of smc9194.c with Mac support is the one in the
> linux-mac68k CVS repo, which never made it to the mainline.
> 
> Despite that, from v2.3.45, arch/m68k/config.in listed CONFIG_SMC9194
> under CONFIG_MAC. This mistake got carried over into Kconfig in v2.5.55.
> (See pre-git era "[PATCH] add m68k dependencies to net driver config".)
> 
> Signed-off-by: Finn Thain 

Applied, thank you.


Re: [PATCH net-next 0/7] net/ipv6: Add support for path selection using hash of 5-tuple

2018-02-22 Thread David Miller
From: David Ahern 
Date: Thu, 22 Feb 2018 12:31:01 -0700

> On 2/22/18 12:27 PM, David Miller wrote:
>> From: David Ahern 
>> Date: Wed, 21 Feb 2018 10:49:47 -0800
>> 
>>> Patch 5 adds the L4 hash support.
>> 
>> Please address Ido's feedback about how the ports aren't actually being
>> taken into consideration because they aren't present in the flow
>> information being used.
> 
> It's the forwarding case; I need to add the skb to the route lookup
> functions. I'll send a v2 with that change in the next few days.

Ok, thank you.


Re: [pull request][for-next 0/7] Mellanox, mlx5 shared code updates 2018-02-21

2018-02-22 Thread David Miller
From: Saeed Mahameed 
Date: Wed, 21 Feb 2018 12:13:47 -0800

> This series includes shared code updates for mlx5 core driver for both
> netdev and rdma subsystems.  This series should be pulled to both
> trees so we can continue netdev and rdma specific submissions separately.
> 
> For more information please see tag log below.
> 
> P.S. We expect two more shared code pull requests.
> 
> The series doesn't cause any conflict with the latest mlx5 net fixes
> series.
> 
> Please pull and let me know if there's any issue,

Looks good to me, pulled into net-next, thanks.


[PATCH next-queue 3/3] ixgbe: remove unneeded ipsec state free callback

2018-02-22 Thread Shannon Nelson
With commit 7f05b467a735 ("xfrm: check for xdo_dev_state_free")
we no longer need to add an empty callback function
to the driver, so now let's remove the useless code.

Signed-off-by: Shannon Nelson 
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c | 13 -
 1 file changed, 13 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c 
b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
index 8623013..f225452 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
@@ -724,23 +724,10 @@ static bool ixgbe_ipsec_offload_ok(struct sk_buff *skb, 
struct xfrm_state *xs)
return true;
 }
 
-/**
- * ixgbe_ipsec_free - called by xfrm garbage collections
- * @xs: pointer to transformer state struct
- *
- * We don't have any garbage to collect, so we shouldn't bother
- * implementing this function, but the XFRM code doesn't check for
- * existence before calling the API callback.
- **/
-static void ixgbe_ipsec_free(struct xfrm_state *xs)
-{
-}
-
 static const struct xfrmdev_ops ixgbe_xfrmdev_ops = {
.xdo_dev_state_add = ixgbe_ipsec_add_sa,
.xdo_dev_state_delete = ixgbe_ipsec_del_sa,
.xdo_dev_offload_ok = ixgbe_ipsec_offload_ok,
-   .xdo_dev_state_free = ixgbe_ipsec_free,
 };
 
 /**
-- 
2.7.4



Re: [PATCH net-next 0/7] net/ipv6: Add support for path selection using hash of 5-tuple

2018-02-22 Thread David Ahern
On 2/22/18 12:27 PM, David Miller wrote:
> From: David Ahern 
> Date: Wed, 21 Feb 2018 10:49:47 -0800
> 
>> Patch 5 adds the L4 hash support.
> 
> Please address Ido's feedback about how the ports aren't actually being
> taken into consideration because they aren't present in the flow
> information being used.

It's the forwarding case; I need to add the skb to the route lookup
functions. I'll send a v2 with that change in the next few days.


Re: [PATCH net] net: ipv4: Set addr_type in hash_keys for forwarded case

2018-02-22 Thread David Miller
From: David Ahern 
Date: Wed, 21 Feb 2018 11:00:54 -0800

> The result of the skb flow dissect is copied from keys to hash_keys to
> ensure only the intended data is hashed. The original L4 hash patch
> overlooked setting the addr_type for this case; add it.
> 
> Fixes: bf4e0a3db97eb ("net: ipv4: add support for ECMP hash policy choice")
> Reported-by: Ido Schimmel 
> Signed-off-by: David Ahern 

Applied and queued up for -stable, thanks David!


Re: [patch net-next] mlxsw: spectrum_switchdev: Allow port enslavement to a VLAN-unaware bridge

2018-02-22 Thread David Ahern
On 2/22/18 11:58 AM, David Miller wrote:
> From: David Ahern 
> Date: Wed, 21 Feb 2018 11:16:35 -0700
> 
>> On 2/20/18 12:45 AM, Jiri Pirko wrote:
>>> From: Ido Schimmel 
>>>
>>> Up until now we only allowed VLAN devices to be put in a VLAN-unaware
>>> bridge, but some users need the ability to enslave physical ports as
>>> well.
>>>
>>> This is achieved by mapping the port and VID 1 to the bridge's vFID,
>>> instead of the port and the VID used by the VLAN device.
>>>
>>> The above is valid because as long as the port is not enslaved to a
>>> bridge, VID 1 is guaranteed to be configured as PVID and egress
>>> untagged.
>>>
>>> Signed-off-by: Ido Schimmel 
>>> Signed-off-by: Jiri Pirko 
>>> ---
>>>  drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c | 12 +---
>>>  1 file changed, 5 insertions(+), 7 deletions(-)
>>>
>>
>> Maybe I am missing something in the setup, but I am not getting
>> host-to-host connectivity. I booted a switch with this patch, configured
>> a bridge:
> 
> I'm waiting for this discussion to be fully resolved before applying this
> patch.  Just FYI...
> 

Ido:

IPv4 works at boot; IPv6 requires the mcast snooping disable. For this
vlan-unaware bridges can that be set automatically?

And then, what are the options for lldp?


Re: [PATCH net-next 0/7] net/ipv6: Add support for path selection using hash of 5-tuple

2018-02-22 Thread David Miller
From: David Ahern 
Date: Wed, 21 Feb 2018 10:49:47 -0800

> Patch 5 adds the L4 hash support.

Please address Ido's feedback about how the ports aren't actually being
taken into consideration because they aren't present in the flow
information being used.

Thanks.


Re: [PATCH net] tcp_bbr: better deal with suboptimal GSO

2018-02-22 Thread David Miller
From: Eric Dumazet 
Date: Wed, 21 Feb 2018 06:43:03 -0800

> From: Eric Dumazet 
> 
> BBR uses tcp_tso_autosize() in an attempt to probe what would be the
> burst sizes and to adjust cwnd in bbr_target_cwnd() with following
> gold formula :
> 
> /* Allow enough full-sized skbs in flight to utilize end systems. */
> cwnd += 3 * bbr->tso_segs_goal;
> 
> But GSO can be lacking or be constrained to very small
> units (ip link set dev ... gso_max_segs 2)
> 
> What we really want is to have enough packets in flight so that both
> GSO and GRO are efficient.
> 
> So in the case GSO is off or downgraded, we still want to have the same
> number of packets in flight as if GSO/TSO was fully operational, so
> that GRO can hopefully be working efficiently.
> 
> To fix this issue, we make tcp_tso_autosize() unaware of
> sk->sk_gso_max_segs
> 
> Only tcp_tso_segs() has to enforce the gso_max_segs limit.
 . ..
> Fixes: 0f8782ea1497 ("tcp_bbr: add BBR congestion control")
> Signed-off-by: Eric Dumazet 
> Reported-by: Oleksandr Natalenko 


Applied and queued up for -stable, thanks Eric.


[PATCH next-queue 2/3] ixgbe: fix ipsec trailer length

2018-02-22 Thread Shannon Nelson
Fix up the Tx trailer length calculation.  We can't believe the
trailer len from the xstate information because it was calculated
before the packet was put together and padding added.  This bit
of code finds the padding value in the trailer, adds it to the
authentication length, and saves it so later we can put it into
the Tx descriptor to tell the device where to stop the checksum
calculation.

Fixes: 592594704761 ("ixgbe: process the Tx ipsec offload")
Signed-off-by: Shannon Nelson 
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c | 24 +++-
 1 file changed, 23 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c 
b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
index 8b7dbc8..8623013 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
@@ -789,11 +789,33 @@ int ixgbe_ipsec_tx(struct ixgbe_ring *tx_ring,
 
itd->flags = 0;
if (xs->id.proto == IPPROTO_ESP) {
+   struct sk_buff *skb = first->skb;
+   int ret, authlen, trailerlen;
+   u8 padlen;
+
itd->flags |= IXGBE_ADVTXD_TUCMD_IPSEC_TYPE_ESP |
  IXGBE_ADVTXD_TUCMD_L4T_TCP;
if (first->protocol == htons(ETH_P_IP))
itd->flags |= IXGBE_ADVTXD_TUCMD_IPV4;
-   itd->trailer_len = xs->props.trailer_len;
+
+   /* The actual trailer length is authlen (16 bytes) plus
+* 2 bytes for the proto and the padlen values, plus
+* padlen bytes of padding.  This ends up not the same
+* as the static value found in xs->props.trailer_len (21).
+*
+* The "correct" way to get the auth length would be to use
+*authlen = crypto_aead_authsize(xs->data);
+* but since we know we only have one size to worry about
+* we can let the compiler use the constant and save us a
+* few CPU cycles.
+*/
+   authlen = IXGBE_IPSEC_AUTH_BITS / 8;
+
+   ret = skb_copy_bits(skb, skb->len - (authlen + 2), &padlen, 1);
+   if (unlikely(ret))
+   return 0;
+   trailerlen = authlen + 2 + padlen;
+   itd->trailer_len = trailerlen;
}
if (tsa->encrypt)
itd->flags |= IXGBE_ADVTXD_TUCMD_IPSEC_ENCRYPT_EN;
-- 
2.7.4



[PATCH next-queue 0/3] ixgbe: ipsec fixups

2018-02-22 Thread Shannon Nelson
These are a couple of updates for the ixgbe IPsec offload support.

Shannon Nelson (3):
  ixgbe: check for 128-bit authentication
  ixgbe: fix ipsec trailer length
  ixgbe: remove unneeded ipsec state free callback

 drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c | 53 +-
 drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.h |  1 +
 2 files changed, 35 insertions(+), 19 deletions(-)

-- 
2.7.4



[PATCH next-queue 1/3] ixgbe: check for 128-bit authentication

2018-02-22 Thread Shannon Nelson
Make sure the Security Association is using
a 128-bit authentication, since that's the only
size that the hardware offload supports.

Signed-off-by: Shannon Nelson 
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c | 16 +++-
 drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.h |  1 +
 2 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c 
b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
index 93eacdd..8b7dbc8 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
@@ -423,15 +423,21 @@ static int ixgbe_ipsec_parse_proto_keys(struct xfrm_state 
*xs,
const char aes_gcm_name[] = "rfc4106(gcm(aes))";
int key_len;
 
-   if (xs->aead) {
-   key_data = &xs->aead->alg_key[0];
-   key_len = xs->aead->alg_key_len;
-   alg_name = xs->aead->alg_name;
-   } else {
+   if (!xs->aead) {
netdev_err(dev, "Unsupported IPsec algorithm\n");
return -EINVAL;
}
 
+   if (xs->aead->alg_icv_len != IXGBE_IPSEC_AUTH_BITS) {
+   netdev_err(dev, "IPsec offload requires %d bit 
authentication\n",
+  IXGBE_IPSEC_AUTH_BITS);
+   return -EINVAL;
+   }
+
+   key_data = &xs->aead->alg_key[0];
+   key_len = xs->aead->alg_key_len;
+   alg_name = xs->aead->alg_name;
+
if (strcmp(alg_name, aes_gcm_name)) {
netdev_err(dev, "Unsupported IPsec algorithm - please use %s\n",
   aes_gcm_name);
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.h 
b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.h
index da3ce78..87d2800 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.h
@@ -32,6 +32,7 @@
 #define IXGBE_IPSEC_MAX_RX_IP_COUNT128
 #define IXGBE_IPSEC_BASE_RX_INDEX  0
 #define IXGBE_IPSEC_BASE_TX_INDEX  IXGBE_IPSEC_MAX_SA_COUNT
+#define IXGBE_IPSEC_AUTH_BITS  128
 
 #define IXGBE_RXTXIDX_IPS_EN   0x0001
 #define IXGBE_RXIDX_TBL_SHIFT  1
-- 
2.7.4



Re: [PATCH net-next 2/5] net/smc: fix structure size

2018-02-22 Thread David Miller
From: Ursula Braun 
Date: Wed, 21 Feb 2018 12:32:32 +0100

> diff --git a/net/smc/smc_cdc.h b/net/smc/smc_cdc.h
> index ab240b37ad11..d2012fd22100 100644
> --- a/net/smc/smc_cdc.h
> +++ b/net/smc/smc_cdc.h
> @@ -48,7 +48,7 @@ struct smc_cdc_msg {
>   struct smc_cdc_producer_flags   prod_flags;
>   struct smc_cdc_conn_state_flags conn_state_flags;
>   u8  reserved[18];
> -} __aligned(8);
> +} __packed;  /* format defined in RFC7609 */

Hold on, __packed should only be used as the absolute last possible
option to fix structure layout problems.

Also, a sub-structure of smc_cdc_msg, union smc_cdc_cursor, is still
marked with __aligned(8).  That makes no sense at all.

Please fix this without using __packed, as __packed has a severe
detrimental effect on code generation for accessing such structure
on several cpu architectures.

Also, if these are legitimate bug fixes you should target those
at 'net' not 'net-next'.

Thank you.


Re: nla_put_string() vs NLA_STRING

2018-02-22 Thread David Miller
From: Kees Cook 
Date: Tue, 20 Feb 2018 22:00:26 -0800

> So, this specific problem needs fixing (in at least two places calling
> nla_put_string(msg, NL80211_ATTR_REG_ALPHA2, ...)). While I suspect
> it's only ever written an extra byte from the following variable in
> the structure which is an enum nl80211_dfs_regions, I worry there
> might be a lot more of these (though I'd hope unterminated strings are
> uncommon for internal representation). And more generally, it seems
> like only the NLA _input_ functions actually check nla_policy details.
> It seems that the output functions should do the same too, yes?

Generally speaking, the policy is for making sure the user doesn't
give us garbage.

When building netlink attributes itself, the kernel is supposed to
know what it is doing.


[PATCH net] gianfar: simplify FCS handling and fix memory leak

2018-02-22 Thread Andy Spencer
Previously, buffer descriptors containing only the frame check sequence
(FCS) were skipped and not added to the skb. However, the page reference
count was still incremented, leading to a memory leak.

Fixing this inside gfar_add_rx_frag() is difficult due to reserved
memory handling and page reuse. Instead, move the FCS handling to
gfar_process_frame() and trim off the FCS before passing the skb up the
networking stack.

Signed-off-by: Andy Spencer 
Signed-off-by: Jim Gruen 
---
 drivers/net/ethernet/freescale/gianfar.c | 23 +++
 1 file changed, 7 insertions(+), 16 deletions(-)

diff --git a/drivers/net/ethernet/freescale/gianfar.c 
b/drivers/net/ethernet/freescale/gianfar.c
index 3bdeb29..f5c87bd3 100644
--- a/drivers/net/ethernet/freescale/gianfar.c
+++ b/drivers/net/ethernet/freescale/gianfar.c
@@ -2934,29 +2934,17 @@ static bool gfar_add_rx_frag(struct gfar_rx_buff *rxb, 
u32 lstatus,
 {
int size = lstatus & BD_LENGTH_MASK;
struct page *page = rxb->page;
-   bool last = !!(lstatus & BD_LFLAG(RXBD_LAST));
-
-   /* Remove the FCS from the packet length */
-   if (last)
-   size -= ETH_FCS_LEN;
 
if (likely(first)) {
skb_put(skb, size);
} else {
/* the last fragments' length contains the full frame length */
-   if (last)
+   if (lstatus & BD_LFLAG(RXBD_LAST))
size -= skb->len;
 
-   /* Add the last fragment if it contains something other than
-* the FCS, otherwise drop it and trim off any part of the FCS
-* that was already received.
-*/
-   if (size > 0)
-   skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
-   rxb->page_offset + RXBUF_ALIGNMENT,
-   size, GFAR_RXB_TRUESIZE);
-   else if (size < 0)
-   pskb_trim(skb, skb->len + size);
+   skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
+   rxb->page_offset + RXBUF_ALIGNMENT,
+   size, GFAR_RXB_TRUESIZE);
}
 
/* try reuse page */
@@ -3069,6 +3057,9 @@ static void gfar_process_frame(struct net_device *ndev, 
struct sk_buff *skb)
if (priv->padding)
skb_pull(skb, priv->padding);
 
+   /* Trim off the FCS */
+   pskb_trim(skb, skb->len - ETH_FCS_LEN);
+
if (ndev->features & NETIF_F_RXCSUM)
gfar_rx_checksum(skb, fcb);
 
-- 
2.7.4



Re: [PATCH] Carrier detect ok, don't turn off negotiation

2018-02-22 Thread David Miller
From: Denis Du 
Date: Wed, 21 Feb 2018 03:35:31 + (UTC)

> How  is your thinking about this patch?

I cannot apply a patch which has been corrupted by your email client like
this.

Please send it properly again, plain ASCII text, and no transformations
by your email client.

You should send the patch to yourself and try to apply the patch you
receive, do not send to the list until you can pass the test properly.

Do not use attachments to fix this problem, the patch must be inline
after your commit message and signoffs.

Please read Documentation/process/submitting-patches.rst and
Documentation/process/email-clients.rst for more information.

Thank you.


Re: [PATCH net] smsc75xx: fix smsc75xx_set_features()

2018-02-22 Thread David Miller
From: Eric Dumazet 
Date: Tue, 20 Feb 2018 21:42:26 -0800

> From: Eric Dumazet 
> 
> If an attempt is made to disable RX checksums, USB adapter is changed
> but netdev->features is not, because smsc75xx_set_features() returns a
> non zero value.
> 
> This throws errors from netdev_rx_csum_fault() :
> : hw csum failure
> 
> Signed-off-by: Eric Dumazet 
> Cc: Steve Glendinning 

Applied, thanks Eric.


Re: [PATCH] netlink: put module reference if dump start fails

2018-02-22 Thread David Miller
From: "Jason A. Donenfeld" 
Date: Wed, 21 Feb 2018 04:41:59 +0100

> Before, if cb->start() failed, the module reference would never be put,
> because cb->cb_running is intentionally false at this point. Users are
> generally annoyed by this because they can no longer unload modules that
> leak references. Also, it may be possible to tediously wrap a reference
> counter back to zero, especially since module.c still uses atomic_inc
> instead of refcount_inc.
> 
> This patch expands the error path to simply call module_put if
> cb->start() fails.
> 
> Signed-off-by: Jason A. Donenfeld 
> ---
> This probably should be queued up for stable.

Applied and queued up for -stable.


Re: [patch net-next] mlxsw: spectrum_switchdev: Allow port enslavement to a VLAN-unaware bridge

2018-02-22 Thread David Miller
From: David Ahern 
Date: Wed, 21 Feb 2018 11:16:35 -0700

> On 2/20/18 12:45 AM, Jiri Pirko wrote:
>> From: Ido Schimmel 
>> 
>> Up until now we only allowed VLAN devices to be put in a VLAN-unaware
>> bridge, but some users need the ability to enslave physical ports as
>> well.
>> 
>> This is achieved by mapping the port and VID 1 to the bridge's vFID,
>> instead of the port and the VID used by the VLAN device.
>> 
>> The above is valid because as long as the port is not enslaved to a
>> bridge, VID 1 is guaranteed to be configured as PVID and egress
>> untagged.
>> 
>> Signed-off-by: Ido Schimmel 
>> Signed-off-by: Jiri Pirko 
>> ---
>>  drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c | 12 +---
>>  1 file changed, 5 insertions(+), 7 deletions(-)
>> 
> 
> Maybe I am missing something in the setup, but I am not getting
> host-to-host connectivity. I booted a switch with this patch, configured
> a bridge:

I'm waiting for this discussion to be fully resolved before applying this
patch.  Just FYI...


Re: nft/bpf interpreters and spectre2. Was: [PATCH RFC 0/4] net: add bpfilter

2018-02-22 Thread Jann Horn
[resend as plaintext, apparently mobile gmail will send HTML mails]

On Thu, Feb 22, 2018 at 3:20 AM, Alexei Starovoitov
 wrote:
> On Wed, Feb 21, 2018 at 01:13:03PM +0100, Florian Westphal wrote:
>>
>> Obvious candidates are: meta, numgen, limit, objref, quota, reject.
>>
>> We should probably also consider removing
>> CONFIG_NFT_SET_RBTREE and CONFIG_NFT_SET_HASH and just always
>> build both too (at least rbtree since that offers interval).
>>
>> For the indirect call issue we can use direct calls from eval loop for
>> some of the more frequently used ones, similar to what we do already
>> for nft_cmp_fast_expr.
>
> nft_cmp_fast_expr and other expressions mentioned above made me thinking...
>
> do we have the same issue with nft interpreter as we had with bpf one?
> bpf interpreter was used as part of spectre2 attack to leak
> information via cache side channel and let VM read hypervisor memory.
> Due to that issue we removed bpf interpreter from the kernel code.
> That's what CONFIG_BPF_JIT_ALWAYS_ON for...
> but we still have nft interpreter in the kernel that can also
> execute arbitrary nft expressions.
>
> Jann's exploit used the following bpf instructions:
[...]
>
> and a gadget to jump into __bpf_prog_run with insn pointing
> to memory controlled by the guest while accessible
> (at different virt address) by the hypervisor.
>
> It seems possible to construct similar sequence of instructions
> out of nft expressions and use gadget that jumps into nft_do_chain().
[...]
> Obviously such exploit is harder to do than bpf based one.
> Do we need to do anything about it ?
> May be it's easier to find gadgets in .text of vmlinux
> instead of messing with interpreters?
>
> Jann,
> can you comment on removing interpreters in general?
> Do we need to worry about having bpf and/or nft interpreter
> in the kernel?

I think that for Spectre V2, the presence of interpreters isn't a big
problem. It simplifies writing attacks a bit, but I don't expect it to
be necessary if an attacker invests some time into finding useful
gadgets.


Re: ppp/pppoe, still panic 4.15.3 in ppp_push

2018-02-22 Thread Denys Fedoryshchenko

On 2018-02-22 20:30, Guillaume Nault wrote:

On Wed, Feb 21, 2018 at 12:04:30PM -0800, Cong Wang wrote:
On Thu, Feb 15, 2018 at 11:31 AM, Guillaume Nault 
 wrote:

> On Thu, Feb 15, 2018 at 06:01:16PM +0200, Denys Fedoryshchenko wrote:
>> On 2018-02-15 17:55, Guillaume Nault wrote:
>> > On Thu, Feb 15, 2018 at 12:19:52PM +0200, Denys Fedoryshchenko wrote:
>> > > Here we go:
>> > >
>> > >   [24558.921549]
>> > > ==
>> > >   [24558.922167] BUG: KASAN: use-after-free in
>> > > ppp_ioctl+0xa6a/0x1522
>> > > [ppp_generic]
>> > >   [24558.922776] Write of size 8 at addr 8803d35bf3f8 by task
>> > > accel-pppd/12622
>> > >   [24558.923113]
>> > >   [24558.923451] CPU: 0 PID: 12622 Comm: accel-pppd Tainted: G
>> > > W
>> > > 4.15.3-build-0134 #1
>> > >   [24558.924058] Hardware name: HP ProLiant DL320e Gen8 v2,
>> > > BIOS P80
>> > > 04/02/2015
>> > >   [24558.924406] Call Trace:
>> > >   [24558.924753]  dump_stack+0x46/0x59
>> > >   [24558.925103]  print_address_description+0x6b/0x23b
>> > >   [24558.925451]  ? ppp_ioctl+0xa6a/0x1522 [ppp_generic]
>> > >   [24558.925797]  kasan_report+0x21b/0x241
>> > >   [24558.926136]  ppp_ioctl+0xa6a/0x1522 [ppp_generic]
>> > >   [24558.926479]  ? ppp_nl_newlink+0x1da/0x1da [ppp_generic]
>> > >   [24558.926829]  ? sock_sendmsg+0x89/0x99
>> > >   [24558.927176]  ? __vfs_write+0xd9/0x4ad
>> > >   [24558.927523]  ? kernel_read+0xed/0xed
>> > >   [24558.927872]  ? SyS_getpeername+0x18c/0x18c
>> > >   [24558.928213]  ? bit_waitqueue+0x2a/0x2a
>> > >   [24558.928561]  ? wake_atomic_t_function+0x115/0x115
>> > >   [24558.928898]  vfs_ioctl+0x6e/0x81
>> > >   [24558.929228]  do_vfs_ioctl+0xa00/0xb10
>> > >   [24558.929571]  ? sigprocmask+0x1a6/0x1d0
>> > >   [24558.929907]  ? sigsuspend+0x13e/0x13e
>> > >   [24558.930239]  ? ioctl_preallocate+0x14e/0x14e
>> > >   [24558.930568]  ? SyS_rt_sigprocmask+0xf1/0x142
>> > >   [24558.930904]  ? sigprocmask+0x1d0/0x1d0
>> > >   [24558.931252]  SyS_ioctl+0x39/0x55
>> > >   [24558.931595]  ? do_vfs_ioctl+0xb10/0xb10
>> > >   [24558.931942]  do_syscall_64+0x1b1/0x31f
>> > >   [24558.932288]  entry_SYSCALL_64_after_hwframe+0x21/0x86
>> > >   [24558.932627] RIP: 0033:0x7f302849d8a7
>> > >   [24558.932965] RSP: 002b:7f3029a52af8 EFLAGS: 0206
>> > > ORIG_RAX:
>> > > 0010
>> > >   [24558.933578] RAX: ffda RBX: 7f3027d861e3 RCX:
>> > > 7f302849d8a7
>> > >   [24558.933927] RDX: 7f3023f49468 RSI: 4004743a RDI:
>> > > 3a67
>> > >   [24558.934266] RBP: 7f3029a52b20 R08:  R09:
>> > > 55c8308d8e40
>> > >   [24558.934607] R10: 0008 R11: 0206 R12:
>> > > 7f3023f49358
>> > >   [24558.934947] R13: 7ffe86e5723f R14:  R15:
>> > > 7f3029a53700
>> > >   [24558.935288]
>> > >   [24558.935626] Allocated by task 12622:
>> > >   [24558.935972]  ppp_register_net_channel+0x5f/0x5c6
>> > > [ppp_generic]
>> > >   [24558.936306]  pppoe_connect+0xab7/0xc71 [pppoe]
>> > >   [24558.936640]  SyS_connect+0x14b/0x1b7
>> > >   [24558.936975]  do_syscall_64+0x1b1/0x31f
>> > >   [24558.937319]  entry_SYSCALL_64_after_hwframe+0x21/0x86
>> > >   [24558.937655]
>> > >   [24558.937993] Freed by task 12622:
>> > >   [24558.938321]  kfree+0xb0/0x11d
>> > >   [24558.938658]  ppp_release+0x111/0x120 [ppp_generic]
>> > >   [24558.938994]  __fput+0x2ba/0x51a
>> > >   [24558.939332]  task_work_run+0x11c/0x13d
>> > >   [24558.939676]  exit_to_usermode_loop+0x7c/0xaf
>> > >   [24558.940022]  do_syscall_64+0x2ea/0x31f
>> > >   [24558.940368]  entry_SYSCALL_64_after_hwframe+0x21/0x86
>> > >   [24558.947099]
>> >
>> > Your first guess was right. It looks like we have an issue with
>> > reference counting on the channels. Can you send me your ppp_generic.o?
>> http://nuclearcat.com/ppp_generic.o
>> Compiled with gcc version 6.4.0 (Gentoo 6.4.0-r1 p1.3)
>>
> From what I can see, ppp_release() and ioctl(PPPIOCCONNECT) are called
> concurrently on the same ppp_file. Even if this ppp_file was pointed at
> by two different file descriptors, I can't see how this could defeat
> the reference counting mechanism. I'm going to think more about it.

For me it looks like pch->clist is not removed from the list 
ppp->channels
when destroyed via ppp_release(). But I don't want to pretend I 
understand

ppp logic.


I've thought about that too, but couldn't find a scenario that could
trigger the bug.

To get ->private_data pointing to a struct channel pointer, a file 
needs

to ioctl(PPPIOCATTCHAN) first. For this call to succeed, the channel
must have been registered with ppp_register_net_channel(). Both
operations take a reference on the channel, which means that, before
adding pch->clist to a ppp->channels list (with ppp_connect_channel()),
the channel is already held by a /dev/ppp file and by the code that
registered the channel in the first place.

Therefore, closing the /dev/ppp 

Re: [PATCH bpf v2] bpf: fix rcu lockdep warning for lpm_trie map_free callback

2018-02-22 Thread David Miller
From: Yonghong Song 
Date: Thu, 22 Feb 2018 10:10:35 -0800

> Commit 9a3efb6b661f ("bpf: fix memory leak in lpm_trie map_free callback 
> function")
> fixed a memory leak and removed unnecessary locks in map_free callback 
> function.
> Unfortunately, it introduced a lockdep warning. When lockdep checking is 
> turned on,
> running tools/testing/selftests/bpf/test_lpm_map will have:
> 
>   [   98.294321] =
>   [   98.294807] WARNING: suspicious RCU usage
>   [   98.295359] 4.16.0-rc2+ #193 Not tainted
>   [   98.295907] -
>   [   98.296486] /home/yhs/work/bpf/kernel/bpf/lpm_trie.c:572 suspicious 
> rcu_dereference_check() usage!
>   [   98.297657]
>   [   98.297657] other info that might help us debug this:
>   [   98.297657]
>   [   98.298663]
>   [   98.298663] rcu_scheduler_active = 2, debug_locks = 1
>   [   98.299536] 2 locks held by kworker/2:1/54:
>   [   98.300152]  #0:  ((wq_completion)"events"){+.+.}, at: 
> [<196bc1f0>] process_one_work+0x157/0x5c0
>   [   98.301381]  #1:  ((work_completion)(>work)){+.+.}, at: 
> [<196bc1f0>] process_one_work+0x157/0x5c0
> 
> Since actual trie tree removal happens only after no other
> accesses to the tree are possible, replacing
>   rcu_dereference_protected(*slot, lockdep_is_held(&trie->lock))
> with
>   rcu_dereference_protected(*slot, 1)
> fixed the issue.
> 
> Fixes: 9a3efb6b661f ("bpf: fix memory leak in lpm_trie map_free callback 
> function")
> Reported-by: Eric Dumazet 
> Suggested-by: Eric Dumazet 
> Signed-off-by: Yonghong Song 

Acked-by: David S. Miller 


Re: ppp/pppoe, still panic 4.15.3 in ppp_push

2018-02-22 Thread Guillaume Nault
On Wed, Feb 21, 2018 at 12:04:30PM -0800, Cong Wang wrote:
> On Thu, Feb 15, 2018 at 11:31 AM, Guillaume Nault  
> wrote:
> > On Thu, Feb 15, 2018 at 06:01:16PM +0200, Denys Fedoryshchenko wrote:
> >> On 2018-02-15 17:55, Guillaume Nault wrote:
> >> > On Thu, Feb 15, 2018 at 12:19:52PM +0200, Denys Fedoryshchenko wrote:
> >> > > Here we go:
> >> > >
> >> > >   [24558.921549]
> >> > > ==
> >> > >   [24558.922167] BUG: KASAN: use-after-free in
> >> > > ppp_ioctl+0xa6a/0x1522
> >> > > [ppp_generic]
> >> > >   [24558.922776] Write of size 8 at addr 8803d35bf3f8 by task
> >> > > accel-pppd/12622
> >> > >   [24558.923113]
> >> > >   [24558.923451] CPU: 0 PID: 12622 Comm: accel-pppd Tainted: G
> >> > > W
> >> > > 4.15.3-build-0134 #1
> >> > >   [24558.924058] Hardware name: HP ProLiant DL320e Gen8 v2,
> >> > > BIOS P80
> >> > > 04/02/2015
> >> > >   [24558.924406] Call Trace:
> >> > >   [24558.924753]  dump_stack+0x46/0x59
> >> > >   [24558.925103]  print_address_description+0x6b/0x23b
> >> > >   [24558.925451]  ? ppp_ioctl+0xa6a/0x1522 [ppp_generic]
> >> > >   [24558.925797]  kasan_report+0x21b/0x241
> >> > >   [24558.926136]  ppp_ioctl+0xa6a/0x1522 [ppp_generic]
> >> > >   [24558.926479]  ? ppp_nl_newlink+0x1da/0x1da [ppp_generic]
> >> > >   [24558.926829]  ? sock_sendmsg+0x89/0x99
> >> > >   [24558.927176]  ? __vfs_write+0xd9/0x4ad
> >> > >   [24558.927523]  ? kernel_read+0xed/0xed
> >> > >   [24558.927872]  ? SyS_getpeername+0x18c/0x18c
> >> > >   [24558.928213]  ? bit_waitqueue+0x2a/0x2a
> >> > >   [24558.928561]  ? wake_atomic_t_function+0x115/0x115
> >> > >   [24558.928898]  vfs_ioctl+0x6e/0x81
> >> > >   [24558.929228]  do_vfs_ioctl+0xa00/0xb10
> >> > >   [24558.929571]  ? sigprocmask+0x1a6/0x1d0
> >> > >   [24558.929907]  ? sigsuspend+0x13e/0x13e
> >> > >   [24558.930239]  ? ioctl_preallocate+0x14e/0x14e
> >> > >   [24558.930568]  ? SyS_rt_sigprocmask+0xf1/0x142
> >> > >   [24558.930904]  ? sigprocmask+0x1d0/0x1d0
> >> > >   [24558.931252]  SyS_ioctl+0x39/0x55
> >> > >   [24558.931595]  ? do_vfs_ioctl+0xb10/0xb10
> >> > >   [24558.931942]  do_syscall_64+0x1b1/0x31f
> >> > >   [24558.932288]  entry_SYSCALL_64_after_hwframe+0x21/0x86
> >> > >   [24558.932627] RIP: 0033:0x7f302849d8a7
> >> > >   [24558.932965] RSP: 002b:7f3029a52af8 EFLAGS: 0206
> >> > > ORIG_RAX:
> >> > > 0010
> >> > >   [24558.933578] RAX: ffda RBX: 7f3027d861e3 RCX:
> >> > > 7f302849d8a7
> >> > >   [24558.933927] RDX: 7f3023f49468 RSI: 4004743a RDI:
> >> > > 3a67
> >> > >   [24558.934266] RBP: 7f3029a52b20 R08:  R09:
> >> > > 55c8308d8e40
> >> > >   [24558.934607] R10: 0008 R11: 0206 R12:
> >> > > 7f3023f49358
> >> > >   [24558.934947] R13: 7ffe86e5723f R14:  R15:
> >> > > 7f3029a53700
> >> > >   [24558.935288]
> >> > >   [24558.935626] Allocated by task 12622:
> >> > >   [24558.935972]  ppp_register_net_channel+0x5f/0x5c6
> >> > > [ppp_generic]
> >> > >   [24558.936306]  pppoe_connect+0xab7/0xc71 [pppoe]
> >> > >   [24558.936640]  SyS_connect+0x14b/0x1b7
> >> > >   [24558.936975]  do_syscall_64+0x1b1/0x31f
> >> > >   [24558.937319]  entry_SYSCALL_64_after_hwframe+0x21/0x86
> >> > >   [24558.937655]
> >> > >   [24558.937993] Freed by task 12622:
> >> > >   [24558.938321]  kfree+0xb0/0x11d
> >> > >   [24558.938658]  ppp_release+0x111/0x120 [ppp_generic]
> >> > >   [24558.938994]  __fput+0x2ba/0x51a
> >> > >   [24558.939332]  task_work_run+0x11c/0x13d
> >> > >   [24558.939676]  exit_to_usermode_loop+0x7c/0xaf
> >> > >   [24558.940022]  do_syscall_64+0x2ea/0x31f
> >> > >   [24558.940368]  entry_SYSCALL_64_after_hwframe+0x21/0x86
> >> > >   [24558.947099]
> >> >
> >> > Your first guess was right. It looks like we have an issue with
> >> > reference counting on the channels. Can you send me your ppp_generic.o?
> >> http://nuclearcat.com/ppp_generic.o
> >> Compiled with gcc version 6.4.0 (Gentoo 6.4.0-r1 p1.3)
> >>
> > From what I can see, ppp_release() and ioctl(PPPIOCCONNECT) are called
> > concurrently on the same ppp_file. Even if this ppp_file was pointed at
> > by two different file descriptors, I can't see how this could defeat
> > the reference counting mechanism. I'm going to think more about it.
> 
> For me it looks like pch->clist is not removed from the list ppp->channels
> when destroyed via ppp_release(). But I don't want to pretend I understand
> ppp logic.
> 
I've thought about that too, but couldn't find a scenario that could
trigger the bug.

To get ->private_data pointing to a struct channel pointer, a file needs
to ioctl(PPPIOCATTCHAN) first. For this call to succeed, the channel
must have been registered with ppp_register_net_channel(). Both
operations take a reference on the channel, which means that, before
adding pch->clist to a ppp->channels list (with ppp_connect_channel()),
the channel is 

Re: [RFC PATCH V2] virtio_pci: Add SR-IOV support

2018-02-22 Thread Christoph Hellwig
Can we move this into common code as a generic_sriov_configure
helper?  Nothing is really virtio specific, and it seems like
some other drivers could also use it, e.g. ena or nvme.


Re: [PATCH bpf v2] bpf: fix rcu lockdep warning for lpm_trie map_free callback

2018-02-22 Thread Eric Dumazet
On Thu, 2018-02-22 at 10:10 -0800, Yonghong Song wrote:
> Commit 9a3efb6b661f ("bpf: fix memory leak in lpm_trie map_free callback 
> function")
> fixed a memory leak and removed unnecessary locks in map_free callback 
> function.
> Unfortunately, it introduced a lockdep warning. When lockdep checking is 
> turned on,
> running tools/testing/selftests/bpf/test_lpm_map will have:
> 

> Fixes: 9a3efb6b661f ("bpf: fix memory leak in lpm_trie map_free callback 
> function")
> Reported-by: Eric Dumazet 
> Suggested-by: Eric Dumazet 
> Signed-off-by: Yonghong Song 
> ---
>  kernel/bpf/lpm_trie.c | 3 +--
>  1 file changed, 1 insertion(+), 2 deletions(-)
> 
> v1 -> v2:
>  . fix sparse warning which is introduced by v1, suggested by Eric.
> 
> diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c
> index a75e02c..b4b5b81 100644
> --- a/kernel/bpf/lpm_trie.c
> +++ b/kernel/bpf/lpm_trie.c
> @@ -569,8 +569,7 @@ static void trie_free(struct bpf_map *map)
>   slot = >root;
>  
>   for (;;) {
> - node = rcu_dereference_protected(*slot,
> - lockdep_is_held(>lock));
> + node = rcu_dereference_protected(*slot, 1);
>   if (!node)
>   goto out;

SGTM, thanks !

Reviewed-by: Eric Dumazet 




[PATCH bpf v2] bpf: fix rcu lockdep warning for lpm_trie map_free callback

2018-02-22 Thread Yonghong Song
Commit 9a3efb6b661f ("bpf: fix memory leak in lpm_trie map_free callback 
function")
fixed a memory leak and removed unnecessary locks in map_free callback function.
Unfortunately, it introduced a lockdep warning. When lockdep checking is 
turned on,
running tools/testing/selftests/bpf/test_lpm_map will have:

  [   98.294321] =
  [   98.294807] WARNING: suspicious RCU usage
  [   98.295359] 4.16.0-rc2+ #193 Not tainted
  [   98.295907] -
  [   98.296486] /home/yhs/work/bpf/kernel/bpf/lpm_trie.c:572 suspicious 
rcu_dereference_check() usage!
  [   98.297657]
  [   98.297657] other info that might help us debug this:
  [   98.297657]
  [   98.298663]
  [   98.298663] rcu_scheduler_active = 2, debug_locks = 1
  [   98.299536] 2 locks held by kworker/2:1/54:
  [   98.300152]  #0:  ((wq_completion)"events"){+.+.}, at: 
[<196bc1f0>] process_one_work+0x157/0x5c0
  [   98.301381]  #1:  ((work_completion)(>work)){+.+.}, at: 
[<196bc1f0>] process_one_work+0x157/0x5c0

Since actual trie tree removal happens only after no other
accesses to the tree are possible, replacing
  rcu_dereference_protected(*slot, lockdep_is_held(&trie->lock))
with
  rcu_dereference_protected(*slot, 1)
fixed the issue.

Fixes: 9a3efb6b661f ("bpf: fix memory leak in lpm_trie map_free callback 
function")
Reported-by: Eric Dumazet 
Suggested-by: Eric Dumazet 
Signed-off-by: Yonghong Song 
---
 kernel/bpf/lpm_trie.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

v1 -> v2:
 . fix sparse warning which is introduced by v1, suggested by Eric.

diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c
index a75e02c..b4b5b81 100644
--- a/kernel/bpf/lpm_trie.c
+++ b/kernel/bpf/lpm_trie.c
@@ -569,8 +569,7 @@ static void trie_free(struct bpf_map *map)
slot = >root;
 
for (;;) {
-   node = rcu_dereference_protected(*slot,
-   lockdep_is_held(>lock));
+   node = rcu_dereference_protected(*slot, 1);
if (!node)
goto out;
 
-- 
2.9.5



Re: [PATCH v2 net-next 1/2] lan743x: Add main source files for new lan743x driver

2018-02-22 Thread kbuild test robot
Hi Bryan,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on net-next/master]

url:
https://github.com/0day-ci/linux/commits/Bryan-Whitehead/lan743x-Add-new-lan743x-driver/20180222-225510
reproduce:
# apt-get install sparse
make ARCH=x86_64 allmodconfig
make C=1 CF=-D__CHECK_ENDIAN__


sparse warnings: (new ones prefixed by >>)

>> drivers/net/ethernet/microchip/lan743x_main.c:68:5: sparse: symbol 
>> 'lan743x_csr_read' was not declared. Should it be
>> drivers/net/ethernet/microchip/lan743x_main.c:73:6: sparse: symbol 
>> 'lan743x_csr_write' was not declared. Should it be

Please review and possibly fold the followup patch.

---
0-DAY kernel test infrastructureOpen Source Technology Center
https://lists.01.org/pipermail/kbuild-all   Intel Corporation


[RFC PATCH] lan743x: lan743x_csr_read() can be static

2018-02-22 Thread kbuild test robot

Fixes: 896121de80db ("lan743x: Add main source files for new lan743x driver")
Signed-off-by: Fengguang Wu 
---
 lan743x_main.c |4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/microchip/lan743x_main.c 
b/drivers/net/ethernet/microchip/lan743x_main.c
index 3de39e1..dd5ed86 100644
--- a/drivers/net/ethernet/microchip/lan743x_main.c
+++ b/drivers/net/ethernet/microchip/lan743x_main.c
@@ -65,12 +65,12 @@ static int lan743x_pci_init(struct lan743x_adapter *adapter,
return ret;
 }
 
-u32 lan743x_csr_read(struct lan743x_adapter *adapter, int offset)
+static u32 lan743x_csr_read(struct lan743x_adapter *adapter, int offset)
 {
return ioread32(>csr.csr_address[offset]);
 }
 
-void lan743x_csr_write(struct lan743x_adapter *adapter, int offset, u32 data)
+static void lan743x_csr_write(struct lan743x_adapter *adapter, int offset, u32 
data)
 {
iowrite32(data, >csr.csr_address[offset]);
 }


[Crypto v7 06/12] cxgb4: LLD driver changes to enable TLS

2018-02-22 Thread Atul Gupta
Read FW capability. Read key area size. Dump the TLS record count.

Signed-off-by: Atul Gupta 
---
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 32 +---
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h  |  7 ++
 drivers/net/ethernet/chelsio/cxgb4/sge.c| 98 -
 3 files changed, 126 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c 
b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 56bc626..ab5937e 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -4284,18 +4284,32 @@ static int adap_init0(struct adapter *adap)
adap->num_ofld_uld += 2;
}
if (caps_cmd.cryptocaps) {
-   /* Should query params here...TODO */
-   params[0] = FW_PARAM_PFVF(NCRYPTO_LOOKASIDE);
-   ret = t4_query_params(adap, adap->mbox, adap->pf, 0, 2,
- params, val);
-   if (ret < 0) {
-   if (ret != -EINVAL)
+   if (ntohs(caps_cmd.cryptocaps) &
+   FW_CAPS_CONFIG_CRYPTO_LOOKASIDE) {
+   params[0] = FW_PARAM_PFVF(NCRYPTO_LOOKASIDE);
+   ret = t4_query_params(adap, adap->mbox, adap->pf, 0,
+ 2, params, val);
+   if (ret < 0) {
+   if (ret != -EINVAL)
+   goto bye;
+   } else {
+   adap->vres.ncrypto_fc = val[0];
+   }
+   adap->num_ofld_uld += 1;
+   }
+   if (ntohs(caps_cmd.cryptocaps) &
+   FW_CAPS_CONFIG_TLS_INLINE) {
+   params[0] = FW_PARAM_PFVF(TLS_START);
+   params[1] = FW_PARAM_PFVF(TLS_END);
+   ret = t4_query_params(adap, adap->mbox, adap->pf, 0,
+ 2, params, val);
+   if (ret < 0)
goto bye;
-   } else {
-   adap->vres.ncrypto_fc = val[0];
+   adap->vres.key.start = val[0];
+   adap->vres.key.size = val[1] - val[0] + 1;
+   adap->num_uld += 1;
}
adap->params.crypto = ntohs(caps_cmd.cryptocaps);
-   adap->num_uld += 1;
}
 #undef FW_PARAM_PFVF
 #undef FW_PARAM_DEV
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h 
b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
index a14e8db..3d3ef3f 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
@@ -237,6 +237,7 @@ enum cxgb4_uld {
CXGB4_ULD_ISCSI,
CXGB4_ULD_ISCSIT,
CXGB4_ULD_CRYPTO,
+   CXGB4_ULD_TLS,
CXGB4_ULD_MAX
 };
 
@@ -287,6 +288,7 @@ struct cxgb4_virt_res {  /* virtualized 
HW resources */
struct cxgb4_range qp;
struct cxgb4_range cq;
struct cxgb4_range ocq;
+   struct cxgb4_range key;
unsigned int ncrypto_fc;
 };
 
@@ -298,6 +300,9 @@ struct chcr_stats_debug {
atomic_t error;
atomic_t fallback;
atomic_t ipsec_cnt;
+   atomic_t tls_pdu_tx;
+   atomic_t tls_pdu_rx;
+   atomic_t tls_key;
 };
 
 #define OCQ_WIN_OFFSET(pdev, vres) \
@@ -378,6 +383,8 @@ struct cxgb4_uld_info {
 int cxgb4_register_uld(enum cxgb4_uld type, const struct cxgb4_uld_info *p);
 int cxgb4_unregister_uld(enum cxgb4_uld type);
 int cxgb4_ofld_send(struct net_device *dev, struct sk_buff *skb);
+int cxgb4_immdata_send(struct net_device *dev, unsigned int idx,
+  const void *src, unsigned int len);
 int cxgb4_crypto_send(struct net_device *dev, struct sk_buff *skb);
 unsigned int cxgb4_dbfifo_count(const struct net_device *dev, int lpfifo);
 unsigned int cxgb4_port_chan(const struct net_device *dev);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c 
b/drivers/net/ethernet/chelsio/cxgb4/sge.c
index 6e310a0..32e3779 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c
@@ -1740,9 +1740,9 @@ static void txq_stop_maperr(struct sge_uld_txq *q)
  * Stops an offload Tx queue that has become full and modifies the packet
  * being written to request a wakeup.
  */
-static void ofldtxq_stop(struct sge_uld_txq *q, struct sk_buff *skb)
+static void ofldtxq_stop(struct sge_uld_txq *q, void *src)
 {
-   struct fw_wr_hdr *wr = (struct fw_wr_hdr *)skb->data;
+   struct fw_wr_hdr *wr = (struct fw_wr_hdr *)src;
 
wr->lo |= htonl(FW_WR_EQUEQ_F | FW_WR_EQUIQ_F);
q->q.stops++;
@@ -2005,6 +2005,100 @@ int cxgb4_ofld_send(struct net_device *dev, struct 
sk_buff *skb)
 }
 EXPORT_SYMBOL(cxgb4_ofld_send);
 
+static void 

[Crypto v7 12/12] Makefile Kconfig

2018-02-22 Thread Atul Gupta
Entry for Inline TLS as another driver dependent on cxgb4 and chcr

Signed-off-by: Atul Gupta 
---
 drivers/crypto/chelsio/Kconfig| 11 +++
 drivers/crypto/chelsio/Makefile   |  1 +
 drivers/crypto/chelsio/chtls/Makefile |  4 
 3 files changed, 16 insertions(+)
 create mode 100644 drivers/crypto/chelsio/chtls/Makefile

diff --git a/drivers/crypto/chelsio/Kconfig b/drivers/crypto/chelsio/Kconfig
index 5ae9f87..930d82d 100644
--- a/drivers/crypto/chelsio/Kconfig
+++ b/drivers/crypto/chelsio/Kconfig
@@ -29,3 +29,14 @@ config CHELSIO_IPSEC_INLINE
 default n
 ---help---
   Enable support for IPSec Tx Inline.
+
+config CRYPTO_DEV_CHELSIO_TLS
+tristate "Chelsio Crypto Inline TLS Driver"
+depends on CHELSIO_T4
+depends on TLS
+select CRYPTO_DEV_CHELSIO
+---help---
+  Support Chelsio Inline TLS with Chelsio crypto accelerator.
+
+  To compile this driver as a module, choose M here: the module
+  will be called chtls.
diff --git a/drivers/crypto/chelsio/Makefile b/drivers/crypto/chelsio/Makefile
index eaecaf1..639e571 100644
--- a/drivers/crypto/chelsio/Makefile
+++ b/drivers/crypto/chelsio/Makefile
@@ -3,3 +3,4 @@ ccflags-y := -Idrivers/net/ethernet/chelsio/cxgb4
 obj-$(CONFIG_CRYPTO_DEV_CHELSIO) += chcr.o
 chcr-objs :=  chcr_core.o chcr_algo.o
 chcr-$(CONFIG_CHELSIO_IPSEC_INLINE) += chcr_ipsec.o
+obj-$(CONFIG_CRYPTO_DEV_CHELSIO_TLS) += chtls/
diff --git a/drivers/crypto/chelsio/chtls/Makefile 
b/drivers/crypto/chelsio/chtls/Makefile
new file mode 100644
index 000..df13795
--- /dev/null
+++ b/drivers/crypto/chelsio/chtls/Makefile
@@ -0,0 +1,4 @@
+ccflags-y := -Idrivers/net/ethernet/chelsio/cxgb4 -Idrivers/crypto/chelsio/
+
+obj-$(CONFIG_CRYPTO_DEV_CHELSIO_TLS) += chtls.o
+chtls-objs := chtls_main.o chtls_cm.o chtls_io.o chtls_hw.o
-- 
1.8.3.1



[Crypto v7 08/12] chtls: Key program

2018-02-22 Thread Atul Gupta
Program the tx and rx key on chip.

Signed-off-by: Atul Gupta 
---
 drivers/crypto/chelsio/chtls/chtls_hw.c | 394 
 1 file changed, 394 insertions(+)
 create mode 100644 drivers/crypto/chelsio/chtls/chtls_hw.c

diff --git a/drivers/crypto/chelsio/chtls/chtls_hw.c 
b/drivers/crypto/chelsio/chtls/chtls_hw.c
new file mode 100644
index 000..c3e17159
--- /dev/null
+++ b/drivers/crypto/chelsio/chtls/chtls_hw.c
@@ -0,0 +1,394 @@
+/*
+ * Copyright (c) 2017 Chelsio Communications, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Written by: Atul Gupta (atul.gu...@chelsio.com)
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "chtls.h"
+#include "chtls_cm.h"
+
+static void __set_tcb_field_direct(struct chtls_sock *csk,
+  struct cpl_set_tcb_field *req, u16 word,
+  u64 mask, u64 val, u8 cookie, int no_reply)
+{
+   struct ulptx_idata *sc;
+
+   INIT_TP_WR_CPL(req, CPL_SET_TCB_FIELD, csk->tid);
+   req->wr.wr_mid |= htonl(FW_WR_FLOWID_V(csk->tid));
+   req->reply_ctrl = htons(NO_REPLY_V(no_reply) |
+   QUEUENO_V(csk->rss_qid));
+   req->word_cookie = htons(TCB_WORD_V(word) | TCB_COOKIE_V(cookie));
+   req->mask = cpu_to_be64(mask);
+   req->val = cpu_to_be64(val);
+   sc = (struct ulptx_idata *)(req + 1);
+   sc->cmd_more = htonl(ULPTX_CMD_V(ULP_TX_SC_NOOP));
+   sc->len = htonl(0);
+}
+
+static void __set_tcb_field(struct sock *sk, struct sk_buff *skb, u16 word,
+   u64 mask, u64 val, u8 cookie, int no_reply)
+{
+   struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
+   struct cpl_set_tcb_field *req;
+   struct ulptx_idata *sc;
+   unsigned int wrlen = roundup(sizeof(*req) + sizeof(*sc), 16);
+
+   req = (struct cpl_set_tcb_field *)__skb_put(skb, wrlen);
+   __set_tcb_field_direct(csk, req, word, mask, val, cookie, no_reply);
+   set_wr_txq(skb, CPL_PRIORITY_CONTROL, csk->port_id);
+}
+
+static int chtls_set_tcb_field(struct sock *sk, u16 word, u64 mask, u64 val)
+{
+   struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
+   struct sk_buff *skb;
+   struct cpl_set_tcb_field *req;
+   struct ulptx_idata *sc;
+   unsigned int wrlen = roundup(sizeof(*req) + sizeof(*sc), 16);
+   unsigned int credits_needed = DIV_ROUND_UP(wrlen, 16);
+
+   skb = alloc_skb(wrlen, GFP_ATOMIC);
+   if (!skb)
+   return -ENOMEM;
+
+   __set_tcb_field(sk, skb, word, mask, val, 0, 1);
+   set_queue(skb, (csk->txq_idx << 1) | CPL_PRIORITY_DATA, sk);
+   csk->wr_credits -= credits_needed;
+   csk->wr_unacked += credits_needed;
+   enqueue_wr(csk, skb);
+   cxgb4_ofld_send(csk->egress_dev, skb);
+   return 0;
+}
+
+/*
+ * Set one of the t_flags bits in the TCB.
+ */
+int chtls_set_tcb_tflag(struct sock *sk, unsigned int bit_pos, int val)
+{
+   return chtls_set_tcb_field(sk, 1, 1ULL << bit_pos,
+   val << bit_pos);
+}
+
+static int chtls_set_tcb_keyid(struct sock *sk, int keyid)
+{
+   return chtls_set_tcb_field(sk, 31, 0xFFFFFFFFULL, keyid);
+}
+
+static int chtls_set_tcb_seqno(struct sock *sk)
+{
+   return chtls_set_tcb_field(sk, 28, ~0ULL, 0);
+}
+
+static int chtls_set_tcb_quiesce(struct sock *sk, int val)
+{
+   return chtls_set_tcb_field(sk, 1, (1ULL << TF_RX_QUIESCE_S),
+  TF_RX_QUIESCE_V(val));
+}
+
+static void *chtls_alloc_mem(unsigned long size)
+{
+   void *p = kmalloc(size, GFP_KERNEL);
+
+   if (!p)
+   p = vmalloc(size);
+   if (p)
+   memset(p, 0, size);
+   return p;
+}
+
+static void chtls_free_mem(void *addr)
+{
+   unsigned long p = (unsigned long)addr;
+
+   if (p >= VMALLOC_START && p < VMALLOC_END)
+   vfree(addr);
+   else
+   kfree(addr);
+}
+
+/* TLS Key bitmap processing */
+int chtls_init_kmap(struct chtls_dev *cdev, struct cxgb4_lld_info *lldi)
+{
+   unsigned int num_key_ctx, bsize;
+
+   num_key_ctx = (lldi->vr->key.size / TLS_KEY_CONTEXT_SZ);
+   bsize = BITS_TO_LONGS(num_key_ctx);
+
+   cdev->kmap.size = num_key_ctx;
+   cdev->kmap.available = bsize;
+   cdev->kmap.addr = chtls_alloc_mem(sizeof(*cdev->kmap.addr) *
+ bsize);
+   if (!cdev->kmap.addr)
+   return -1;
+
+   cdev->kmap.start = lldi->vr->key.start;
+   spin_lock_init(&cdev->kmap.lock);
+   return 0;
+}
+
+void chtls_free_kmap(struct chtls_dev *cdev)
+{
+   if (cdev->kmap.addr)
+   chtls_free_mem(cdev->kmap.addr);
+}
+
+static int 

[Crypto v7 05/12] cxgb4: Inline TLS FW Interface

2018-02-22 Thread Atul Gupta
Key area size in hw-config file. CPL struct for TLS request
and response. Work request for Inline TLS.

Signed-off-by: Atul Gupta 
---
 drivers/net/ethernet/chelsio/cxgb4/t4_msg.h   | 121 ++-
 drivers/net/ethernet/chelsio/cxgb4/t4_regs.h  |   2 +
 drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h | 165 +-
 3 files changed, 283 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h 
b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h
index d0db442..507cb5a 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h
@@ -81,6 +81,7 @@ enum {
CPL_RX_ISCSI_CMP  = 0x45,
CPL_TRACE_PKT_T5  = 0x48,
CPL_RX_ISCSI_DDP  = 0x49,
+   CPL_RX_TLS_CMP= 0x4E,
 
CPL_RDMA_READ_REQ = 0x60,
 
@@ -88,6 +89,7 @@ enum {
CPL_ACT_OPEN_REQ6 = 0x83,
 
CPL_TX_TLS_PDU =0x88,
+   CPL_TX_TLS_SFO= 0x89,
CPL_TX_SEC_PDU= 0x8A,
CPL_TX_TLS_ACK= 0x8B,
 
@@ -97,6 +99,7 @@ enum {
CPL_RX_MPS_PKT= 0xAF,
 
CPL_TRACE_PKT = 0xB0,
+   CPL_TLS_DATA  = 0xB1,
CPL_ISCSI_DATA= 0xB2,
 
CPL_FW4_MSG   = 0xC0,
@@ -151,6 +154,7 @@ enum {
ULP_MODE_RDMA  = 4,
ULP_MODE_TCPDDP= 5,
ULP_MODE_FCOE  = 6,
+   ULP_MODE_TLS   = 8,
 };
 
 enum {
@@ -1415,6 +1419,14 @@ struct cpl_tx_data {
 #define TX_FORCE_S 13
 #define TX_FORCE_V(x)  ((x) << TX_FORCE_S)
 
+#define TX_SHOVE_S14
+#define TX_SHOVE_V(x) ((x) << TX_SHOVE_S)
+
+#define TX_ULP_MODE_S10
+#define TX_ULP_MODE_M0x7
+#define TX_ULP_MODE_V(x) ((x) << TX_ULP_MODE_S)
+#define TX_ULP_MODE_G(x) (((x) >> TX_ULP_MODE_S) & TX_ULP_MODE_M)
+
 #define T6_TX_FORCE_S  20
 #define T6_TX_FORCE_V(x)   ((x) << T6_TX_FORCE_S)
 #define T6_TX_FORCE_F  T6_TX_FORCE_V(1U)
@@ -1429,12 +1441,21 @@ enum {
ULP_TX_SC_NOOP = 0x80,
ULP_TX_SC_IMM  = 0x81,
ULP_TX_SC_DSGL = 0x82,
-   ULP_TX_SC_ISGL = 0x83
+   ULP_TX_SC_ISGL = 0x83,
+   ULP_TX_SC_MEMRD = 0x86
 };
 
 #define ULPTX_CMD_S24
 #define ULPTX_CMD_V(x) ((x) << ULPTX_CMD_S)
 
+#define ULPTX_LEN16_S0
+#define ULPTX_LEN16_M0xFF
+#define ULPTX_LEN16_V(x) ((x) << ULPTX_LEN16_S)
+
+#define ULP_TX_SC_MORE_S 23
+#define ULP_TX_SC_MORE_V(x) ((x) << ULP_TX_SC_MORE_S)
+#define ULP_TX_SC_MORE_F  ULP_TX_SC_MORE_V(1U)
+
 struct ulptx_sge_pair {
__be32 len[2];
__be64 addr[2];
@@ -2112,4 +2133,102 @@ enum {
X_CPL_RX_MPS_PKT_TYPE_QFC   = 1 << 2,
X_CPL_RX_MPS_PKT_TYPE_PTP   = 1 << 3
 };
+
+struct cpl_tx_tls_sfo {
+   __be32 op_to_seg_len;
+   __be32 pld_len;
+   __be32 type_protover;
+   __be32 r1_lo;
+   __be32 seqno_numivs;
+   __be32 ivgen_hdrlen;
+   __be64 scmd1;
+};
+
+/* cpl_tx_tls_sfo macros */
+#define CPL_TX_TLS_SFO_OPCODE_S 24
+#define CPL_TX_TLS_SFO_OPCODE_V(x)  ((x) << CPL_TX_TLS_SFO_OPCODE_S)
+
+#define CPL_TX_TLS_SFO_DATA_TYPE_S  20
+#define CPL_TX_TLS_SFO_DATA_TYPE_V(x)   ((x) << CPL_TX_TLS_SFO_DATA_TYPE_S)
+
+#define CPL_TX_TLS_SFO_CPL_LEN_S16
+#define CPL_TX_TLS_SFO_CPL_LEN_V(x) ((x) << CPL_TX_TLS_SFO_CPL_LEN_S)
+
+#define CPL_TX_TLS_SFO_SEG_LEN_S0
+#define CPL_TX_TLS_SFO_SEG_LEN_M 0xffff
+#define CPL_TX_TLS_SFO_SEG_LEN_V(x) ((x) << CPL_TX_TLS_SFO_SEG_LEN_S)
+#define CPL_TX_TLS_SFO_SEG_LEN_G(x) \
+   (((x) >> CPL_TX_TLS_SFO_SEG_LEN_S) & CPL_TX_TLS_SFO_SEG_LEN_M)
+
+#define CPL_TX_TLS_SFO_TYPE_S   24
+#define CPL_TX_TLS_SFO_TYPE_M   0xff
+#define CPL_TX_TLS_SFO_TYPE_V(x)((x) << CPL_TX_TLS_SFO_TYPE_S)
+#define CPL_TX_TLS_SFO_TYPE_G(x)\
+   (((x) >> CPL_TX_TLS_SFO_TYPE_S) & CPL_TX_TLS_SFO_TYPE_M)
+
+#define CPL_TX_TLS_SFO_PROTOVER_S   8
+#define CPL_TX_TLS_SFO_PROTOVER_M   0xffff
+#define CPL_TX_TLS_SFO_PROTOVER_V(x)((x) << CPL_TX_TLS_SFO_PROTOVER_S)
+#define CPL_TX_TLS_SFO_PROTOVER_G(x)\
+   (((x) >> CPL_TX_TLS_SFO_PROTOVER_S) & CPL_TX_TLS_SFO_PROTOVER_M)
+
+struct cpl_tls_data {
+   struct rss_header rsshdr;
+   union opcode_tid ot;
+   __be32 length_pkd;
+   __be32 seq;
+   __be32 r1;
+};
+
+#define CPL_TLS_DATA_OPCODE_S   24
+#define CPL_TLS_DATA_OPCODE_M   0xff
+#define CPL_TLS_DATA_OPCODE_V(x)((x) << CPL_TLS_DATA_OPCODE_S)
+#define CPL_TLS_DATA_OPCODE_G(x)\
+   (((x) >> CPL_TLS_DATA_OPCODE_S) & CPL_TLS_DATA_OPCODE_M)
+
+#define CPL_TLS_DATA_TID_S  0
+#define CPL_TLS_DATA_TID_M  0xffffff
+#define CPL_TLS_DATA_TID_V(x)   ((x) << CPL_TLS_DATA_TID_S)
+#define CPL_TLS_DATA_TID_G(x)   \
+   (((x) >> CPL_TLS_DATA_TID_S) & CPL_TLS_DATA_TID_M)
+
+#define CPL_TLS_DATA_LENGTH_S   0
+#define CPL_TLS_DATA_LENGTH_M   0x
+#define 

[Crypto v7 10/12] chtls: Inline crypto request Tx/Rx

2018-02-22 Thread Atul Gupta
TLS handler for record transmit and receive.
Create Inline TLS work request and post to FW.

Signed-off-by: Atul Gupta 
---
 drivers/crypto/chelsio/chtls/chtls_io.c | 1867 +++
 1 file changed, 1867 insertions(+)
 create mode 100644 drivers/crypto/chelsio/chtls/chtls_io.c

diff --git a/drivers/crypto/chelsio/chtls/chtls_io.c 
b/drivers/crypto/chelsio/chtls/chtls_io.c
new file mode 100644
index 0000000..0c5d6c1
--- /dev/null
+++ b/drivers/crypto/chelsio/chtls/chtls_io.c
@@ -0,0 +1,1867 @@
+/*
+ * Copyright (c) 2017 Chelsio Communications, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Written by: Atul Gupta (atul.gu...@chelsio.com)
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "chtls.h"
+#include "chtls_cm.h"
+
+static bool is_tls_hw(struct chtls_sock *csk)
+{
+   return csk->tlshws.ofld;
+}
+
+static bool is_tls_rx(struct chtls_sock *csk)
+{
+   return (csk->tlshws.rxkey >= 0);
+}
+
+static bool is_tls_tx(struct chtls_sock *csk)
+{
+   return (csk->tlshws.txkey >= 0);
+}
+
+static bool is_tls_skb(struct chtls_sock *csk, const struct sk_buff *skb)
+{
+   return (is_tls_hw(csk) && skb_ulp_tls_skb_flags(skb));
+}
+
+static int key_size(void *sk)
+{
+   return 16; /* Key on DDR */
+}
+
+#define ceil(x, y) \
+   ({ unsigned long __x = (x), __y = (y); (__x + __y - 1) / __y; })
+
+static int data_sgl_len(const struct sk_buff *skb)
+{
+   unsigned int cnt;
+
+   cnt = skb_shinfo(skb)->nr_frags;
+   return (sgl_len(cnt) * 8);
+}
+
+static int nos_ivs(struct sock *sk, unsigned int size)
+{
+   struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
+
+   return ceil(size, csk->tlshws.mfs);
+}
+
+#define TLS_WR_CPL_LEN \
+   (sizeof(struct fw_tlstx_data_wr) + \
+   sizeof(struct cpl_tx_tls_sfo))
+
+static int is_ivs_imm(struct sock *sk, const struct sk_buff *skb)
+{
+   int ivs_size = nos_ivs(sk, skb->len) * CIPHER_BLOCK_SIZE;
+   int hlen = TLS_WR_CPL_LEN + data_sgl_len(skb);
+
+   if ((hlen + key_size(sk) + ivs_size) <
+   MAX_IMM_OFLD_TX_DATA_WR_LEN) {
+   ULP_SKB_CB(skb)->ulp.tls.iv = 1;
+   return 1;
+   }
+   ULP_SKB_CB(skb)->ulp.tls.iv = 0;
+   return 0;
+}
+
+static int max_ivs_size(struct sock *sk, int size)
+{
+   return (nos_ivs(sk, size) * CIPHER_BLOCK_SIZE);
+}
+
+static int ivs_size(struct sock *sk, const struct sk_buff *skb)
+{
+   return (is_ivs_imm(sk, skb) ? (nos_ivs(sk, skb->len) *
+CIPHER_BLOCK_SIZE) : 0);
+}
+
+static int flowc_wr_credits(int nparams, int *flowclenp)
+{
+   int flowclen16, flowclen;
+
+   flowclen = offsetof(struct fw_flowc_wr, mnemval[nparams]);
+   flowclen16 = DIV_ROUND_UP(flowclen, 16);
+   flowclen = flowclen16 * 16;
+
+   if (flowclenp)
+   *flowclenp = flowclen;
+
+   return flowclen16;
+}
+
+static struct sk_buff *create_flowc_wr_skb(struct sock *sk,
+  struct fw_flowc_wr *flowc,
+  int flowclen)
+{
+   struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
+   struct sk_buff *skb;
+
+   skb = alloc_skb(flowclen, GFP_ATOMIC);
+   if (!skb)
+   return NULL;
+
+   memcpy(__skb_put(skb, flowclen), flowc, flowclen);
+   set_queue(skb, (csk->txq_idx << 1) | CPL_PRIORITY_DATA, sk);
+
+   return skb;
+}
+
+static int send_flowc_wr(struct sock *sk, struct fw_flowc_wr *flowc,
+int flowclen)
+{
+   struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
+   bool syn_sent = (sk->sk_state == TCP_SYN_SENT);
+   struct tcp_sock *tp = tcp_sk(sk);
+   int flowclen16 = flowclen / 16;
+   struct sk_buff *skb;
+
+   if (csk_flag(sk, CSK_TX_DATA_SENT)) {
+   skb = create_flowc_wr_skb(sk, flowc, flowclen);
+   if (!skb)
+   return -ENOMEM;
+
+   if (syn_sent)
+   __skb_queue_tail(&csk->ooo_queue, skb);
+   else
+   skb_entail(sk, skb,
+  ULPCB_FLAG_NO_HDR | ULPCB_FLAG_NO_APPEND);
+   return 0;
+   }
+
+   if (!syn_sent) {
+   int ret;
+
+   ret = cxgb4_immdata_send(csk->egress_dev,
+csk->txq_idx,
+flowc, flowclen);
+   if (!ret)
+   return flowclen16;
+   }
+   skb = create_flowc_wr_skb(sk, flowc, flowclen);
+   if (!skb)
+   return -ENOMEM;
+   send_or_defer(sk, tp, skb, 0);
+   return flowclen16;
+}
+
+static u8 

[RFC PATCH V2] virtio_pci: Add SR-IOV support

2018-02-22 Thread Mark Rustad
Hardware-realized virtio-pci devices can implement SR-IOV, so this
patch enables its use. The device in question is an upcoming Intel
NIC that implements both a virtio-net PF and virtio-net VFs. These
are hardware realizations of what has up to now been a software
interface.

The device in question has the following 4-part PCI IDs:

PF: vendor: 1af4 device: 1041 subvendor: 8086 subdevice: 15fe
VF: vendor: 1af4 device: 1041 subvendor: 8086 subdevice: 05fe

The patch needs no check for device ID, because the callback will
never be made for devices that do not assert the capability or
when run on a platform incapable of SR-IOV.

One reason for this patch is because the hardware requires the
vendor ID of a VF to be the same as the vendor ID of the PF that
created it. So it seemed logical to simply have a fully-functioning
virtio-net PF create the VFs. This patch makes that possible.

Signed-off-by: Mark Rustad 
Reviewed-by: Alexander Duyck 
---
Changes in V2:
- Simplified logic from previous version, removed added driver variable
- Disable SR-IOV on driver removal except when VFs are assigned
- Sent as RFC to virtio-dev, linux-pci, netdev, lkml and others
---
 drivers/virtio/virtio_pci_common.c |   47 
 1 file changed, 47 insertions(+)

diff --git a/drivers/virtio/virtio_pci_common.c 
b/drivers/virtio/virtio_pci_common.c
index 48d4d1cf1cb6..78b53ffc4cee 100644
--- a/drivers/virtio/virtio_pci_common.c
+++ b/drivers/virtio/virtio_pci_common.c
@@ -572,6 +572,47 @@ static int virtio_pci_probe(struct pci_dev *pci_dev,
return rc;
 }
 
+#ifdef CONFIG_PCI_IOV
+static int virtio_pci_sriov_disable(struct pci_dev *pci_dev)
+{
+   /* If vfs are assigned we cannot shut down SR-IOV without causing
+* issues, so just leave the hardware available.
+*/
+   if (pci_vfs_assigned(pci_dev)) {
+   dev_warn(&pci_dev->dev,
+"Unloading driver while VFs are assigned - VFs will not be deallocated\n");
+   return -EPERM;
+   }
+   pci_disable_sriov(pci_dev);
+   return 0;
+}
+
+static int virtio_pci_sriov_enable(struct pci_dev *pci_dev, int num_vfs)
+{
+   int rc = 0;
+
+   if (pci_num_vf(pci_dev))
+   return -EINVAL;
+
+   rc = pci_enable_sriov(pci_dev, num_vfs);
+   if (rc) {
+   dev_warn(&pci_dev->dev, "Failed to enable PCI sriov: %d\n", rc);
+   return rc;
+   }
+   dev_info(&pci_dev->dev, "SR-IOV enabled with %d VFs\n", num_vfs);
+   return num_vfs;
+}
+
+static int virtio_pci_sriov_configure(struct pci_dev *dev, int num_vfs)
+{
+   if (num_vfs)
+   return virtio_pci_sriov_enable(dev, num_vfs);
+   if (!pci_num_vf(dev))
+   return -EINVAL;
+   return virtio_pci_sriov_disable(dev);
+}
+#endif /* CONFIG_PCI_IOV */
+
 static void virtio_pci_remove(struct pci_dev *pci_dev)
 {
struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev);
@@ -584,6 +625,9 @@ static void virtio_pci_remove(struct pci_dev *pci_dev)
else
virtio_pci_modern_remove(vp_dev);
 
+#ifdef CONFIG_PCI_IOV
+   virtio_pci_sriov_disable(pci_dev);
+#endif
pci_disable_device(pci_dev);
put_device(dev);
 }
@@ -596,6 +640,9 @@ static void virtio_pci_remove(struct pci_dev *pci_dev)
 #ifdef CONFIG_PM_SLEEP
 .driver.pm  = &virtio_pci_pm_ops,
 #endif
+#ifdef CONFIG_PCI_IOV
+   .sriov_configure = virtio_pci_sriov_configure,
+#endif
 };
 
 module_pci_driver(virtio_pci_driver);



[Crypto v7 11/12] chtls: Register chtls Inline TLS with net tls

2018-02-22 Thread Atul Gupta
Register chtls as Inline TLS driver, chtls is ULD to cxgb4.
Setsockopt to program (tx/rx) keys on chip. Support AES GCM
of key size 128. Support both Inline Rx and Tx.

Signed-off-by: Atul Gupta 
---
 drivers/crypto/chelsio/chtls/chtls_main.c | 600 ++
 include/uapi/linux/tls.h  |   1 +
 2 files changed, 601 insertions(+)
 create mode 100644 drivers/crypto/chelsio/chtls/chtls_main.c

diff --git a/drivers/crypto/chelsio/chtls/chtls_main.c 
b/drivers/crypto/chelsio/chtls/chtls_main.c
new file mode 100644
index 0000000..657c515
--- /dev/null
+++ b/drivers/crypto/chelsio/chtls/chtls_main.c
@@ -0,0 +1,600 @@
+/*
+ * Copyright (c) 2017 Chelsio Communications, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Written by: Atul Gupta (atul.gu...@chelsio.com)
+ */
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "chtls.h"
+#include "chtls_cm.h"
+
+#define DRV_NAME "chtls"
+
+/*
+ * chtls device management
+ * maintains a list of the chtls devices
+ */
+static LIST_HEAD(cdev_list);
+static DEFINE_MUTEX(cdev_mutex);
+static DEFINE_MUTEX(cdev_list_lock);
+
+static struct proto chtls_cpl_prot;
+static struct proto chtls_base_prot;
+static DEFINE_MUTEX(notify_mutex);
+static RAW_NOTIFIER_HEAD(listen_notify_list);
+struct request_sock_ops chtls_rsk_ops;
+static uint send_page_order = (14 - PAGE_SHIFT < 0) ? 0 : 14 - PAGE_SHIFT;
+
+static int register_listen_notifier(struct notifier_block *nb)
+{
+   int err;
+
+   mutex_lock(&notify_mutex);
+   err = raw_notifier_chain_register(&listen_notify_list, nb);
+   mutex_unlock(&notify_mutex);
+   return err;
+}
+
+static int unregister_listen_notifier(struct notifier_block *nb)
+{
+   int err;
+
+   mutex_lock(&notify_mutex);
+   err = raw_notifier_chain_unregister(&listen_notify_list, nb);
+   mutex_unlock(&notify_mutex);
+   return err;
+}
+
+static int listen_notify_handler(struct notifier_block *this,
+unsigned long event, void *data)
+{
+   struct sock *sk = data;
+   struct chtls_dev *cdev;
+   int ret =  NOTIFY_DONE;
+
+   switch (event) {
+   case CHTLS_LISTEN_START:
+   case CHTLS_LISTEN_STOP:
+   mutex_lock(&cdev_list_lock);
+   list_for_each_entry(cdev, &cdev_list, list) {
+   if (event == CHTLS_LISTEN_START)
+   ret = chtls_listen_start(cdev, sk);
+   else
+   chtls_listen_stop(cdev, sk);
+   }
+   mutex_unlock(&cdev_list_lock);
+   break;
+   }
+   return ret;
+}
+
+static struct notifier_block listen_notifier = {
+   .notifier_call = listen_notify_handler
+};
+
+static int listen_backlog_rcv(struct sock *sk, struct sk_buff *skb)
+{
+   if (likely(skb_transport_header(skb) != skb_network_header(skb)))
+   return tcp_v4_do_rcv(sk, skb);
+   BLOG_SKB_CB(skb)->backlog_rcv(sk, skb);
+   return 0;
+}
+
+static int chtls_start_listen(struct sock *sk)
+{
+   int err;
+
+   if (sk->sk_protocol != IPPROTO_TCP)
+   return -EPROTONOSUPPORT;
+
+   if (sk->sk_family == PF_INET &&
+   LOOPBACK(inet_sk(sk)->inet_rcv_saddr))
+   return -EADDRNOTAVAIL;
+
+   sk->sk_backlog_rcv = listen_backlog_rcv;
+   mutex_lock(&notify_mutex);
+   err = raw_notifier_call_chain(&listen_notify_list,
+ CHTLS_LISTEN_START, sk);
+   mutex_unlock(&notify_mutex);
+   return err;
+}
+
+static int chtls_hash(struct sock *sk)
+{
+   int err;
+
+   err = tcp_prot.hash(sk);
+   if (sk->sk_state == TCP_LISTEN)
+   err |= chtls_start_listen(sk);
+
+   if (err)
+   tcp_prot.unhash(sk);
+   return err;
+}
+
+static int chtls_stop_listen(struct sock *sk)
+{
+   if (sk->sk_protocol != IPPROTO_TCP)
+   return -EPROTONOSUPPORT;
+
+   mutex_lock(&notify_mutex);
+   raw_notifier_call_chain(&listen_notify_list,
+   CHTLS_LISTEN_STOP, sk);
+   mutex_unlock(&notify_mutex);
+   return 0;
+}
+
+static void chtls_unhash(struct sock *sk)
+{
+   if (sk->sk_state == TCP_LISTEN)
+   chtls_stop_listen(sk);
+   tcp_prot.unhash(sk);
+}
+
+static int chtls_netdev(struct tls_device *dev,
+   struct net_device *netdev)
+{
+   struct chtls_dev *cdev = to_chtls_dev(dev);
+   int i;
+
+   for (i = 0; i < cdev->lldi->nports; i++)
+   if (cdev->ports[i] == netdev)
+   return 1;
+
+   return 0;
+}
+
+static int chtls_inline_feature(struct tls_device *dev)
+{
+   struct chtls_dev *cdev = to_chtls_dev(dev);
+   struct net_device *netdev;
+   int i;
+
+ 

[Crypto v7 09/12] chtls: CPL handler definition

2018-02-22 Thread Atul Gupta
CPL handlers for TLS session, record transmit and receive.

Signed-off-by: Atul Gupta 
---
 drivers/crypto/chelsio/chtls/chtls_cm.c | 2041 +++
 net/ipv4/tcp_minisocks.c|1 +
 2 files changed, 2042 insertions(+)
 create mode 100644 drivers/crypto/chelsio/chtls/chtls_cm.c

diff --git a/drivers/crypto/chelsio/chtls/chtls_cm.c 
b/drivers/crypto/chelsio/chtls/chtls_cm.c
new file mode 100644
index 0000000..1c95e87
--- /dev/null
+++ b/drivers/crypto/chelsio/chtls/chtls_cm.c
@@ -0,0 +1,2041 @@
+/*
+ * Copyright (c) 2017 Chelsio Communications, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Written by: Atul Gupta (atul.gu...@chelsio.com)
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "chtls.h"
+#include "chtls_cm.h"
+
+extern struct request_sock_ops chtls_rsk_ops;
+
+/*
+ * State transitions and actions for close.  Note that if we are in SYN_SENT
+ * we remain in that state as we cannot control a connection while it's in
+ * SYN_SENT; such connections are allowed to establish and are then aborted.
+ */
+static unsigned char new_state[16] = {
+   /* current state: new state:  action: */
+   /* (Invalid)   */ TCP_CLOSE,
+   /* TCP_ESTABLISHED */ TCP_FIN_WAIT1 | TCP_ACTION_FIN,
+   /* TCP_SYN_SENT*/ TCP_SYN_SENT,
+   /* TCP_SYN_RECV*/ TCP_FIN_WAIT1 | TCP_ACTION_FIN,
+   /* TCP_FIN_WAIT1   */ TCP_FIN_WAIT1,
+   /* TCP_FIN_WAIT2   */ TCP_FIN_WAIT2,
+   /* TCP_TIME_WAIT   */ TCP_CLOSE,
+   /* TCP_CLOSE   */ TCP_CLOSE,
+   /* TCP_CLOSE_WAIT  */ TCP_LAST_ACK | TCP_ACTION_FIN,
+   /* TCP_LAST_ACK*/ TCP_LAST_ACK,
+   /* TCP_LISTEN  */ TCP_CLOSE,
+   /* TCP_CLOSING */ TCP_CLOSING,
+};
+
+static struct chtls_sock *chtls_sock_create(struct chtls_dev *cdev)
+{
+   struct chtls_sock *csk = kzalloc(sizeof(*csk), GFP_ATOMIC);
+
+   if (!csk)
+   return NULL;
+
+   csk->txdata_skb_cache = alloc_skb(TXDATA_SKB_LEN, GFP_ATOMIC);
+   if (!csk->txdata_skb_cache) {
+   kfree(csk);
+   return NULL;
+   }
+
+   kref_init(&csk->kref);
+   csk->cdev = cdev;
+   skb_queue_head_init(&csk->txq);
+   csk->wr_skb_head = NULL;
+   csk->wr_skb_tail = NULL;
+   csk->mss = MAX_MSS;
+   csk->tlshws.ofld = 1;
+   csk->tlshws.txkey = -1;
+   csk->tlshws.rxkey = -1;
+   csk->tlshws.mfs = TLS_MFS;
+   skb_queue_head_init(&csk->tlshws.sk_recv_queue);
+   return csk;
+}
+
+static void chtls_sock_release(struct kref *ref)
+{
+   struct chtls_sock *csk =
+   container_of(ref, struct chtls_sock, kref);
+
+   kfree(csk);
+}
+
+static struct net_device *chtls_ipv4_netdev(struct chtls_dev *cdev,
+   struct sock *sk)
+{
+   struct net_device *ndev = cdev->ports[0];
+
+   if (likely(!inet_sk(sk)->inet_rcv_saddr))
+   return ndev;
+
+   ndev = ip_dev_find(&init_net, inet_sk(sk)->inet_rcv_saddr);
+   if (!ndev)
+   return NULL;
+
+   if (is_vlan_dev(ndev))
+   return vlan_dev_real_dev(ndev);
+   return ndev;
+}
+
+static void assign_rxopt(struct sock *sk, unsigned int opt)
+{
+   struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
+   struct tcp_sock *tp = tcp_sk(sk);
+   const struct chtls_dev *cdev;
+
+   cdev = csk->cdev;
+   tp->tcp_header_len   = sizeof(struct tcphdr);
+   tp->rx_opt.mss_clamp = cdev->mtus[TCPOPT_MSS_G(opt)] - 40;
+   tp->mss_cache= tp->rx_opt.mss_clamp;
+   tp->rx_opt.tstamp_ok = TCPOPT_TSTAMP_G(opt);
+   tp->rx_opt.snd_wscale= TCPOPT_SACK_G(opt);
+   tp->rx_opt.wscale_ok = TCPOPT_WSCALE_OK_G(opt);
+   SND_WSCALE(tp)   = TCPOPT_SND_WSCALE_G(opt);
+   if (!tp->rx_opt.wscale_ok)
+   tp->rx_opt.rcv_wscale = 0;
+   if (tp->rx_opt.tstamp_ok) {
+   tp->tcp_header_len += TCPOLEN_TSTAMP_ALIGNED;
+   tp->rx_opt.mss_clamp -= TCPOLEN_TSTAMP_ALIGNED;
+   } else if (csk->opt2 & TSTAMPS_EN_F) {
+   csk->opt2 &= ~TSTAMPS_EN_F;
+   csk->mtu_idx = TCPOPT_MSS_G(opt);
+   }
+}
+
+static void chtls_purge_rcv_queue(struct sock *sk)
+{
+   struct sk_buff *skb;
+
+   while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
+   skb_dst_set(skb, (void *)NULL);
+   kfree_skb(skb);
+   }
+}
+
+static void chtls_purge_write_queue(struct sock *sk)
+{
+   struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
+   struct sk_buff *skb;
+
+   while ((skb = __skb_dequeue(&csk->txq))) {
+

[Crypto v7 07/12] chcr: Key Macro

2018-02-22 Thread Atul Gupta
Define macro for TLS Key context

Signed-off-by: Atul Gupta 
---
 drivers/crypto/chelsio/chcr_algo.h | 42 +
 drivers/crypto/chelsio/chcr_core.h | 55 +-
 2 files changed, 96 insertions(+), 1 deletion(-)

diff --git a/drivers/crypto/chelsio/chcr_algo.h 
b/drivers/crypto/chelsio/chcr_algo.h
index d1673a5..f263cd4 100644
--- a/drivers/crypto/chelsio/chcr_algo.h
+++ b/drivers/crypto/chelsio/chcr_algo.h
@@ -86,6 +86,39 @@
 KEY_CONTEXT_OPAD_PRESENT_M)
 #define KEY_CONTEXT_OPAD_PRESENT_F  KEY_CONTEXT_OPAD_PRESENT_V(1U)
 
+#define TLS_KEYCTX_RXFLIT_CNT_S 24
+#define TLS_KEYCTX_RXFLIT_CNT_V(x) ((x) << TLS_KEYCTX_RXFLIT_CNT_S)
+
+#define TLS_KEYCTX_RXPROT_VER_S 20
+#define TLS_KEYCTX_RXPROT_VER_M 0xf
+#define TLS_KEYCTX_RXPROT_VER_V(x) ((x) << TLS_KEYCTX_RXPROT_VER_S)
+
+#define TLS_KEYCTX_RXCIPH_MODE_S 16
+#define TLS_KEYCTX_RXCIPH_MODE_M 0xf
+#define TLS_KEYCTX_RXCIPH_MODE_V(x) ((x) << TLS_KEYCTX_RXCIPH_MODE_S)
+
+#define TLS_KEYCTX_RXAUTH_MODE_S 12
+#define TLS_KEYCTX_RXAUTH_MODE_M 0xf
+#define TLS_KEYCTX_RXAUTH_MODE_V(x) ((x) << TLS_KEYCTX_RXAUTH_MODE_S)
+
+#define TLS_KEYCTX_RXCIAU_CTRL_S 11
+#define TLS_KEYCTX_RXCIAU_CTRL_V(x) ((x) << TLS_KEYCTX_RXCIAU_CTRL_S)
+
+#define TLS_KEYCTX_RX_SEQCTR_S 9
+#define TLS_KEYCTX_RX_SEQCTR_M 0x3
+#define TLS_KEYCTX_RX_SEQCTR_V(x) ((x) << TLS_KEYCTX_RX_SEQCTR_S)
+
+#define TLS_KEYCTX_RX_VALID_S 8
+#define TLS_KEYCTX_RX_VALID_V(x) ((x) << TLS_KEYCTX_RX_VALID_S)
+
+#define TLS_KEYCTX_RXCK_SIZE_S 3
+#define TLS_KEYCTX_RXCK_SIZE_M 0x7
+#define TLS_KEYCTX_RXCK_SIZE_V(x) ((x) << TLS_KEYCTX_RXCK_SIZE_S)
+
+#define TLS_KEYCTX_RXMK_SIZE_S 0
+#define TLS_KEYCTX_RXMK_SIZE_M 0x7
+#define TLS_KEYCTX_RXMK_SIZE_V(x) ((x) << TLS_KEYCTX_RXMK_SIZE_S)
+
 #define CHCR_HASH_MAX_DIGEST_SIZE 64
 #define CHCR_MAX_SHA_DIGEST_SIZE 64
 
@@ -176,6 +209,15 @@
  KEY_CONTEXT_SALT_PRESENT_V(1) | \
  KEY_CONTEXT_CTX_LEN_V((ctx_len)))
 
+#define  FILL_KEY_CRX_HDR(ck_size, mk_size, d_ck, opad, ctx_len) \
+   htonl(TLS_KEYCTX_RXMK_SIZE_V(mk_size) | \
+ TLS_KEYCTX_RXCK_SIZE_V(ck_size) | \
+ TLS_KEYCTX_RX_VALID_V(1) | \
+ TLS_KEYCTX_RX_SEQCTR_V(3) | \
+ TLS_KEYCTX_RXAUTH_MODE_V(4) | \
+ TLS_KEYCTX_RXCIPH_MODE_V(2) | \
+ TLS_KEYCTX_RXFLIT_CNT_V((ctx_len)))
+
 #define FILL_WR_OP_CCTX_SIZE \
htonl( \
FW_CRYPTO_LOOKASIDE_WR_OPCODE_V( \
diff --git a/drivers/crypto/chelsio/chcr_core.h 
b/drivers/crypto/chelsio/chcr_core.h
index 3c29ee0..77056a9 100644
--- a/drivers/crypto/chelsio/chcr_core.h
+++ b/drivers/crypto/chelsio/chcr_core.h
@@ -65,10 +65,58 @@
 struct _key_ctx {
__be32 ctx_hdr;
u8 salt[MAX_SALT];
-   __be64 reserverd;
+   __be64 iv_to_auth;
unsigned char key[0];
 };
 
+#define KEYCTX_TX_WR_IV_S  55
+#define KEYCTX_TX_WR_IV_M  0x1ffULL
+#define KEYCTX_TX_WR_IV_V(x) ((x) << KEYCTX_TX_WR_IV_S)
+#define KEYCTX_TX_WR_IV_G(x) \
+   (((x) >> KEYCTX_TX_WR_IV_S) & KEYCTX_TX_WR_IV_M)
+
+#define KEYCTX_TX_WR_AAD_S 47
+#define KEYCTX_TX_WR_AAD_M 0xffULL
+#define KEYCTX_TX_WR_AAD_V(x) ((x) << KEYCTX_TX_WR_AAD_S)
+#define KEYCTX_TX_WR_AAD_G(x) (((x) >> KEYCTX_TX_WR_AAD_S) & \
+   KEYCTX_TX_WR_AAD_M)
+
+#define KEYCTX_TX_WR_AADST_S 39
+#define KEYCTX_TX_WR_AADST_M 0xffULL
+#define KEYCTX_TX_WR_AADST_V(x) ((x) << KEYCTX_TX_WR_AADST_S)
+#define KEYCTX_TX_WR_AADST_G(x) \
+   (((x) >> KEYCTX_TX_WR_AADST_S) & KEYCTX_TX_WR_AADST_M)
+
+#define KEYCTX_TX_WR_CIPHER_S 30
+#define KEYCTX_TX_WR_CIPHER_M 0x1ffULL
+#define KEYCTX_TX_WR_CIPHER_V(x) ((x) << KEYCTX_TX_WR_CIPHER_S)
+#define KEYCTX_TX_WR_CIPHER_G(x) \
+   (((x) >> KEYCTX_TX_WR_CIPHER_S) & KEYCTX_TX_WR_CIPHER_M)
+
+#define KEYCTX_TX_WR_CIPHERST_S 23
+#define KEYCTX_TX_WR_CIPHERST_M 0x7f
+#define KEYCTX_TX_WR_CIPHERST_V(x) ((x) << KEYCTX_TX_WR_CIPHERST_S)
+#define KEYCTX_TX_WR_CIPHERST_G(x) \
+   (((x) >> KEYCTX_TX_WR_CIPHERST_S) & KEYCTX_TX_WR_CIPHERST_M)
+
+#define KEYCTX_TX_WR_AUTH_S 14
+#define KEYCTX_TX_WR_AUTH_M 0x1ff
+#define KEYCTX_TX_WR_AUTH_V(x) ((x) << KEYCTX_TX_WR_AUTH_S)
+#define KEYCTX_TX_WR_AUTH_G(x) \
+   (((x) >> KEYCTX_TX_WR_AUTH_S) & KEYCTX_TX_WR_AUTH_M)
+
+#define KEYCTX_TX_WR_AUTHST_S 7
+#define KEYCTX_TX_WR_AUTHST_M 0x7f
+#define KEYCTX_TX_WR_AUTHST_V(x) ((x) << KEYCTX_TX_WR_AUTHST_S)
+#define KEYCTX_TX_WR_AUTHST_G(x) \
+   (((x) >> KEYCTX_TX_WR_AUTHST_S) & KEYCTX_TX_WR_AUTHST_M)
+
+#define KEYCTX_TX_WR_AUTHIN_S 0
+#define KEYCTX_TX_WR_AUTHIN_M 0x7f
+#define KEYCTX_TX_WR_AUTHIN_V(x) ((x) << KEYCTX_TX_WR_AUTHIN_S)
+#define KEYCTX_TX_WR_AUTHIN_G(x) \
+   (((x) >> KEYCTX_TX_WR_AUTHIN_S) & KEYCTX_TX_WR_AUTHIN_M)
+
 struct chcr_wr {
struct fw_crypto_lookaside_wr wreq;
struct ulp_txpkt ulptx;
@@ -90,6 +138,11 @@ struct uld_ctx {
struct chcr_dev 

  1   2   >