Re: [PATCH] net-next: treewide use is_vlan_dev() helper function.

2017-02-03 Thread Joe Perches
On Fri, 2017-02-03 at 22:26 -0600, Parav Pandit wrote:
> This patch makes use of is_vlan_dev() function instead of flag
> comparison which is exactly done by is_vlan_dev() helper function.

Thanks.

btw:  after applying this patch, there is one left

$ git grep -E -n "&\s*IFF_802_1Q_VLAN\b" -- "*.c"
drivers/net/ethernet/chelsio/cxgb4/l2t.c:435:   if (neigh->dev->priv_flags & 
IFF_802_1Q_VLAN)



[PATCH] net-next: treewide use is_vlan_dev() helper function.

2017-02-03 Thread Parav Pandit
This patch makes use of is_vlan_dev() function instead of flag
comparison which is exactly done by is_vlan_dev() helper function.

Signed-off-by: Parav Pandit 
Reviewed-by: Daniel Jurgens 
---
 drivers/infiniband/core/cma.c|  6 ++
 drivers/infiniband/sw/rxe/rxe_net.c  |  2 +-
 drivers/net/ethernet/broadcom/cnic.c |  2 +-
 drivers/net/ethernet/chelsio/cxgb3/l2t.c |  2 +-
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c  |  4 ++--
 drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c |  8 
 drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c |  4 ++--
 drivers/net/hyperv/netvsc_drv.c  |  2 +-
 drivers/scsi/bnx2fc/bnx2fc_fcoe.c|  6 +++---
 drivers/scsi/cxgbi/libcxgbi.c|  6 +++---
 drivers/scsi/fcoe/fcoe.c | 13 ++---
 include/rdma/ib_addr.h   |  6 ++
 net/hsr/hsr_slave.c  |  3 ++-
 13 files changed, 30 insertions(+), 34 deletions(-)

diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 3e70a9c..4eb5a80 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -2467,14 +2467,12 @@ static int iboe_tos_to_sl(struct net_device *ndev, int 
tos)
struct net_device *dev;
 
prio = rt_tos2priority(tos);
-   dev = ndev->priv_flags & IFF_802_1Q_VLAN ?
-   vlan_dev_real_dev(ndev) : ndev;
-
+   dev = is_vlan_dev(ndev) ? vlan_dev_real_dev(ndev) : ndev;
if (dev->num_tc)
return netdev_get_prio_tc_map(dev, prio);
 
 #if IS_ENABLED(CONFIG_VLAN_8021Q)
-   if (ndev->priv_flags & IFF_802_1Q_VLAN)
+   if (is_vlan_dev(ndev))
return (vlan_dev_get_egress_qos_mask(ndev, prio) &
VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
 #endif
diff --git a/drivers/infiniband/sw/rxe/rxe_net.c 
b/drivers/infiniband/sw/rxe/rxe_net.c
index 4abdeb3..d9d1556 100644
--- a/drivers/infiniband/sw/rxe/rxe_net.c
+++ b/drivers/infiniband/sw/rxe/rxe_net.c
@@ -118,7 +118,7 @@ static struct device *dma_device(struct rxe_dev *rxe)
 
ndev = rxe->ndev;
 
-   if (ndev->priv_flags & IFF_802_1Q_VLAN)
+   if (is_vlan_dev(ndev))
ndev = vlan_dev_real_dev(ndev);
 
return ndev->dev.parent;
diff --git a/drivers/net/ethernet/broadcom/cnic.c 
b/drivers/net/ethernet/broadcom/cnic.c
index b1d2ac8..cec94bb 100644
--- a/drivers/net/ethernet/broadcom/cnic.c
+++ b/drivers/net/ethernet/broadcom/cnic.c
@@ -3665,7 +3665,7 @@ static int cnic_cm_destroy(struct cnic_sock *csk)
 static inline u16 cnic_get_vlan(struct net_device *dev,
struct net_device **vlan_dev)
 {
-   if (dev->priv_flags & IFF_802_1Q_VLAN) {
+   if (is_vlan_dev(dev)) {
*vlan_dev = vlan_dev_real_dev(dev);
return vlan_dev_vlan_id(dev);
}
diff --git a/drivers/net/ethernet/chelsio/cxgb3/l2t.c 
b/drivers/net/ethernet/chelsio/cxgb3/l2t.c
index 5f226ed..5206358 100644
--- a/drivers/net/ethernet/chelsio/cxgb3/l2t.c
+++ b/drivers/net/ethernet/chelsio/cxgb3/l2t.c
@@ -351,7 +351,7 @@ struct l2t_entry *t3_l2t_get(struct t3cdev *cdev, struct 
dst_entry *dst,
e->smt_idx = smt_idx;
atomic_set(&e->refcnt, 1);
neigh_replace(e, neigh);
-   if (neigh->dev->priv_flags & IFF_802_1Q_VLAN)
+   if (is_vlan_dev(neigh->dev))
e->vlan = vlan_dev_vlan_id(neigh->dev);
else
e->vlan = VLAN_NONE;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c 
b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index f4f5690..7059014 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -1805,7 +1805,7 @@ static void check_neigh_update(struct neighbour *neigh)
const struct device *parent;
const struct net_device *netdev = neigh->dev;
 
-   if (netdev->priv_flags & IFF_802_1Q_VLAN)
+   if (is_vlan_dev(netdev))
netdev = vlan_dev_real_dev(netdev);
parent = netdev->dev.parent;
if (parent && parent->driver == &cxgb4_driver.driver)
@@ -2111,7 +2111,7 @@ static int cxgb4_inet6addr_handler(struct notifier_block 
*this,
 #if IS_ENABLED(CONFIG_BONDING)
struct adapter *adap;
 #endif
-   if (event_dev->priv_flags & IFF_802_1Q_VLAN)
+   if (is_vlan_dev(event_dev))
event_dev = vlan_dev_real_dev(event_dev);
 #if IS_ENABLED(CONFIG_BONDING)
if (event_dev->flags & IFF_MASTER) {
diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c 
b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
index 0cf8a37..3b5d7cf 100644
--- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
+++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
@@ 

[PATCH v3 net] bpf: add bpf_sk_netns_id() helper

2017-02-03 Thread Alexei Starovoitov
in cases where bpf programs are looking at sockets and packets
that belong to different netns, it could be useful to get an id
that uniquely identifies a netns within the whole system.

Therefore introduce 'u64 bpf_sk_netns_id(sk);' helper. It returns
unique value that identifies netns of given socket or dev_net(skb->dev)
The upper 32-bits of the return value contain device id where namespace
filesystem resides and lower 32-bits contain inode number within that 
filesystem.
It's the same as
 struct stat st;
 stat("/proc/pid/ns/net", &st);
 return (st->st_dev << 32)  | st->st_ino;

For example to disallow raw sockets in all non-init netns
the bpf_type_cgroup_sock program can do:
if (sk->type == SOCK_RAW && bpf_sk_netns_id(sk) != 0x3f075)
  return 0;
where 0x3f075 comes from combination of st_dev and st_ino
of /proc/pid/ns/net

Note that all bpf program types are global. The same socket filter
program can be attached to sockets in different netns,
just like cls_bpf can see ingress/egress packets of multiple
net_devices in different netns. The cgroup_bpf programs are
the most exposed to sockets and devices across netns,
but the need to identify netns applies to all.
For example, if bpf_type_cgroup_skb didn't exist the system wide
monitoring daemon could have used ld_preload mechanism and
attached the same program to see traffic from applications
across netns. Therefore make bpf_sk_netns_id() helper available
to all network related bpf program types.
For socket, cls_bpf and cgroup_skb programs this helper
can be considered a new feature, whereas for cgroup_sock
programs that modify sk->bound_dev_if (like 'ip vrf' does)
it's a bug fix, since 'ip vrf' needs to be netns aware.

Signed-off-by: Alexei Starovoitov 
---
Eric, I've added proc_get_ns_devid_inum() to nsfs.c
right next to __ns_get_path(), so when it is time in the future
to make nsfs more namespace aware, it will be easy to adjust
both new_inode_pseudo(mnt->mnt_sb) line and proc_get_ns_devid_inum()
I thought about using ns->stashed, but it's obviously transient
inode and not usable. If later we decide to store dev_t into ns_common
it will be fine as well. We'll just change proc_get_ns_devid_inum()
without affecting user space.

v2->v3: build bot complained. s/static/static inline/. no other changes.
---
 fs/nsfs.c |  7 +++
 include/linux/proc_ns.h   |  3 ++-
 include/uapi/linux/bpf.h  | 14 +-
 net/core/filter.c | 44 ++-
 samples/bpf/bpf_helpers.h |  2 ++
 samples/bpf/sock_flags_kern.c |  2 ++
 samples/bpf/sockex1_kern.c|  2 ++
 7 files changed, 71 insertions(+), 3 deletions(-)

diff --git a/fs/nsfs.c b/fs/nsfs.c
index 8c9fb29c6673..1a604bccef86 100644
--- a/fs/nsfs.c
+++ b/fs/nsfs.c
@@ -49,6 +49,13 @@ static void nsfs_evict(struct inode *inode)
ns->ops->put(ns);
 }
 
+u64 proc_get_ns_devid_inum(struct ns_common *ns)
+{
+   u64 dev = new_encode_dev(nsfs_mnt->mnt_sb->s_dev);
+
+   return (dev << 32) | ns->inum;
+}
+
 static void *__ns_get_path(struct path *path, struct ns_common *ns)
 {
struct vfsmount *mnt = nsfs_mnt;
diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h
index 12cb8bd81d2d..531c16105198 100644
--- a/include/linux/proc_ns.h
+++ b/include/linux/proc_ns.h
@@ -48,7 +48,7 @@ extern int pid_ns_prepare_proc(struct pid_namespace *ns);
 extern void pid_ns_release_proc(struct pid_namespace *ns);
 extern int proc_alloc_inum(unsigned int *pino);
 extern void proc_free_inum(unsigned int inum);
-
+extern u64 proc_get_ns_devid_inum(struct ns_common *ns);
 #else /* CONFIG_PROC_FS */
 
 static inline int pid_ns_prepare_proc(struct pid_namespace *ns) { return 0; }
@@ -61,6 +61,7 @@ static inline int proc_alloc_inum(unsigned int *inum)
 }
 static inline void proc_free_inum(unsigned int inum) {}
 
+static inline u64 proc_get_ns_devid_inum(struct ns_common *ns) { return 0; }
 #endif /* CONFIG_PROC_FS */
 
 static inline int ns_alloc_inum(struct ns_common *ns)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 0eb0e87dbe9f..e5b8cf16cbaf 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -430,6 +430,17 @@ union bpf_attr {
  * @xdp_md: pointer to xdp_md
  * @delta: An positive/negative integer to be added to xdp_md.data
  * Return: 0 on success or negative on error
+ *
+ * u64 bpf_sk_netns_id(sk)
+ * Returns unique value that identifies netns of given socket or skb.
+ * The upper 32-bits of the return value contain device id where namespace
+ * filesystem resides and lower 32-bits contain inode number within
+ * that filesystem. It's the same value as:
+ *  struct stat st;
+ *  stat("/proc/pid/ns/net", &st);
+ *  return (st->st_dev << 32)  | st->st_ino;
+ * @sk: pointer to struct sock or struct __sk_buff
+ * Return: filesystem's device id | netns inode
  */
 #define __BPF_FUNC_MAPPER(FN)  \
FN(unspec),  

Re: Understanding mutual exclusion between rtnl_lock and rcu_read_lock

2017-02-03 Thread Joel Cunningham

> On Feb 3, 2017, at 3:40 PM, Cong Wang  wrote:
> 
> On Thu, Feb 2, 2017 at 6:05 PM, Joel Cunningham  
> wrote:
>> 
>> In the case of SIOCSIFHWADDR, we get a pointer to the net_device through 
>> __dev_get_by_name() and then pass it to dev_set_mac_address() to modify 
>> through ndo_set_mac_address().  I didn’t see any uses of RCU APIs on the 
>> writer side and that’s why I figured there was something going on with 
>> rtnl_lock() that I didn’t understand or that the dev_ioctl function wasn’t 
>> re-entrant from another CPU
>> 
> 
> You are right, that RCU read lock could merely protect the netdevice from
> being unregistered concurrently, can't prevent a concurrent dev_ifsioc().
> 
> I don't know why Eric changed it to RCU read lock, it is not a hot path, using
> rtnl lock is fine and can guarantee a atomic read.

Thanks for confirming what I was seeing.  I took a look through the history and 
the change happened in 3710becf8a58a5c6c4e797e3a3c968c161abdb41.  It was 
previously holding the dev_base_lock().

From the documentation in dev.c:
/*
 * The @dev_base_head list is protected by @dev_base_lock and the rtnl
 * semaphore.
 *
 * Pure readers hold dev_base_lock for reading, or rcu_read_lock()
 *
 * Writers must hold the rtnl semaphore while they loop through the
 * dev_base_head list, and hold dev_base_lock for writing when they do the
 * actual updates.  This allows pure readers to access the list even
 * while a writer is preparing to update it.
 *
 * To put it another way, dev_base_lock is held for writing only to
 * protect against pure readers; the rtnl semaphore provides the
 * protection against other writers.
 *
 * See, for example usages, register_netdevice() and
 * unregister_netdevice(), which must be called with the rtnl
 * semaphore held.
 */
Is the correct usage to hold both rtnl_lock() and dev_base_lock when 
modifying a member of a struct net_device?  The wording seems vague as to which 
synchronization issue holding both covers.  What does “do the actual update” 
mean, updating the list or structure member?  If the latter, then maybe the 
concurrent dev_ioctl() case has never been safe

Joel

Re: [RFC PATCH 1/2] af_packet: direct dma for packet ineterface

2017-02-03 Thread Jason Wang



On 2017年01月28日 05:33, John Fastabend wrote:

This adds ndo ops for upper layer objects to request direct DMA from
the network interface into memory "slots". The slots must be DMA'able
memory given by a page/offset/size vector in a packet_ring_buffer
structure.

The PF_PACKET socket interface can use these ndo_ops to do zerocopy
RX from the network device into memory mapped userspace memory. For
this to work drivers encode the correct descriptor blocks and headers
so that existing PF_PACKET applications work without any modification.
This only supports the V2 header formats for now. And works by mapping
a ring of the network device to these slots. Originally I used V2
header formats but this does complicate the driver a bit.

V3 header formats added bulk polling via socket calls and timers
used in the polling interface to return every n milliseconds. Currently,
I don't see any way to support this in hardware because we can't
know if the hardware is in the middle of a DMA operation or not
on a slot. So when a timer fires I don't know how to advance the
descriptor ring leaving empty descriptors similar to how the software
ring works. The easiest (best?) route is to simply not support this.

It might be worth creating a new v4 header that is simple for drivers
to support direct DMA ops with. I can imagine using the xdp_buff
structure as a header for example. Thoughts?

The ndo operations and new socket option PACKET_RX_DIRECT work by
giving a queue_index to run the direct dma operations over. Once
setsockopt returns successfully the indicated queue is mapped
directly to the requesting application and can not be used for
other purposes. Also any kernel layers such as tc will be bypassed
and need to be implemented in the hardware via some other mechanism
such as tc offload or other offload interfaces.

Users steer traffic to the selected queue using flow director,
tc offload infrastructure or via macvlan offload.

The new socket option added to PF_PACKET is called PACKET_RX_DIRECT.
It takes a single unsigned int value specifying the queue index,

  setsockopt(sock, SOL_PACKET, PACKET_RX_DIRECT,
&queue_index, sizeof(queue_index));

Implementing busy_poll support will allow userspace to kick the
drivers receive routine if needed. This work is TBD.

To test this I hacked a hardcoded test into  the tool psock_tpacket
in the selftests kernel directory here:

  ./tools/testing/selftests/net/psock_tpacket.c

Running this tool opens a socket and listens for packets over
the PACKET_RX_DIRECT enabled socket. Obviously it needs to be
reworked to enable all the older tests and not hardcode my
interface before it actually gets released.

In general this is a rough patch to explore the interface and
put something concrete up for debate. The patch does not handle
all the error cases correctly and needs to be cleaned up.

Known Limitations (TBD):

  (1) Users are required to match the number of rx ring
  slots with ethtool to the number requested by the
  setsockopt PF_PACKET layout. In the future we could
  possibly do this automatically.

  (2) Users need to configure Flow director or setup_tc
  to steer traffic to the correct queues. I don't believe
  this needs to be changed it seems to be a good mechanism
  for driving directed dma.

  (3) Not supporting timestamps or priv space yet, pushing
 a v4 packet header would resolve this nicely.

  (5) Only RX supported so far. TX already supports direct DMA
  interface but uses skbs which is really not needed. In
  the TX_RING case we can optimize this path as well.

To support TX case we can do a similar "slots" mechanism and
kick operation. The kick could be a busy_poll like operation
but on the TX side. The flow would be user space loads up
n number of slots with packets, kicks tx busy poll bit, the
driver sends packets, and finally when xmit is complete
clears header bits to give slots back. When we have qdisc
bypass set today we already bypass the entire stack so no
particular reason to use skb's in this case. Using xdp_buff
as a v4 packet header would also allow us to consolidate
driver code.

To be done:

  (1) More testing and performance analysis
  (2) Busy polling sockets
  (3) Implement v4 xdp_buff headers for analysis


I like this idea, and we should generalize the API so that rx zerocopy 
is not specific to packet sockets. Then we can use this for e.g. macvtap 
(pass-through mode). But instead of the headers, ndo_ops should support 
refill from non-fixed memory locations from userspace (per packet or 
packets) to satisfy the requirement of virtqueues.


Thanks


  (4) performance testing :/ hopefully it looks good.

Signed-off-by: John Fastabend


[...]


[net-next v2 08/14] i40evf: remove unused device ID

2017-02-03 Thread Jeff Kirsher
From: Mitch Williams 

This device ID was intended for use when running Linux VF drivers under
Hyper-V, but we have determined that it is not necessary. Since it is
unused, and will never be used, remove it.

Change-ID: I74998ab4237db043cd400547bb54a0a5e2a37ea5
Signed-off-by: Mitch Williams 
Tested-by: Andrew Bowers 
Signed-off-by: Jeff Kirsher 
---
 drivers/net/ethernet/intel/i40evf/i40evf_main.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c 
b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
index c0fc533..3fe87e0 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
@@ -59,7 +59,6 @@ static const struct pci_device_id i40evf_pci_tbl[] = {
{PCI_VDEVICE(INTEL, I40E_DEV_ID_VF), 0},
{PCI_VDEVICE(INTEL, I40E_DEV_ID_VF_HV), 0},
{PCI_VDEVICE(INTEL, I40E_DEV_ID_X722_VF), 0},
-   {PCI_VDEVICE(INTEL, I40E_DEV_ID_X722_VF_HV), 0},
/* required last entry */
{0, }
 };
-- 
2.10.2



[net-next v2 10/14] i40e: Quick refactor to start moving data off stack and into Tx buffer info

2017-02-03 Thread Jeff Kirsher
From: Alexander Duyck 

This patch does some quick work to pull some of the data off of the stack
and hopefully start storing it in the Tx buffer info section of the Tx
ring.  Ideally we should be moving away from having to store much of
anything on the stack and can just maintain it all in the descriptor rings.

Change-ID: I4b4715ea1920e122502482b3f9e56a9a6cb1e9fe
Signed-off-by: Alexander Duyck 
Tested-by: Andrew Bowers 
Signed-off-by: Jeff Kirsher 
---
 drivers/net/ethernet/intel/i40e/i40e_txrx.c   | 47 +++
 drivers/net/ethernet/intel/i40evf/i40e_txrx.c | 47 +++
 2 files changed, 54 insertions(+), 40 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c 
b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index 352cf7c..f5baeb1 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -2251,14 +2251,16 @@ static inline int i40e_tx_prepare_vlan_flags(struct 
sk_buff *skb,
 
 /**
  * i40e_tso - set up the tso context descriptor
- * @skb:  ptr to the skb we're sending
+ * @first:pointer to first Tx buffer for xmit
  * @hdr_len:  ptr to the size of the packet header
  * @cd_type_cmd_tso_mss: Quad Word 1
  *
  * Returns 0 if no TSO can happen, 1 if tso is going, or error
  **/
-static int i40e_tso(struct sk_buff *skb, u8 *hdr_len, u64 *cd_type_cmd_tso_mss)
+static int i40e_tso(struct i40e_tx_buffer *first, u8 *hdr_len,
+   u64 *cd_type_cmd_tso_mss)
 {
+   struct sk_buff *skb = first->skb;
u64 cd_cmd, cd_tso_len, cd_mss;
union {
struct iphdr *v4;
@@ -2271,6 +2273,7 @@ static int i40e_tso(struct sk_buff *skb, u8 *hdr_len, u64 
*cd_type_cmd_tso_mss)
unsigned char *hdr;
} l4;
u32 paylen, l4_offset;
+   u16 gso_segs, gso_size;
int err;
 
if (skb->ip_summed != CHECKSUM_PARTIAL)
@@ -2335,10 +2338,18 @@ static int i40e_tso(struct sk_buff *skb, u8 *hdr_len, 
u64 *cd_type_cmd_tso_mss)
/* compute length of segmentation header */
*hdr_len = (l4.tcp->doff * 4) + l4_offset;
 
+   /* pull values out of skb_shinfo */
+   gso_size = skb_shinfo(skb)->gso_size;
+   gso_segs = skb_shinfo(skb)->gso_segs;
+
+   /* update GSO size and bytecount with header size */
+   first->gso_segs = gso_segs;
+   first->bytecount += (first->gso_segs - 1) * *hdr_len;
+
/* find the field values */
cd_cmd = I40E_TX_CTX_DESC_TSO;
cd_tso_len = skb->len - *hdr_len;
-   cd_mss = skb_shinfo(skb)->gso_size;
+   cd_mss = gso_size;
*cd_type_cmd_tso_mss |= (cd_cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) |
(cd_tso_len << I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) |
(cd_mss << I40E_TXD_CTX_QW1_MSS_SHIFT);
@@ -2699,7 +2710,6 @@ static inline void i40e_tx_map(struct i40e_ring *tx_ring, 
struct sk_buff *skb,
u16 i = tx_ring->next_to_use;
u32 td_tag = 0;
dma_addr_t dma;
-   u16 gso_segs;
u16 desc_count = 1;
 
if (tx_flags & I40E_TX_FLAGS_HW_VLAN) {
@@ -2708,15 +2718,6 @@ static inline void i40e_tx_map(struct i40e_ring 
*tx_ring, struct sk_buff *skb,
 I40E_TX_FLAGS_VLAN_SHIFT;
}
 
-   if (tx_flags & (I40E_TX_FLAGS_TSO | I40E_TX_FLAGS_FSO))
-   gso_segs = skb_shinfo(skb)->gso_segs;
-   else
-   gso_segs = 1;
-
-   /* multiply data chunks by size of headers */
-   first->bytecount = skb->len - hdr_len + (gso_segs * hdr_len);
-   first->gso_segs = gso_segs;
-   first->skb = skb;
first->tx_flags = tx_flags;
 
dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
@@ -2902,8 +2903,10 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff 
*skb,
 
count = i40e_xmit_descriptor_count(skb);
if (i40e_chk_linearize(skb, count)) {
-   if (__skb_linearize(skb))
-   goto out_drop;
+   if (__skb_linearize(skb)) {
+   dev_kfree_skb_any(skb);
+   return NETDEV_TX_OK;
+   }
count = i40e_txd_use_count(skb->len);
tx_ring->tx_stats.tx_linearize++;
}
@@ -2919,6 +2922,12 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff 
*skb,
return NETDEV_TX_BUSY;
}
 
+   /* record the location of the first descriptor for this packet */
+   first = &tx_ring->tx_bi[tx_ring->next_to_use];
+   first->skb = skb;
+   first->bytecount = skb->len;
+   first->gso_segs = 1;
+
/* prepare the xmit flags */
if (i40e_tx_prepare_vlan_flags(skb, tx_ring, &tx_flags))
goto out_drop;
@@ -2926,16 +2935,13 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff 
*skb,
/* obtain 

[net-next v2 07/14] i40e: Deprecating unused macro

2017-02-03 Thread Jeff Kirsher
From: Bimmy Pujari 

I40E_MAC_X710 was supposed to be for 10G and I40E_MAC_XL710
was supposed to be for 40G. But function i40e_is_mac_710
sets I40E_MAC_XL710 for all device IDS, I40E_MAC_X710 is not
used at all. As there is nothing to compare there is no need
for this function. Thus deprecating this extra macro and
removing this function entirely and replacing it with a direct
check.

Change-ID: I7d1769954dccd574a290ac04adb836ebd156730e
Signed-off-by: Bimmy Pujari 
Tested-by: Andrew Bowers 
Signed-off-by: Jeff Kirsher 
---
 drivers/net/ethernet/intel/i40e/i40e.h| 13 -
 drivers/net/ethernet/intel/i40e/i40e_main.c   |  6 +++---
 drivers/net/ethernet/intel/i40e/i40e_type.h   |  1 -
 drivers/net/ethernet/intel/i40evf/i40e_type.h |  1 -
 4 files changed, 3 insertions(+), 18 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e.h 
b/drivers/net/ethernet/intel/i40e/i40e.h
index 8bb0f4b..cdf812c 100644
--- a/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h
@@ -134,19 +134,6 @@
 /* default to trying for four seconds */
 #define I40E_TRY_LINK_TIMEOUT  (4 * HZ)
 
-/**
- * i40e_is_mac_710 - Return true if MAC is X710/XL710
- * @hw: ptr to the hardware info
- **/
-static inline bool i40e_is_mac_710(struct i40e_hw *hw)
-{
-   if ((hw->mac.type == I40E_MAC_X710) ||
-   (hw->mac.type == I40E_MAC_XL710))
-   return true;
-
-   return false;
-}
-
 /* driver state flags */
 enum i40e_state_t {
__I40E_TESTING,
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c 
b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 2d689e0..2481924 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -8684,7 +8684,7 @@ static int i40e_sw_init(struct i40e_pf *pf)
 pf->hw.func_caps.fd_filters_best_effort;
}
 
-   if (i40e_is_mac_710(&pf->hw) &&
+   if ((pf->hw.mac.type == I40E_MAC_XL710) &&
(((pf->hw.aq.fw_maj_ver == 4) && (pf->hw.aq.fw_min_ver < 33)) ||
(pf->hw.aq.fw_maj_ver < 4))) {
pf->flags |= I40E_FLAG_RESTART_AUTONEG;
@@ -8693,13 +8693,13 @@ static int i40e_sw_init(struct i40e_pf *pf)
}
 
/* Disable FW LLDP if FW < v4.3 */
-   if (i40e_is_mac_710(&pf->hw) &&
+   if ((pf->hw.mac.type == I40E_MAC_XL710) &&
(((pf->hw.aq.fw_maj_ver == 4) && (pf->hw.aq.fw_min_ver < 3)) ||
(pf->hw.aq.fw_maj_ver < 4)))
pf->flags |= I40E_FLAG_STOP_FW_LLDP;
 
/* Use the FW Set LLDP MIB API if FW > v4.40 */
-   if (i40e_is_mac_710(&pf->hw) &&
+   if ((pf->hw.mac.type == I40E_MAC_XL710) &&
(((pf->hw.aq.fw_maj_ver == 4) && (pf->hw.aq.fw_min_ver >= 40)) ||
(pf->hw.aq.fw_maj_ver >= 5)))
pf->flags |= I40E_FLAG_USE_SET_LLDP_MIB;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_type.h 
b/drivers/net/ethernet/intel/i40e/i40e_type.h
index edc0abd..b6cf8d2 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_type.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_type.h
@@ -125,7 +125,6 @@ enum i40e_debug_mask {
  */
 enum i40e_mac_type {
I40E_MAC_UNKNOWN = 0,
-   I40E_MAC_X710,
I40E_MAC_XL710,
I40E_MAC_VF,
I40E_MAC_X722,
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_type.h 
b/drivers/net/ethernet/intel/i40evf/i40e_type.h
index c85e8a3..92ac60d 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_type.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_type.h
@@ -100,7 +100,6 @@ enum i40e_debug_mask {
  */
 enum i40e_mac_type {
I40E_MAC_UNKNOWN = 0,
-   I40E_MAC_X710,
I40E_MAC_XL710,
I40E_MAC_VF,
I40E_MAC_X722,
-- 
2.10.2



[net-next v2 03/14] i40e: no need to check is_vsi_in_vlan before calling i40e_del_mac_all_vlan

2017-02-03 Thread Jeff Kirsher
From: Jacob Keller 

This function won't be appreciably slower when in VLAN mode, so there is
no real reason to not just call it directly. In either case, we still
must search the full table for a MAC/VLAN pair. We do get to stop
searching a tiny bit early in the case of knowing we are not in VLAN
mode, but this is a minor savings and we can avoid the code complexity
by not having to worry about the check.

Change-ID: I533412195b3a42f51cf629e3675dd5145aea8625
Signed-off-by: Jacob Keller 
Tested-by: Andrew Bowers 
Signed-off-by: Jeff Kirsher 
---
 drivers/net/ethernet/intel/i40e/i40e_main.c | 5 +
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c 
b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 7a11d2b..ab0a04b 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -1779,10 +1779,7 @@ static int i40e_addr_unsync(struct net_device *netdev, 
const u8 *addr)
struct i40e_netdev_priv *np = netdev_priv(netdev);
struct i40e_vsi *vsi = np->vsi;
 
-   if (i40e_is_vsi_in_vlan(vsi))
-   i40e_del_mac_all_vlan(vsi, addr);
-   else
-   i40e_del_filter(vsi, addr, I40E_VLAN_ANY);
+   i40e_del_mac_all_vlan(vsi, addr);
 
return 0;
 }
-- 
2.10.2



[net-next v2 01/14] i40e: don't allow i40e_vsi_(add|kill)_vlan to operate when VID<1

2017-02-03 Thread Jeff Kirsher
From: Jacob Keller 

Now that we have the separate i40e_(add|rm)_vlan_all_mac functions, we
should not be using the i40e_vsi_kill_vlan or i40e_vsi_add_vlan
functions when PVID is set or when VID is less than 1. This allows us to
remove some checks in i40e_vsi_add_vlan and ensures that callers which
need to handle VID=0 or VID=-1 don't accidentally invoke the VLAN mode
handling used to convert filters when entering VLAN mode. We also update
the functions to take u16 instead of s16 as well since they no longer
expect to be called with VID=I40E_VLAN_ANY.

Change-ID: Ibddf44a8bb840dde8ceef2a4fdb92fd953b05a57
Signed-off-by: Jacob Keller 
Tested-by: Andrew Bowers 
Signed-off-by: Jeff Kirsher 
---
 drivers/net/ethernet/intel/i40e/i40e.h  |  4 ++--
 drivers/net/ethernet/intel/i40e/i40e_main.c | 14 ++
 2 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e.h 
b/drivers/net/ethernet/intel/i40e/i40e.h
index 342007d..c164d50 100644
--- a/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h
@@ -852,9 +852,9 @@ int i40e_close(struct net_device *netdev);
 int i40e_vsi_open(struct i40e_vsi *vsi);
 void i40e_vlan_stripping_disable(struct i40e_vsi *vsi);
 int i40e_add_vlan_all_mac(struct i40e_vsi *vsi, s16 vid);
-int i40e_vsi_add_vlan(struct i40e_vsi *vsi, s16 vid);
+int i40e_vsi_add_vlan(struct i40e_vsi *vsi, u16 vid);
 void i40e_rm_vlan_all_mac(struct i40e_vsi *vsi, s16 vid);
-void i40e_vsi_kill_vlan(struct i40e_vsi *vsi, s16 vid);
+void i40e_vsi_kill_vlan(struct i40e_vsi *vsi, u16 vid);
 struct i40e_mac_filter *i40e_put_mac_in_vlan(struct i40e_vsi *vsi,
 const u8 *macaddr);
 int i40e_del_mac_all_vlan(struct i40e_vsi *vsi, const u8 *macaddr);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c 
b/drivers/net/ethernet/intel/i40e/i40e_main.c
index b2f76d2..0b4adcc 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -2568,12 +2568,15 @@ int i40e_add_vlan_all_mac(struct i40e_vsi *vsi, s16 vid)
 /**
  * i40e_vsi_add_vlan - Add VSI membership for given VLAN
  * @vsi: the VSI being configured
- * @vid: VLAN id to be added (0 = untagged only , -1 = any)
+ * @vid: VLAN id to be added
  **/
-int i40e_vsi_add_vlan(struct i40e_vsi *vsi, s16 vid)
+int i40e_vsi_add_vlan(struct i40e_vsi *vsi, u16 vid)
 {
int err;
 
+   if (!vid || vsi->info.pvid)
+   return -EINVAL;
+
/* Locked once because all functions invoked below iterates list*/
spin_lock_bh(&vsi->mac_filter_hash_lock);
err = i40e_add_vlan_all_mac(vsi, vid);
@@ -2616,10 +2619,13 @@ void i40e_rm_vlan_all_mac(struct i40e_vsi *vsi, s16 vid)
 /**
  * i40e_vsi_kill_vlan - Remove VSI membership for given VLAN
  * @vsi: the VSI being configured
- * @vid: VLAN id to be removed (0 = untagged only , -1 = any)
+ * @vid: VLAN id to be removed
  **/
-void i40e_vsi_kill_vlan(struct i40e_vsi *vsi, s16 vid)
+void i40e_vsi_kill_vlan(struct i40e_vsi *vsi, u16 vid)
 {
+   if (!vid || vsi->info.pvid)
+   return;
+
spin_lock_bh(&vsi->mac_filter_hash_lock);
i40e_rm_vlan_all_mac(vsi, vid);
spin_unlock_bh(&vsi->mac_filter_hash_lock);
-- 
2.10.2



[net-next v2 02/14] i40e: fold the i40e_is_vsi_in_vlan check into i40e_put_mac_in_vlan

2017-02-03 Thread Jeff Kirsher
From: Jacob Keller 

Fold the check for determining when to call i40e_put_mac_in_vlan directly
into the function so that we don't need to decide which function to use
ahead of time. This allows us to just call i40e_put_mac_in_vlan directly
without having to check ahead of time.

Change-ID: Ifff526940748ac14b8418be5df5a149502eed137
Signed-off-by: Jacob Keller 
Tested-by: Andrew Bowers 
Signed-off-by: Jeff Kirsher 
---
 drivers/net/ethernet/intel/i40e/i40e_main.c| 11 ---
 drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c |  8 ++--
 2 files changed, 6 insertions(+), 13 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c 
b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 0b4adcc..7a11d2b 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -1498,6 +1498,9 @@ struct i40e_mac_filter *i40e_put_mac_in_vlan(struct 
i40e_vsi *vsi,
return i40e_add_filter(vsi, macaddr,
   le16_to_cpu(vsi->info.pvid));
 
+   if (!i40e_is_vsi_in_vlan(vsi))
+   return i40e_add_filter(vsi, macaddr, I40E_VLAN_ANY);
+
hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) {
if (f->state == I40E_FILTER_REMOVE)
continue;
@@ -1756,14 +1759,8 @@ static int i40e_addr_sync(struct net_device *netdev, 
const u8 *addr)
 {
struct i40e_netdev_priv *np = netdev_priv(netdev);
struct i40e_vsi *vsi = np->vsi;
-   struct i40e_mac_filter *f;
-
-   if (i40e_is_vsi_in_vlan(vsi))
-   f = i40e_put_mac_in_vlan(vsi, addr);
-   else
-   f = i40e_add_filter(vsi, addr, I40E_VLAN_ANY);
 
-   if (f)
+   if (i40e_put_mac_in_vlan(vsi, addr))
return 0;
else
return -ENOMEM;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c 
b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index a6198b7..0cdbdd3 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -1942,12 +1942,8 @@ static int i40e_vc_add_mac_addr_msg(struct i40e_vf *vf, 
u8 *msg, u16 msglen)
struct i40e_mac_filter *f;
 
f = i40e_find_mac(vsi, al->list[i].addr);
-   if (!f) {
-   if (i40e_is_vsi_in_vlan(vsi))
-   f = i40e_put_mac_in_vlan(vsi, al->list[i].addr);
-   else
-   f = i40e_add_filter(vsi, al->list[i].addr, -1);
-   }
+   if (!f)
+   f = i40e_put_mac_in_vlan(vsi, al->list[i].addr);
 
if (!f) {
dev_err(>pdev->dev,
-- 
2.10.2



[net-next v2 12/14] i40e: remove unused function

2017-02-03 Thread Jeff Kirsher
From: Mitch Williams 

After refactoring the client open and close code, this is no longer
needed. Remove it.

Change-ID: If8e6e32baa354d857c2fd8b2f19404f1786011c4
Signed-off-by: Mitch Williams 
Tested-by: Andrew Bowers 
Signed-off-by: Jeff Kirsher 
---
 drivers/net/ethernet/intel/i40e/i40e.h|  1 -
 drivers/net/ethernet/intel/i40e/i40e_client.c | 35 ---
 2 files changed, 36 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e.h 
b/drivers/net/ethernet/intel/i40e/i40e.h
index cdf812c..fdd9069 100644
--- a/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h
@@ -792,7 +792,6 @@ int i40e_lan_add_device(struct i40e_pf *pf);
 int i40e_lan_del_device(struct i40e_pf *pf);
 void i40e_client_subtask(struct i40e_pf *pf);
 void i40e_notify_client_of_l2_param_changes(struct i40e_vsi *vsi);
-void i40e_notify_client_of_netdev_open(struct i40e_vsi *vsi);
 void i40e_notify_client_of_netdev_close(struct i40e_vsi *vsi, bool reset);
 void i40e_notify_client_of_vf_enable(struct i40e_pf *pf, u32 num_vfs);
 void i40e_notify_client_of_vf_reset(struct i40e_pf *pf, u32 vf_id);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_client.c 
b/drivers/net/ethernet/intel/i40e/i40e_client.c
index 7fe72ab..7ca048f 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_client.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_client.c
@@ -201,41 +201,6 @@ void i40e_notify_client_of_l2_param_changes(struct 
i40e_vsi *vsi)
 }
 
 /**
- * i40e_notify_client_of_netdev_open - call the client open callback
- * @vsi: the VSI with netdev opened
- *
- * If there is a client to this netdev, call the client with open
- **/
-void i40e_notify_client_of_netdev_open(struct i40e_vsi *vsi)
-{
-   struct i40e_client_instance *cdev;
-   int ret = 0;
-
-   if (!vsi)
-   return;
-   mutex_lock(_client_instance_mutex);
-   list_for_each_entry(cdev, _client_instances, list) {
-   if (cdev->lan_info.netdev == vsi->netdev) {
-   if (!cdev->client ||
-   !cdev->client->ops || !cdev->client->ops->open) {
-   dev_dbg(>back->pdev->dev,
-   "Cannot locate client instance open 
routine\n");
-   continue;
-   }
-   if (!(test_bit(__I40E_CLIENT_INSTANCE_OPENED,
-  >state))) {
-   ret = cdev->client->ops->open(>lan_info,
- cdev->client);
-   if (!ret)
-   set_bit(__I40E_CLIENT_INSTANCE_OPENED,
-   >state);
-   }
-   }
-   }
-   mutex_unlock(_client_instance_mutex);
-}
-
-/**
  * i40e_client_release_qvlist
  * @ldev: pointer to L2 context.
  *
-- 
2.10.2



[net-next v2 14/14] i40e: add interrupt rate limit verbosity

2017-02-03 Thread Jeff Kirsher
From: Alan Brady 

Due to the resolution of the register controlling interrupt rate
limiting, setting certain values for the interrupt rate limit makes it
appear as though the limiting is not completely accurate.  The problem
is that the interrupt rate limit is getting rounded down to the nearest
multiple of 4.  This patch fixes the problem by adding some feedback to
the user as to the actual interrupt rate limit being used when it
differs from the requested limit.  Without this patch setting interrupt
rate limits may appear to behave inaccurately.

Change-ID: I3093cf3f2d437d35a4c4f4bb5af5ce1b85ab21b7
Signed-off-by: Alan Brady 
Tested-by: Andrew Bowers 
Signed-off-by: Jeff Kirsher 
---
 drivers/net/ethernet/intel/i40e/i40e_ethtool.c | 13 ++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c 
b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index 7500902..c4ab3c1 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -2116,6 +2116,7 @@ static int __i40e_set_coalesce(struct net_device *netdev,
struct i40e_netdev_priv *np = netdev_priv(netdev);
struct i40e_vsi *vsi = np->vsi;
struct i40e_pf *pf = vsi->back;
+   u16 intrl_reg;
int i;
 
if (ec->tx_max_coalesced_frames_irq || ec->rx_max_coalesced_frames_irq)
@@ -2127,8 +2128,9 @@ static int __i40e_set_coalesce(struct net_device *netdev,
return -EINVAL;
}
 
-   if (ec->rx_coalesce_usecs_high >= INTRL_REG_TO_USEC(I40E_MAX_INTRL)) {
-   netif_info(pf, drv, netdev, "Invalid value, rx-usecs-high range 
is 0-235\n");
+   if (ec->rx_coalesce_usecs_high > INTRL_REG_TO_USEC(I40E_MAX_INTRL)) {
+   netif_info(pf, drv, netdev, "Invalid value, rx-usecs-high range 
is 0-%lu\n",
+  INTRL_REG_TO_USEC(I40E_MAX_INTRL));
return -EINVAL;
}
 
@@ -2141,7 +2143,12 @@ static int __i40e_set_coalesce(struct net_device *netdev,
return -EINVAL;
}
 
-   vsi->int_rate_limit = ec->rx_coalesce_usecs_high;
+   intrl_reg = i40e_intrl_usec_to_reg(ec->rx_coalesce_usecs_high);
+   vsi->int_rate_limit = INTRL_REG_TO_USEC(intrl_reg);
+   if (vsi->int_rate_limit != ec->rx_coalesce_usecs_high) {
+   netif_info(pf, drv, netdev, "Interrupt rate limit rounded down 
to %d\n",
+  vsi->int_rate_limit);
+   }
 
if (ec->tx_coalesce_usecs == 0) {
if (ec->use_adaptive_tx_coalesce)
-- 
2.10.2



[net-next v2 13/14] i40e: refactor macro INTRL_USEC_TO_REG

2017-02-03 Thread Jeff Kirsher
From: Alan Brady 

This patch refactors the macro INTRL_USEC_TO_REG into a static inline
function and fixes a couple subtle bugs caused by the macro.

This patch fixes a bug which was caused by passing a bad register value
to the firmware.  If enabling interrupt rate limiting, a non-zero value
for the rate limit must be used.  Otherwise the firmware sets the
interrupt rate limit to the maximum value.  Due to the limited
resolution of the register, attempting to set a value of 1, 2, or 3
would be rounded down to 0 and limiting was left enabled, causing
unexpected behavior.

This patch also fixes a possible bug in which using the macro itself can
introduce unintended side effects because the macro argument is used
more than once in the macro definition (e.g. a variable post-increment
argument would perform a double increment on the variable).

Without this patch, attempting to set interrupt rate limits of 1, 2, or
3 results in unexpected behavior and future use of this macro could
cause subtle bugs.

Change-Id: I83ac842de0ca9c86761923d6e3a4d7b1b95f2b3f
Signed-off-by: Alan Brady 
Tested-by: Andrew Bowers 
Signed-off-by: Jeff Kirsher 
---
 drivers/net/ethernet/intel/i40e/i40e_ethtool.c |  2 +-
 drivers/net/ethernet/intel/i40e/i40e_main.c|  2 +-
 drivers/net/ethernet/intel/i40e/i40e_txrx.h| 15 ++-
 3 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c 
b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index cc1465a..7500902 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -2072,7 +2072,7 @@ static void i40e_set_itr_per_queue(struct i40e_vsi *vsi,
struct i40e_q_vector *q_vector;
u16 vector, intrl;
 
-   intrl = INTRL_USEC_TO_REG(vsi->int_rate_limit);
+   intrl = i40e_intrl_usec_to_reg(vsi->int_rate_limit);
 
vsi->rx_rings[queue]->rx_itr_setting = ec->rx_coalesce_usecs;
vsi->tx_rings[queue]->tx_itr_setting = ec->tx_coalesce_usecs;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c 
b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 2481924..9f785c0 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -3268,7 +3268,7 @@ static void i40e_vsi_configure_msix(struct i40e_vsi *vsi)
wr32(hw, I40E_PFINT_ITRN(I40E_TX_ITR, vector - 1),
 q_vector->tx.itr);
wr32(hw, I40E_PFINT_RATEN(vector - 1),
-INTRL_USEC_TO_REG(vsi->int_rate_limit));
+i40e_intrl_usec_to_reg(vsi->int_rate_limit));
 
/* Linked list for the queuepairs assigned to this vector */
wr32(hw, I40E_PFINT_LNKLSTN(vector - 1), qp);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h 
b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
index e065321..1ea820e 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
@@ -52,7 +52,20 @@
  */
 #define INTRL_ENA  BIT(6)
 #define INTRL_REG_TO_USEC(intrl) ((intrl & ~INTRL_ENA) << 2)
-#define INTRL_USEC_TO_REG(set) ((set) ? ((set) >> 2) | INTRL_ENA : 0)
+/**
+ * i40e_intrl_usec_to_reg - convert interrupt rate limit to register
+ * @intrl: interrupt rate limit to convert
+ *
+ * This function converts a decimal interrupt rate limit to the appropriate
+ * register format expected by the firmware when setting interrupt rate limit.
+ */
+static inline u16 i40e_intrl_usec_to_reg(int intrl)
+{
+   if (intrl >> 2)
+   return ((intrl >> 2) | INTRL_ENA);
+   else
+   return 0;
+}
 #define I40E_INTRL_8K  125 /* 8000 ints/sec */
 #define I40E_INTRL_62K 16  /* 62500 ints/sec */
 #define I40E_INTRL_83K 12  /* 83333 ints/sec */
-- 
2.10.2



[net-next v2 06/14] i40e: when adding or removing MAC filters, correctly handle VLANs

2017-02-03 Thread Jeff Kirsher
From: Jacob Keller 

Instead of using i40e_add_filter or i40e_del_filter directly, when
adding a MAC address, we should normally be using i40e_add_mac_filter or
i40e_del_mac_filter. These functions correctly handle the various cases
of VLAN mode or PVID settings. This ensures consistency and avoids the
issues that can occur with the recent addition of a WARN_ON() in
i40e_sync_vsi_filters.

Change-ID: I7fe62db063391fdd1180b2d6a6a3c5ab4307
Signed-off-by: Jacob Keller 
Tested-by: Andrew Bowers 
Signed-off-by: Jeff Kirsher 
---
 drivers/net/ethernet/intel/i40e/i40e_main.c|  6 +++---
 drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 11 ---
 2 files changed, 7 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c 
b/drivers/net/ethernet/intel/i40e/i40e_main.c
index cabd728..2d689e0 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -9341,7 +9341,7 @@ static int i40e_config_netdev(struct i40e_vsi *vsi)
 */
i40e_rm_default_mac_filter(vsi, mac_addr);
spin_lock_bh(>mac_filter_hash_lock);
-   i40e_add_filter(vsi, mac_addr, I40E_VLAN_ANY);
+   i40e_add_mac_filter(vsi, mac_addr);
spin_unlock_bh(>mac_filter_hash_lock);
} else {
/* relate the VSI_VMDQ name to the VSI_MAIN name */
@@ -9350,7 +9350,7 @@ static int i40e_config_netdev(struct i40e_vsi *vsi)
random_ether_addr(mac_addr);
 
spin_lock_bh(>mac_filter_hash_lock);
-   i40e_add_filter(vsi, mac_addr, I40E_VLAN_ANY);
+   i40e_add_mac_filter(vsi, mac_addr);
spin_unlock_bh(>mac_filter_hash_lock);
}
 
@@ -9369,7 +9369,7 @@ static int i40e_config_netdev(struct i40e_vsi *vsi)
 */
eth_broadcast_addr(broadcast);
spin_lock_bh(>mac_filter_hash_lock);
-   i40e_add_filter(vsi, broadcast, I40E_VLAN_ANY);
+   i40e_add_mac_filter(vsi, broadcast);
spin_unlock_bh(>mac_filter_hash_lock);
 
ether_addr_copy(netdev->dev_addr, mac_addr);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c 
b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index 49941dd..cbbf864 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -689,17 +689,15 @@ static int i40e_alloc_vsi_res(struct i40e_vf *vf, enum 
i40e_vsi_type type)
 
spin_lock_bh(>mac_filter_hash_lock);
if (is_valid_ether_addr(vf->default_lan_addr.addr)) {
-   f = i40e_add_filter(vsi, vf->default_lan_addr.addr,
-  vf->port_vlan_id ?
-  vf->port_vlan_id : -1);
+   f = i40e_add_mac_filter(vsi,
+   vf->default_lan_addr.addr);
if (!f)
dev_info(>pdev->dev,
 "Could not add MAC filter %pM for VF 
%d\n",
vf->default_lan_addr.addr, vf->vf_id);
}
eth_broadcast_addr(broadcast);
-   f = i40e_add_filter(vsi, broadcast,
-   vf->port_vlan_id ? vf->port_vlan_id : -1);
+   f = i40e_add_mac_filter(vsi, broadcast);
if (!f)
dev_info(>pdev->dev,
 "Could not allocate VF broadcast filter\n");
@@ -2718,8 +2716,7 @@ int i40e_ndo_set_vf_mac(struct net_device *netdev, int 
vf_id, u8 *mac)
 
/* delete the temporary mac address */
if (!is_zero_ether_addr(vf->default_lan_addr.addr))
-   i40e_del_filter(vsi, vf->default_lan_addr.addr,
-   vf->port_vlan_id ? vf->port_vlan_id : -1);
+   i40e_del_mac_filter(vsi, vf->default_lan_addr.addr);
 
/* Delete all the filters for this VSI - we're going to kill it
 * anyway.
-- 
2.10.2



[net-next v2 05/14] i40e: avoid O(n^2) loop when deleting all filters

2017-02-03 Thread Jeff Kirsher
From: Jacob Keller 

Use __i40e_del_filter instead of using i40e_del_filter() which will
avoid doing an additional search to delete a filter we already have the
pointer for.

Change-ID: Iea5a7e3cafbf8c682ed9d3b6c69cf5ff53f44daf
Signed-off-by: Jacob Keller 
Tested-by: Andrew Bowers 
Signed-off-by: Jeff Kirsher 
---
 drivers/net/ethernet/intel/i40e/i40e.h | 1 +
 drivers/net/ethernet/intel/i40e/i40e_main.c| 2 +-
 drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 2 +-
 3 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e.h 
b/drivers/net/ethernet/intel/i40e/i40e.h
index 4a64884..8bb0f4b 100644
--- a/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h
@@ -762,6 +762,7 @@ bool i40e_set_ntuple(struct i40e_pf *pf, netdev_features_t 
features);
 void i40e_set_ethtool_ops(struct net_device *netdev);
 struct i40e_mac_filter *i40e_add_filter(struct i40e_vsi *vsi,
const u8 *macaddr, s16 vlan);
+void __i40e_del_filter(struct i40e_vsi *vsi, struct i40e_mac_filter *f);
 void i40e_del_filter(struct i40e_vsi *vsi, const u8 *macaddr, s16 vlan);
 int i40e_sync_vsi_filters(struct i40e_vsi *vsi);
 struct i40e_vsi *i40e_vsi_setup(struct i40e_pf *pf, u8 type,
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c 
b/drivers/net/ethernet/intel/i40e/i40e_main.c
index da9f8d3..cabd728 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -1434,7 +1434,7 @@ struct i40e_mac_filter *i40e_add_filter(struct i40e_vsi 
*vsi,
  * the "safe" variants of any list iterators, e.g. list_for_each_entry_safe()
  * instead of list_for_each_entry().
  **/
-static void __i40e_del_filter(struct i40e_vsi *vsi, struct i40e_mac_filter *f)
+void __i40e_del_filter(struct i40e_vsi *vsi, struct i40e_mac_filter *f)
 {
if (!f)
return;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c 
b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index 1859911..49941dd 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -2725,7 +2725,7 @@ int i40e_ndo_set_vf_mac(struct net_device *netdev, int 
vf_id, u8 *mac)
 * anyway.
 */
hash_for_each(vsi->mac_filter_hash, bkt, f, hlist)
-   i40e_del_filter(vsi, f->macaddr, f->vlan);
+   __i40e_del_filter(vsi, f);
 
spin_unlock_bh(>mac_filter_hash_lock);
 
-- 
2.10.2



[net-next v2 09/14] i40e: remove unnecessary __packed

2017-02-03 Thread Jeff Kirsher
From: Tushar Dave 

'struct i40e_dma_mem' is defined with the 'packed' directive, which causes
kernel unaligned access errors on sparc.

e.g.
i40e: Intel(R) Ethernet Connection XL710 Network Driver - version
1.6.16-k
i40e: Copyright (c) 2013 - 2014 Intel Corporation.
Kernel unaligned access at TPC[44894c] dma_4v_alloc_coherent+0x1ac/0x300
Kernel unaligned access at TPC[44894c] dma_4v_alloc_coherent+0x1ac/0x300
Kernel unaligned access at TPC[44894c] dma_4v_alloc_coherent+0x1ac/0x300
Kernel unaligned access at TPC[44894c] dma_4v_alloc_coherent+0x1ac/0x300
Kernel unaligned access at TPC[44894c] dma_4v_alloc_coherent+0x1ac/0x300
i40e 0000:03:00.0: fw 5.1.40981 api 1.5 nvm 5.04 0x80002548 0.0.0

This can be fixed with get_unaligned/put_unaligned(). However, no
reference in the driver shows 'struct i40e_dma_mem' being passed directly
to the NIC hardware; instead, individual fields of the struct are read
and used for the hardware. Therefore, __packed is unnecessary for
'struct i40e_dma_mem'.

In addition, although 'struct i40e_virt_mem' doesn't cause any
unaligned access, keeping it packed is unnecessary as well because
of the aforementioned reason.

This change makes 'struct i40e_dma_mem' and 'struct i40e_virt_mem'
unpacked.

Signed-off-by: Tushar Dave 
Tested-by: Andrew Bowers 
Signed-off-by: Jeff Kirsher 
---
 drivers/net/ethernet/intel/i40e/i40e_osdep.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_osdep.h 
b/drivers/net/ethernet/intel/i40e/i40e_osdep.h
index 5b6feb7..be74bcf 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_osdep.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_osdep.h
@@ -55,7 +55,7 @@ struct i40e_dma_mem {
void *va;
dma_addr_t pa;
u32 size;
-} __packed;
+};
 
 #define i40e_allocate_dma_mem(h, m, unused, s, a) \
i40e_allocate_dma_mem_d(h, m, s, a)
@@ -64,7 +64,7 @@ struct i40e_dma_mem {
 struct i40e_virt_mem {
void *va;
u32 size;
-} __packed;
+};
 
 #define i40e_allocate_virt_mem(h, m, s) i40e_allocate_virt_mem_d(h, m, s)
 #define i40e_free_virt_mem(h, m) i40e_free_virt_mem_d(h, m)
-- 
2.10.2



[net-next v2 04/14] i40e: rename i40e_put_mac_in_vlan and i40e_del_mac_all_vlan

2017-02-03 Thread Jeff Kirsher
From: Jacob Keller 

The purpose of these functions is to add a new MAC filter correctly, whether
we're using VLANs or not. Their goal is to ensure that all active VLANs
get the new MAC filter. Rename them so that their intent is clear. They
function correctly regardless of whether we have any active VLANs or
only have I40E_VLAN_ANY filters. The new names convey how they function
in a more clear manner.

Change-ID: Iec1961f968c0223a7132724a74e26a665750b107
Signed-off-by: Jacob Keller 
Tested-by: Andrew Bowers 
Signed-off-by: Jeff Kirsher 
---
 drivers/net/ethernet/intel/i40e/i40e.h |  6 +++---
 drivers/net/ethernet/intel/i40e/i40e_main.c| 24 --
 drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c |  4 ++--
 3 files changed, 18 insertions(+), 16 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e.h 
b/drivers/net/ethernet/intel/i40e/i40e.h
index c164d50..4a64884 100644
--- a/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h
@@ -855,9 +855,9 @@ int i40e_add_vlan_all_mac(struct i40e_vsi *vsi, s16 vid);
 int i40e_vsi_add_vlan(struct i40e_vsi *vsi, u16 vid);
 void i40e_rm_vlan_all_mac(struct i40e_vsi *vsi, s16 vid);
 void i40e_vsi_kill_vlan(struct i40e_vsi *vsi, u16 vid);
-struct i40e_mac_filter *i40e_put_mac_in_vlan(struct i40e_vsi *vsi,
-const u8 *macaddr);
-int i40e_del_mac_all_vlan(struct i40e_vsi *vsi, const u8 *macaddr);
+struct i40e_mac_filter *i40e_add_mac_filter(struct i40e_vsi *vsi,
+   const u8 *macaddr);
+int i40e_del_mac_filter(struct i40e_vsi *vsi, const u8 *macaddr);
 bool i40e_is_vsi_in_vlan(struct i40e_vsi *vsi);
 struct i40e_mac_filter *i40e_find_mac(struct i40e_vsi *vsi, const u8 *macaddr);
 #ifdef I40E_FCOE
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c 
b/drivers/net/ethernet/intel/i40e/i40e_main.c
index ab0a04b..da9f8d3 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -1477,18 +1477,19 @@ void i40e_del_filter(struct i40e_vsi *vsi, const u8 
*macaddr, s16 vlan)
 }
 
 /**
- * i40e_put_mac_in_vlan - Make macvlan filters from macaddrs and vlans
+ * i40e_add_mac_filter - Add a MAC filter for all active VLANs
  * @vsi: the VSI to be searched
  * @macaddr: the mac address to be filtered
  *
- * Goes through all the macvlan filters and adds a macvlan filter for each
+ * If we're not in VLAN mode, just add the filter to I40E_VLAN_ANY. Otherwise,
+ * go through all the macvlan filters and add a macvlan filter for each
  * unique vlan that already exists. If a PVID has been assigned, instead only
  * add the macaddr to that VLAN.
  *
  * Returns last filter added on success, else NULL
  **/
-struct i40e_mac_filter *i40e_put_mac_in_vlan(struct i40e_vsi *vsi,
-const u8 *macaddr)
+struct i40e_mac_filter *i40e_add_mac_filter(struct i40e_vsi *vsi,
+   const u8 *macaddr)
 {
struct i40e_mac_filter *f, *add = NULL;
struct hlist_node *h;
@@ -1513,15 +1514,16 @@ struct i40e_mac_filter *i40e_put_mac_in_vlan(struct 
i40e_vsi *vsi,
 }
 
 /**
- * i40e_del_mac_all_vlan - Remove a MAC filter from all VLANS
+ * i40e_del_mac_filter - Remove a MAC filter from all VLANs
  * @vsi: the VSI to be searched
  * @macaddr: the mac address to be removed
  *
- * Removes a given MAC address from a VSI, regardless of VLAN
+ * Removes a given MAC address from a VSI regardless of what VLAN it has been
+ * associated with.
  *
  * Returns 0 for success, or error
  **/
-int i40e_del_mac_all_vlan(struct i40e_vsi *vsi, const u8 *macaddr)
+int i40e_del_mac_filter(struct i40e_vsi *vsi, const u8 *macaddr)
 {
struct i40e_mac_filter *f;
struct hlist_node *h;
@@ -1582,8 +1584,8 @@ static int i40e_set_mac(struct net_device *netdev, void 
*p)
netdev_info(netdev, "set new mac address %pM\n", addr->sa_data);
 
spin_lock_bh(>mac_filter_hash_lock);
-   i40e_del_mac_all_vlan(vsi, netdev->dev_addr);
-   i40e_put_mac_in_vlan(vsi, addr->sa_data);
+   i40e_del_mac_filter(vsi, netdev->dev_addr);
+   i40e_add_mac_filter(vsi, addr->sa_data);
spin_unlock_bh(>mac_filter_hash_lock);
ether_addr_copy(netdev->dev_addr, addr->sa_data);
if (vsi->type == I40E_VSI_MAIN) {
@@ -1760,7 +1762,7 @@ static int i40e_addr_sync(struct net_device *netdev, 
const u8 *addr)
struct i40e_netdev_priv *np = netdev_priv(netdev);
struct i40e_vsi *vsi = np->vsi;
 
-   if (i40e_put_mac_in_vlan(vsi, addr))
+   if (i40e_add_mac_filter(vsi, addr))
return 0;
else
return -ENOMEM;
@@ -1779,7 +1781,7 @@ static int i40e_addr_unsync(struct net_device *netdev, 
const u8 *addr)
struct i40e_netdev_priv *np = 

[net-next v2 11/14] i40e: Remove FPK HyperV VF device ID

2017-02-03 Thread Jeff Kirsher
From: Jayaprakash Shanmugam 

The requirement for VFs to use the VMBus has been removed, so remove the
Hyper-V VF device ID.

Change-ID: I84f0964f443ee0db3e5e444b5ace996eb71b8280
Signed-off-by: Jayaprakash Shanmugam 
Tested-by: Andrew Bowers 
Signed-off-by: Jeff Kirsher 
---
 drivers/net/ethernet/intel/i40evf/i40e_common.c | 1 -
 drivers/net/ethernet/intel/i40evf/i40e_devids.h | 1 -
 2 files changed, 2 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40evf/i40e_common.c 
b/drivers/net/ethernet/intel/i40evf/i40e_common.c
index aa63b7f..b5a59dd 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_common.c
+++ b/drivers/net/ethernet/intel/i40evf/i40e_common.c
@@ -64,7 +64,6 @@ i40e_status i40e_set_mac_type(struct i40e_hw *hw)
hw->mac.type = I40E_MAC_X722;
break;
case I40E_DEV_ID_X722_VF:
-   case I40E_DEV_ID_X722_VF_HV:
hw->mac.type = I40E_MAC_X722_VF;
break;
case I40E_DEV_ID_VF:
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_devids.h 
b/drivers/net/ethernet/intel/i40evf/i40e_devids.h
index 21dcaee..d76393c 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_devids.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_devids.h
@@ -48,7 +48,6 @@
 #define I40E_DEV_ID_10G_BASE_T_X7220x37D2
 #define I40E_DEV_ID_SFP_I_X722 0x37D3
 #define I40E_DEV_ID_X722_VF0x37CD
-#define I40E_DEV_ID_X722_VF_HV 0x37D9
 
 #define i40e_is_40G_device(d)  ((d) == I40E_DEV_ID_QSFP_A  || \
 (d) == I40E_DEV_ID_QSFP_B  || \
-- 
2.10.2



[net-next v2 00/14][pull request] 40GbE Intel Wired LAN Driver Updates 2017-02-03

2017-02-03 Thread Jeff Kirsher
This series contains updates to i40e/i40evf only.

Jake fixes up the driver to not call i40e_vsi_kill_vlan() or
i40e_vsi_add_vlan() when the PVID is set or when the VID is less than 1.
Cleaned up a check which really is not needed since there is no real
reason why we cannot just call i40e_del_mac_all_vlan() directly.  Renamed
functions to better reflect their actual purpose and how they function
in a more clear manner.

Bimmy cleans up unused/deprecated macros.

Mitch cleans up unused device ids which were intended for use when
running Linux VF drivers under Hyper-V, but found to be not needed.
Then cleaned up a function that is no longer needed since the client
open and close functions were refactored.  Adds a sleep without timeout
until the reply from the PF driver has been received since the iWARP
client cannot continue until the operation has been completed.

Tushar Dave fixes an issue seen on SPARC where the use of the 'packed'
directive was causing kernel unaligned errors.

Alex does a refactor to pull some data off of the stack and store it
in the transmit buffer info section of the transmit ring.

Alan fixes a bug which was caused by passing a bad register value to the
firmware, by refactoring the macro INTRL_USEC_TO_REG into a static
inline function.  Also added feedback to the user as to the actual
interrupt rate limit being used when it differs from the requested limit.

The following are changes since commit 8fe809a992639b2013c0d8da2ba55cdea28a959a:
  net: add LINUX_MIB_PFMEMALLOCDROP counter
and are available in the git repository at:
  git://git.kernel.org/pub/scm/linux/kernel/git/jkirsher/next-queue 40GbE

Alan Brady (2):
  i40e: refactor macro INTRL_USEC_TO_REG
  i40e: add interrupt rate limit verbosity

Alexander Duyck (1):
  i40e: Quick refactor to start moving data off stack and into Tx buffer
info

Bimmy Pujari (1):
  i40e: Deprecating unused macro

Jacob Keller (6):
  i40e: don't allow i40e_vsi_(add|kill)_vlan to operate when VID<1
  i40e: fold the i40e_is_vsi_in_vlan check into i40e_put_mac_in_vlan
  i40e: no need to check is_vsi_in_vlan before calling
i40e_del_mac_all_vlan
  i40e: rename i40e_put_mac_in_vlan and i40e_del_mac_all_vlan
  i40e: avoid O(n^2) loop when deleting all filters
  i40e: when adding or removing MAC filters, correctly handle VLANs

Jayaprakash Shanmugam (1):
  i40e: Remove FPK HyperV VF device ID

Mitch Williams (2):
  i40evf: remove unused device ID
  i40e: remove unused function

Tushar Dave (1):
  i40e: remove unnecessary __packed

 drivers/net/ethernet/intel/i40e/i40e.h | 25 ++--
 drivers/net/ethernet/intel/i40e/i40e_client.c  | 35 
 drivers/net/ethernet/intel/i40e/i40e_ethtool.c | 15 +++--
 drivers/net/ethernet/intel/i40e/i40e_main.c| 66 +++---
 drivers/net/ethernet/intel/i40e/i40e_osdep.h   |  4 +-
 drivers/net/ethernet/intel/i40e/i40e_txrx.c| 47 ---
 drivers/net/ethernet/intel/i40e/i40e_txrx.h| 15 -
 drivers/net/ethernet/intel/i40e/i40e_type.h|  1 -
 drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 23 +++-
 drivers/net/ethernet/intel/i40evf/i40e_common.c|  1 -
 drivers/net/ethernet/intel/i40evf/i40e_devids.h|  1 -
 drivers/net/ethernet/intel/i40evf/i40e_txrx.c  | 47 ---
 drivers/net/ethernet/intel/i40evf/i40e_type.h  |  1 -
 drivers/net/ethernet/intel/i40evf/i40evf_main.c|  1 -
 14 files changed, 129 insertions(+), 153 deletions(-)

-- 
2.10.2



Re: [PATCH net-next 2/6] net: dsa: simplify netdevice events handling

2017-02-03 Thread Florian Fainelli


On 02/03/2017 10:20 AM, Vivien Didelot wrote:
> Simplify the code handling the slave netdevice notifier call by
> providing a dsa_slave_changeupper helper for NETDEV_CHANGEUPPER, and so
> on (only this event is supported at the moment.)
> 
> Return NOTIFY_DONE when we did not care about an event, and NOTIFY_OK
> when we were concerned but no error occurred, as the API suggests.
> 
> Signed-off-by: Vivien Didelot 
> ---

>  static int dsa_slave_netdevice_event(struct notifier_block *nb,
> @@ -1529,8 +1514,11 @@ static int dsa_slave_netdevice_event(struct 
> notifier_block *nb,
>  {
>   struct net_device *dev = netdev_notifier_info_to_dev(ptr);
>  
> - if (dsa_slave_dev_check(dev))
> - return dsa_slave_port_event(dev, event, ptr);
> + if (dev->netdev_ops != &dsa_slave_netdev_ops)
> + return NOTIFY_DONE;

Why not keep the dsa_slave_dev_check() here?


-- 
Florian


Re: [PATCH 3/3] net: ethernet: bgmac: driver power manangement

2017-02-03 Thread Florian Fainelli
On 02/03/2017 01:39 PM, Jon Mason wrote:
> From: Joey Zhong 
> 
> Implements suspend/resume, external phy 54810 is assumed
> to remain powered up during deep-sleep for wake-on-lane.

s/wake-on-lane/Wake-on-LAN, are you positive phy_stop() is not
suspending the PHY and issuing BMCR_PWRDOWN write?

This also seems incomplete in that, if the device is really configured
for Wake-on-LAN (through ethtool) you should call
device_set_wakeup_capable() and then check for device_may_wakeup()
during suspend or resume to know which part of the suspend/resume
portion should be done. You could refer to bcmgenet for an example.

>  
> +int bgmac_enet_suspend(struct bgmac *bgmac)
> +{
> + netdev_info(bgmac->net_dev, "Suspending\n");

remove that message

> +
> + if (netif_running(bgmac->net_dev)) {
> + netif_stop_queue(bgmac->net_dev);
> +
> + napi_disable(>napi);
> +
> + netif_tx_lock(bgmac->net_dev);
> + netif_device_detach(bgmac->net_dev);
> + netif_tx_unlock(bgmac->net_dev);
> +
> + bgmac_chip_intrs_off(bgmac);
> + bgmac_chip_reset(bgmac);
> + bgmac_dma_cleanup(bgmac);
> + }

Can you change the indentation to test for netif_running() first and
return 0 in that case?

> +
> + phy_stop(bgmac->net_dev->phydev);
> +
> + return 0;
> +}
> +EXPORT_SYMBOL_GPL(bgmac_enet_suspend);
> +
> +int bgmac_enet_resume(struct bgmac *bgmac)
> +{
> + int rc;
> +
> + netdev_info(bgmac->net_dev, "Resuming\n");

Same here, this needs to be removed.

> +
> + phy_start(bgmac->net_dev->phydev);
> +
> + if (netif_running(bgmac->net_dev)) {
> + rc = bgmac_dma_init(bgmac);
> + if (rc)
> + return rc;
> +
> + bgmac_chip_init(bgmac);
> +
> + napi_enable(>napi);
> +
> + netif_tx_lock(bgmac->net_dev);
> + netif_device_attach(bgmac->net_dev);
> + netif_tx_unlock(bgmac->net_dev);
> +
> + netif_start_queue(bgmac->net_dev);
> + }
-- 
Florian


Re: [PATCH net-next v2 00/12] bnxt_en: Add XDP support.

2017-02-03 Thread Jakub Kicinski
On Fri, 3 Feb 2017 17:32:16 -0800, Michael Chan wrote:
> On Fri, Feb 3, 2017 at 4:33 PM, Jakub Kicinski  wrote:
> > On Fri, 03 Feb 2017 16:50:54 -0500 (EST), David Miller wrote:  
> >> We don't set precedence by one driver saying "hey it's better to do
> >> things this way, forget what all the other drivers are doing."  Rather
> >> we have a "discussion" about what the appropriate thing is to do and
> >> convert all the drivers only after a decision has been made.  
> >
> > Would making sure that if xdp_adjust_head() changes the starting offset
> > (and length) of frame and the program returns XDP_PASS - the stack will
> > see the changes made by xdp_adjust_head() fall under the same follow
> > the precedent rule?  That is what Martin did for mlx4 and mlx5, and what
> > John did in virtio, but not what this patch set does (see my comment on
> > patch 11).
> >
> > I should have double checked mlx4/mlx5 and made it clearer that there is
> > a precedent here in my review comment...  
> 
> Yes, I plan to also include this change (make modified offset and
> length visible to the stack) in the next version of the patch set.

Awesome, thanks!


RE: [PATCH net-next 2/2] add one config to select relax order mode in intel NIC's Kconfig

2017-02-03 Thread maowenan


> -Original Message-
> From: Alexander Duyck [mailto:alexander.du...@gmail.com]
> Sent: Saturday, February 04, 2017 12:43 AM
> To: maowenan
> Cc: Netdev; Jeff Kirsher
> Subject: Re: [PATCH net-next 2/2] add one config to select relax order mode in
> intel NIC's Kconfig
> 
> On Fri, Feb 3, 2017 at 1:30 AM, Mao Wenan  wrote:
> > This patch allows one to enable relax order mode in intel NIC's
> > Kconfig. CONFIG_ARCH_WANT_RELAX_ORDER is a common macro for some
> CPU
> > architecture to use relax order mode in NIC's source codes.
> > CONFIG_ARCH_WANT_RELAX_ORDER can be defined in arch/xxx/Kconfig,
> such
> > as sparc system exists in arch/sparc/Kconfig, but not all of arm64
> > systems can use relax order mode, so it can't be defined in
> > arch/arm64/Kconfig. Therefore PCI_RELAX_ORDER in NIC's Kconfig provide
> > one way to define macro CONFIG_ARCH_WANT_RELAX_ORDER.
> >
> > Signed-off-by: Mao Wenan 
> > ---
> >  drivers/net/ethernet/intel/Kconfig | 15 +++
> >  1 file changed, 15 insertions(+)
> >
> > diff --git a/drivers/net/ethernet/intel/Kconfig
> > b/drivers/net/ethernet/intel/Kconfig
> > index 1349b45..b366722 100644
> > --- a/drivers/net/ethernet/intel/Kconfig
> > +++ b/drivers/net/ethernet/intel/Kconfig
> > @@ -275,4 +275,19 @@ config FM10K
> >   To compile this driver as a module, choose M here. The module
> >   will be called fm10k.  MSI-X interrupt support is required
> >
> > +config PCI_RELAX_ORDER
> > +bool "PCI relax order mode support"
> > +default n
> > +select ARCH_WANT_RELAX_ORDER
> > +---help---
> > +  This allows one to enable relax order mode in driver.
> > +  CONFIG_ARCH_WANT_RELAX_ORDER is a common macro for
> some
> > +  CPU architecture to use relax order mode in NIC's source codes.
> > +  CONFIG_ARCH_WANT_RELAX_ORDER can be defined in
> arch/xxx/Kconfig,
> > +  such as sparc system exists in arch/sparc/Kconfig, but not all
> > +  of arm64 systems can use relax order mode, so it can't be
> defined
> > +  in arch/arm64/Kconfig. Therefore PCI_RELAX_ORDER provide
> one way
> > +  to define macro CONFIG_ARCH_WANT_RELAX_ORDER. Say Y
> here if you
> > +  want to enable relax order.
> > +
> >  endif # NET_VENDOR_INTEL
> 
> 
> You can't be dropping configuration options like this in here.  The
> ARCH_WANT_RELAX_ORDER should be selected by the architecture as a
> def_bool, not by the user via a kconfig option.  In addition this option has no
> business in the Intel wired LAN directory as this impacts all architectures.
> 
> This is something that would be an architecture specific PCI option and could
> impact other PCI devices beyond what is just in networking.
> If you are wanting to target the arm64 architecture you should probably drop
> this in the /arch/arm64/Kconfig, then that way anyone familiar with the arm64
> hardware can chime in if enabling relaxed ordering causes any known issues.
> 
> - Alex


I have tried to drop configuration to arch/arm64/Kconfig, but I found there will
be side effect as not all of arm64 systems really need relax order, so I put it
in intel Kconfig because it is a feature of PCI device and configured by NIC's
register.
By the way, do you agree with the first patch of this series? Shall I repost it
individually?
[PATCH net-next 1/2] ixgbevf and 82598 relax order mode support


Re: [PATCH net-next v2 00/12] bnxt_en: Add XDP support.

2017-02-03 Thread Michael Chan
On Fri, Feb 3, 2017 at 4:33 PM, Jakub Kicinski  wrote:
> On Fri, 03 Feb 2017 16:50:54 -0500 (EST), David Miller wrote:
>> We don't set precedence by one driver saying "hey it's better to do
>> things this way, forget what all the other drivers are doing."  Rather
>> we have a "discussion" about what the appropriate thing is to do and
>> convert all the drivers only after a decision has been made.
>
> Would making sure that if xdp_adjust_head() changes the starting offset
> (and length) of frame and the program returns XDP_PASS - the stack will
> see the changes made by xdp_adjust_head() fall under the same follow
> the precedent rule?  That is what Martin did for mlx4 and mlx5, and what
> John did in virtio, but not what this patch set does (see my comment on
> patch 11).
>
> I should have double checked mlx4/mlx5 and made it clearer that there is
> a precedent here in my review comment...

Yes, I plan to also include this change (make modified offset and
length visible to the stack) in the next version of the patch set.


Re: [PATCH net] bpf: expose netns inode to bpf programs

2017-02-03 Thread Alexei Starovoitov
On Sat, Feb 04, 2017 at 12:42:31AM +0100, Daniel Borkmann wrote:
> On 02/04/2017 12:06 AM, Alexei Starovoitov wrote:
> >On Fri, Feb 03, 2017 at 10:56:43PM +0100, Daniel Borkmann wrote:
> >>On 01/26/2017 04:27 AM, Alexei Starovoitov wrote:
> >>>in cases where bpf programs are looking at sockets and packets
> >>>that belong to different netns, it could be useful to read netns inode,
> >>>so that programs can make intelligent decisions.
> >>>For example to disallow raw sockets in all non-init netns the program can 
> >>>do:
> >>>if (sk->type == SOCK_RAW && sk->netns_inum != 0xf075)
> >>>   return 0;
> >>>where 0xf075 inode comes from /proc/pid/ns/net
> >>>
> >>>Similarly TC cls_bpf/act_bpf and socket filters can do
> >>>if (skb->netns_inum == expected_inode)
> >>>
> >>>The lack of netns awareness was a concern even for socket filters,
> >>>since the application can attach the same bpf program to sockets
> >>>in a different netns. Just like tc cls_bpf program can work in
> >>>different netns as well, so it has to be addressed uniformly
> >>>across all types of bpf programs.
> >>
> >>Sorry for jumping in late, but my question is, isn't this helper
> >>really only relevant for BPF_PROG_TYPE_CGROUP_* typed programs?
> >>Thus other prog types making use of bpf_convert_ctx_access()
> >>should probably reject that in .is_valid_access() callback?
> >>
> >>Reason why I'm asking is that for sockets or tc progs, you
> >>already have a netns context where you're attached to, and f.e.
> >>skbs leaving that netns context will be orphaned. Thus, why
> >>would tc or sock filter tailor a program with such a check,
> >>if it can only match/mismatch its own netns inum eventually?
> >
> >Please see the example I provided earlier.
> 
> That example for both socket filter and tc progs specifically
> wasn't quite clear to me, hence my question wrt why it's right
> now a "concern" for these ones. (Again, clear to me for cgroups
> progs.)
> 
> >We can have the same cls_bpf attached to all netns-es.
> >Same for socket filters and everything else.
> 
> So use-case would be that someone wants to attach the very same
> prog via tc to various netdevs sitting in different netns, and
> that prog looks up a map, controlled by initns, with skb->netns_inum
> as key and the resulting value could contain allowed feature bits
> for that specific netns prog the skbs goes through? That would be
> a feature, not "concern", no? At the same time, it's up to the
> user or mgmt app what gets loaded so f.e. it might just as well
> tailor/optimize the progs individually for the devs sitting in
> netns-es to avoid such map lookup.

yes. It's partially feature and partially bugfix.
Just sent a new patch and tried to explain that bit in commit log.



[PATCH v2 net] bpf: add bpf_sk_netns_id() helper

2017-02-03 Thread Alexei Starovoitov
in cases where bpf programs are looking at sockets and packets
that belong to different netns, it could be useful to get an id
that uniquely identifies a netns within the whole system.

Therefore introduce 'u64 bpf_sk_netns_id(sk);' helper. It returns
unique value that identifies netns of given socket or dev_net(skb->dev)
The upper 32-bits of the return value contain device id where namespace
filesystem resides and lower 32-bits contain inode number within that 
filesystem.
It's the same as
 struct stat st;
 stat("/proc/pid/ns/net", &st);
 return (st->st_dev << 32)  | st->st_ino;

For example to disallow raw sockets in all non-init netns
the bpf_type_cgroup_sock program can do:
if (sk->type == SOCK_RAW && bpf_sk_netns_id(sk) != 0x3f075)
  return 0;
where 0x3f075 comes from combination of st_dev and st_ino
of /proc/pid/ns/net

Note that all bpf programs types are global. The same socket filter
program can be attached to sockets in different netns,
just like cls_bpf can see ingress/egress packets of multiple
net_devices in different netns. The cgroup_bpf programs are
the most exposed to sockets and devices across netns,
but the need to identify netns applies to all.
For example, if bpf_type_cgroup_skb didn't exist the system wide
monitoring daemon could have used ld_preload mechanism and
attached the same program to see traffic from applications
across netns. Therefore make bpf_sk_netns_id() helper available
to all network related bpf program types.
For socket, cls_bpf and cgroup_skb programs this helper
can be considered a new feature, whereas for cgroup_sock
programs that modify sk->bound_dev_if (like 'ip vrf' does)
it's a bug fix, since 'ip vrf' needs to be netns aware.

Signed-off-by: Alexei Starovoitov 
---
Eric, I've added proc_get_ns_devid_inum() to nsfs.c
right next to __ns_get_path(), so when it is time in the future
to make nsfs more namespace aware, it will be easy to adjust
both new_inode_pseudo(mnt->mnt_sb) line and proc_get_ns_devid_inum()
I thought about using ns->stashed, but it's obviously transient
inode and not usable. If later we decide to store dev_t into ns_common
it will be fine as well. We'll just change proc_get_ns_devid_inum()
without affecting user space.
---
 fs/nsfs.c |  7 +++
 include/linux/proc_ns.h   |  3 ++-
 include/uapi/linux/bpf.h  | 14 +-
 net/core/filter.c | 44 ++-
 samples/bpf/bpf_helpers.h |  2 ++
 samples/bpf/sock_flags_kern.c |  2 ++
 samples/bpf/sockex1_kern.c|  2 ++
 7 files changed, 71 insertions(+), 3 deletions(-)

diff --git a/fs/nsfs.c b/fs/nsfs.c
index 8c9fb29c6673..1a604bccef86 100644
--- a/fs/nsfs.c
+++ b/fs/nsfs.c
@@ -49,6 +49,13 @@ static void nsfs_evict(struct inode *inode)
ns->ops->put(ns);
 }
 
+u64 proc_get_ns_devid_inum(struct ns_common *ns)
+{
+   u64 dev = new_encode_dev(nsfs_mnt->mnt_sb->s_dev);
+
+   return (dev << 32) | ns->inum;
+}
+
 static void *__ns_get_path(struct path *path, struct ns_common *ns)
 {
struct vfsmount *mnt = nsfs_mnt;
diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h
index 12cb8bd81d2d..b567b021e652 100644
--- a/include/linux/proc_ns.h
+++ b/include/linux/proc_ns.h
@@ -48,7 +48,7 @@ extern int pid_ns_prepare_proc(struct pid_namespace *ns);
 extern void pid_ns_release_proc(struct pid_namespace *ns);
 extern int proc_alloc_inum(unsigned int *pino);
 extern void proc_free_inum(unsigned int inum);
-
+extern u64 proc_get_ns_devid_inum(struct ns_common *ns);
 #else /* CONFIG_PROC_FS */
 
 static inline int pid_ns_prepare_proc(struct pid_namespace *ns) { return 0; }
@@ -61,6 +61,7 @@ static inline int proc_alloc_inum(unsigned int *inum)
 }
 static inline void proc_free_inum(unsigned int inum) {}
 
+static u64 proc_get_ns_devid_inum(struct ns_common *ns) { return 0; }
 #endif /* CONFIG_PROC_FS */
 
 static inline int ns_alloc_inum(struct ns_common *ns)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 0eb0e87dbe9f..e5b8cf16cbaf 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -430,6 +430,17 @@ union bpf_attr {
  * @xdp_md: pointer to xdp_md
  * @delta: An positive/negative integer to be added to xdp_md.data
  * Return: 0 on success or negative on error
+ *
+ * u64 bpf_sk_netns_id(sk)
+ * Returns unique value that identifies netns of given socket or skb.
+ * The upper 32-bits of the return value contain device id where namespace
+ * filesystem resides and lower 32-bits contain inode number within
+ * that filesystem. It's the same value as:
+ *  struct stat st;
+ *  stat("/proc/pid/ns/net", &st);
+ *  return (st->st_dev << 32)  | st->st_ino;
+ * @sk: pointer to struct sock or struct __sk_buff
+ * Return: filesystem's device id | netns inode
  */
 #define __BPF_FUNC_MAPPER(FN)  \
FN(unspec), \
@@ -476,7 +487,8 @@ union bpf_attr {

Re: [PATCH net-next v2 00/12] bnxt_en: Add XDP support.

2017-02-03 Thread Jakub Kicinski
On Fri, 03 Feb 2017 16:50:54 -0500 (EST), David Miller wrote:
> We don't set precedence by one driver saying "hey it's better to do
> things this way, forget what all the other drivers are doing."  Rather
> we have a "discussion" about what the appropriate thing is to do and
> convert all the drivers only after a decision has been made.

Would making sure that if xdp_adjust_head() changes the starting offset
(and length) of frame and the program returns XDP_PASS - the stack will
see the changes made by xdp_adjust_head() fall under the same follow
the precedent rule?  That is what Martin did for mlx4 and mlx5, and what
John did in virtio, but not what this patch set does (see my comment on
patch 11).

I should have double checked mlx4/mlx5 and made it clearer that there is
a precedent here in my review comment...


Re: [PATCH 2/3] net: ethernet: bgmac: unify code of the same family

2017-02-03 Thread Rafał Miłecki

On 2017-02-03 22:39, Jon Mason wrote:

BCM471X and BCM535X are of the same family (from what I can derive from
internal documents).  Group them into the case statement together, which
results in more code reuse.

Also, use existing helper variables to make the code a little more
readable too.

Signed-off-by: Jon Mason 


I'd like to review it / test it on a few devices. Please give me the weekend
for that.


Re: [PATCH v2 2/2] net: ethernet: bgmac: mac address change bug

2017-02-03 Thread Rafał Miłecki

On 02/03/2017 10:08 PM, Jon Mason wrote:

From: Hari Vyas 

ndo_set_mac_address() passes struct sockaddr * as 2nd parameter to
bgmac_set_mac_address() but code assumed u8 *.  This caused two bytes
chopping and the wrong mac address was configured.

Signed-off-by: Hari Vyas 
Signed-off-by: Jon Mason 
Fixes: 4e209001b86 ("bgmac: write mac address to hardware in ndo_set_mac_address")


I think you were going to Cc stable?


Fw: [Bug 193911] New: net_prio.ifpriomap is not aware of the network namespace, and discloses all network interface

2017-02-03 Thread Stephen Hemminger


Begin forwarded message:

Date: Fri, 03 Feb 2017 21:14:28 +
From: bugzilla-dae...@bugzilla.kernel.org
To: step...@networkplumber.org
Subject: [Bug 193911] New: net_prio.ifpriomap is not aware of the network 
namespace, and discloses all network interface


https://bugzilla.kernel.org/show_bug.cgi?id=193911

Bug ID: 193911
   Summary: net_prio.ifpriomap is not aware of the network
namespace, and discloses all network interface
   Product: Networking
   Version: 2.5
Kernel Version: 4.9
  Hardware: All
OS: Linux
  Tree: Mainline
Status: NEW
  Severity: normal
  Priority: P1
 Component: Other
  Assignee: step...@networkplumber.org
  Reporter: xga...@email.wm.edu
Regression: No

The pseudo file net_prio.ifpriomap (under /sys/fs/cgroup/net_prio) contains a
map of the priorities assigned to traffic starting from processes in a cgroup
and leaving the system on various interfaces. The data format is in the form of
[ifname priority]. 

We find that the kernel handler function hooked at net_prio.ifpriomap is not
aware of the network namespace, and thus it discloses all network interfaces on
the physical machine to the containerized applications. 

To be more specific, the read operation of net_prio.ifpriomap is handled by the
function read_priomap. Tracing from this function, we can find it invokes
for_each_netdev_rcu and sets the first parameter to the address of init_net. It
iterates all network devices of the host regardless of the network namespace.
Thus, from the view of a container, it can read the names of all network
devices of the host.

Here is an example. I checked it on Linux kernel 4.4 with Docker version
1.12.1. I do not have the latest kernel at hand. But there is no code change
between 4.4 and 4.9 for this function. It should be reproducible in the latest
kernel. 

I initiated a Docker container and checked the net_prio.ifpriomap inside the
container. It displayed all network interfaces information on the host.

Container: 
root@25e25d553c3b:/# cat /sys/fs/cgroup/net_prio/net_prio.ifpriomap 
lo 0
eth0 0
eth1 0
xenbr0 0
lxdbr0 0
virbr0 0
virbr0-nic 0
docker0 0
vnet0 0
vnet1 0
veth132de4a 0

Host:
@:~$ cat /sys/fs/cgroup/net_prio/net_prio.ifpriomap 
lo 0
eth0 0
eth1 0
xenbr0 0
lxdbr0 0
virbr0 0
virbr0-nic 0
docker0 0
vnet0 0
vnet1 0
veth132de4a 0

From the information displayed above, this file exposes the same network
interface information in a container and on a host, which we considered to be a
leakage for the network namespace.

-- 
You are receiving this mail because:
You are the assignee for the bug.


Re: [PATCH net] bpf: expose netns inode to bpf programs

2017-02-03 Thread Daniel Borkmann

On 02/04/2017 12:06 AM, Alexei Starovoitov wrote:

On Fri, Feb 03, 2017 at 10:56:43PM +0100, Daniel Borkmann wrote:

On 01/26/2017 04:27 AM, Alexei Starovoitov wrote:

in cases where bpf programs are looking at sockets and packets
that belong to different netns, it could be useful to read netns inode,
so that programs can make intelligent decisions.
For example to disallow raw sockets in all non-init netns the program can do:
if (sk->type == SOCK_RAW && sk->netns_inum != 0xf075)
   return 0;
where 0xf075 inode comes from /proc/pid/ns/net

Similarly TC cls_bpf/act_bpf and socket filters can do
if (skb->netns_inum == expected_inode)

The lack of netns awareness was a concern even for socket filters,
since the application can attach the same bpf program to sockets
in a different netns. Just like tc cls_bpf program can work in
different netns as well, so it has to be addressed uniformly
across all types of bpf programs.


Sorry for jumping in late, but my question is, isn't this helper
really only relevant for BPF_PROG_TYPE_CGROUP_* typed programs?
Thus other prog types making use of bpf_convert_ctx_access()
should probably reject that in .is_valid_access() callback?

Reason why I'm asking is that for sockets or tc progs, you
already have a netns context where you're attached to, and f.e.
skbs leaving that netns context will be orphaned. Thus, why
would tc or sock filter tailor a program with such a check,
if it can only match/mismatch its own netns inum eventually?


Please see the example I provided earlier.


That example for both socket filter and tc progs specifically
wasn't quite clear to me, hence my question wrt why it's right
now a "concern" for these ones. (Again, clear to me for cgroups
progs.)


We can have the same cls_bpf attached to all netns-es.
Same for socket filters and everything else.


So use-case would be that someone wants to attach the very same
prog via tc to various netdevs sitting in different netns, and
that prog looks up a map, controlled by initns, with skb->netns_inum
as key and the resulting value could contain allowed feature bits
for that specific netns prog the skbs goes through? That would be
a feature, not "concern", no? At the same time, it's up to the
user or mgmt app what gets loaded so f.e. it might just as well
tailor/optimize the progs individually for the devs sitting in
netns-es to avoid such map lookup.


All bpf programs are global.


True, but for socket filter and tc they are hooked/attached under
a given netns context.


They can all share info via maps and so on.



When making this effort to lookup and hardcode the dev/inode
num into the prog, wouldn't it be easier for these types if


we cannot hardcode dev/inode. They are dynamic and depends
where program runs.


Was referring to the test from above provided example:

>>> if (skb->netns_inum == expected_inode)


I'll send a patch shortly that exposes both.


Thanks,
Daniel


Re: [PATCH v2 net-next] bpf: enable verifier to add 0 to packet ptr

2017-02-03 Thread William Tu
Thanks. I got it. I will resubmit v3 patch!

On Fri, Feb 3, 2017 at 2:53 PM, Alexei Starovoitov
 wrote:
> On Fri, Feb 03, 2017 at 11:29:19PM +0100, Daniel Borkmann wrote:
>> On 02/03/2017 10:10 PM, William Tu wrote:
>> >Hi Alexei,
>> >
>> >why it is bogus? on my system, it fails without the patch applied.
>> >
>> >--William
>> >
>> >On Fri, Feb 3, 2017 at 12:55 PM, Alexei Starovoitov
>> > wrote:
>> >>On Fri, Feb 03, 2017 at 09:22:45AM -0800, William Tu wrote:
>> >>>The patch fixes the case when adding a zero value to the packet
>> >>>pointer.  The verifier reports the following error:
>> >>>   [...]
>> >>> R0=imm0,min_value=0,max_value=0
>> >>> R1=pkt(id=0,off=0,r=4)
>> >>> R2=pkt_end R3=fp-12
>> >>> R4=imm4,min_value=4,max_value=4
>> >>> R5=pkt(id=0,off=4,r=4)
>> >>>   269: (bf) r2 = r0   // r2 becomes imm0
>> >>>   270: (77) r2 >>= 3
>> >>>   271: (bf) r4 = r1   // r4 becomes pkt ptr
>> >>>   272: (0f) r4 += r2  // r4 += 0
>> >>>   addition of negative constant to packet pointer is not allowed
>> >>>
>> >>>Signed-off-by: William Tu 
>> >>>Signed-off-by: Mihai Budiu 
>> [...]
>> >>>   {
>> >>>+ "direct packet access: test14 (pkt_ptr += 0, good access)",
>> >>>+ .insns = {
>> >>>+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
>> >>>+ offsetof(struct __sk_buff, data)),
>> >>>+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
>> >>>+ offsetof(struct __sk_buff, data_end)),
>> >>>+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
>> >>>+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0),
>> >>
>> >>wait. the test is bogus.
>> >>please write the proper test for the feature
>> >>and check that it fails before the patch and passes afterwards.
>>
>> But still same code path that is executed in verifier as BPF_K and
>> CONST_IMM tracked reg both share the same path under add_imm label
>> in check_packet_ptr_add(), no? So it becomes r2=pkt(id=0,off=0,r=0);
>> r0 = r2; r0 += 0 here in this test. Probably okay as well, though
>> there could be risk that in future both don't share the same path
>> for some reason. I guess you were referring to either adding tests
>> for BPF_K /and/ CONST_IMM reg or just the latter, right?
>
> yes. Sorry I wasn't clear.
> imo the 'r0 += 0' is not something that verifier should recognize,
> since such nop insns shouldn't be generated by the compiler.
> It happened that the code path in verifier covers that case
> as well, but I think we really need to test 'rX += rY' case
> where rY is recognized as imm0, since that what the original
> use case was about.
>
>


Re: [PATCH net 2/2] macvtap: read vnet_hdr_size once

2017-02-03 Thread Eric Dumazet
On Fri, 2017-02-03 at 18:20 -0500, Willem de Bruijn wrote:
> From: Willem de Bruijn 
> 
> When IFF_VNET_HDR is enabled, a virtio_net header must precede data.
> Data length is verified to be greater than or equal to expected header
> length tun->vnet_hdr_sz before copying.
> 
> Macvtap functions read the value once, but unless READ_ONCE is used,
> the compiler may ignore this and read multiple times. Enforce a single
> read and locally cached value to avoid updates between test and use.
> 
> Signed-off-by: Willem de Bruijn 
> Suggested-by: Eric Dumazet 
> ---

Acked-by: Eric Dumazet 




Re: [PATCH net 1/2] tun: read vnet_hdr_sz once

2017-02-03 Thread Eric Dumazet
On Fri, 2017-02-03 at 18:20 -0500, Willem de Bruijn wrote:
> From: Willem de Bruijn 
> 
> When IFF_VNET_HDR is enabled, a virtio_net header must precede data.
> Data length is verified to be greater than or equal to expected header
> length tun->vnet_hdr_sz before copying.
> 
> Read this value once and cache locally, as it can be updated between
> the test and use (TOCTOU).
> 
> Signed-off-by: Willem de Bruijn 
> Reported-by: Dmitry Vyukov 
> CC: Eric Dumazet 
> ---

Acked-by: Eric Dumazet 




Re: [PATCH net] netlabel: out of bound access in cipso_v4_validate()

2017-02-03 Thread Paul Moore
On Fri, Feb 3, 2017 at 3:03 AM, Eric Dumazet  wrote:
> From: Eric Dumazet 
>
> syzkaller found another out of bound access in ip_options_compile(),
> or more exactly in cipso_v4_validate()
>
> Fixes: 20e2a8648596 ("cipso: handle CIPSO options correctly when NetLabel is disabled")
> Fixes: 446fda4f2682 ("[NetLabel]: CIPSOv4 engine")
> Signed-off-by: Eric Dumazet 
> Reported-by: Dmitry Vyukov  
> Cc: Paul Moore 
> ---
>  include/net/cipso_ipv4.h |4 
>  net/ipv4/cipso_ipv4.c|4 
>  2 files changed, 8 insertions(+)

Thanks guys.

Acked-by: Paul Moore 

> diff --git a/include/net/cipso_ipv4.h b/include/net/cipso_ipv4.h
> index 3ebb168b9afc68ad639b5d32f6182a845c83d759..a34b141f125f0032662f147b598c9fef4fb4bcef 100644
> --- a/include/net/cipso_ipv4.h
> +++ b/include/net/cipso_ipv4.h
> @@ -309,6 +309,10 @@ static inline int cipso_v4_validate(const struct sk_buff 
> *skb,
> }
>
> for (opt_iter = 6; opt_iter < opt_len;) {
> +   if (opt_iter + 1 == opt_len) {
> +   err_offset = opt_iter;
> +   goto out;
> +   }
> tag_len = opt[opt_iter + 1];
> if ((tag_len == 0) || (tag_len > (opt_len - opt_iter))) {
> err_offset = opt_iter + 1;
> diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
> index 
> 72d6f056d863603c959e1d04b9f863909a37c758..ae206163c273381ba6e8bd8a24fa050619a4a6ae
>  100644
> --- a/net/ipv4/cipso_ipv4.c
> +++ b/net/ipv4/cipso_ipv4.c
> @@ -1587,6 +1587,10 @@ int cipso_v4_validate(const struct sk_buff *skb, 
> unsigned char **option)
> goto validate_return_locked;
> }
>
> +   if (opt_iter + 1 == opt_len) {
> +   err_offset = opt_iter;
> +   goto validate_return_locked;
> +   }
> tag_len = tag[1];
> if (tag_len > (opt_len - opt_iter)) {
> err_offset = opt_iter + 1;
>
>



-- 
paul moore
www.paul-moore.com


Re: Potential issues (security and otherwise) with the current cgroup-bpf API

2017-02-03 Thread Alexei Starovoitov
On Fri, Feb 03, 2017 at 01:07:39PM -0800, Andy Lutomirski wrote:
> 
> Is there any plan to address this?  If not, I'll try to write that
> patch this weekend.

yes. I'm working on 'disallow program override' flag.
It got stalled, because netns discussion got stalled.
Later today will send a patch for dev_id+inode and
will continue on the flag patch.



[PATCH net 0/2] read vnet_hdr_sz once

2017-02-03 Thread Willem de Bruijn
From: Willem de Bruijn 

Tuntap devices allow concurrent use and update of field vnet_hdr_sz.
Read the field once to avoid TOCTOU.

Willem de Bruijn (2):
  tun: read vnet_hdr_sz once
  macvtap: read vnet_hdr_size once

 drivers/net/macvtap.c |  4 ++--
 drivers/net/tun.c | 10 ++
 2 files changed, 8 insertions(+), 6 deletions(-)

-- 
2.11.0.483.g087da7b7c-goog



[PATCH net 1/2] tun: read vnet_hdr_sz once

2017-02-03 Thread Willem de Bruijn
From: Willem de Bruijn 

When IFF_VNET_HDR is enabled, a virtio_net header must precede data.
Data length is verified to be greater than or equal to expected header
length tun->vnet_hdr_sz before copying.

Read this value once and cache locally, as it can be updated between
the test and use (TOCTOU).

Signed-off-by: Willem de Bruijn 
Reported-by: Dmitry Vyukov 
CC: Eric Dumazet 
---
 drivers/net/tun.c | 10 ++
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 2cd10b26b650..bfabe180053e 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1170,9 +1170,11 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
}
 
if (tun->flags & IFF_VNET_HDR) {
-   if (len < tun->vnet_hdr_sz)
+   int vnet_hdr_sz = READ_ONCE(tun->vnet_hdr_sz);
+
+   if (len < vnet_hdr_sz)
return -EINVAL;
-   len -= tun->vnet_hdr_sz;
+   len -= vnet_hdr_sz;
 
if (!copy_from_iter_full(&gso, sizeof(gso), from))
return -EFAULT;
@@ -1183,7 +1185,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, 
struct tun_file *tfile,
 
if (tun16_to_cpu(tun, gso.hdr_len) > len)
return -EINVAL;
-   iov_iter_advance(from, tun->vnet_hdr_sz - sizeof(gso));
+   iov_iter_advance(from, vnet_hdr_sz - sizeof(gso));
}
 
if ((tun->flags & TUN_TYPE_MASK) == IFF_TAP) {
@@ -1335,7 +1337,7 @@ static ssize_t tun_put_user(struct tun_struct *tun,
vlan_hlen = VLAN_HLEN;
 
if (tun->flags & IFF_VNET_HDR)
-   vnet_hdr_sz = tun->vnet_hdr_sz;
+   vnet_hdr_sz = READ_ONCE(tun->vnet_hdr_sz);
 
total = skb->len + vlan_hlen + vnet_hdr_sz;
 
-- 
2.11.0.483.g087da7b7c-goog



[PATCH net 2/2] macvtap: read vnet_hdr_size once

2017-02-03 Thread Willem de Bruijn
From: Willem de Bruijn 

When IFF_VNET_HDR is enabled, a virtio_net header must precede data.
Data length is verified to be greater than or equal to expected header
length tun->vnet_hdr_sz before copying.

Macvtap functions read the value once, but unless READ_ONCE is used,
the compiler may ignore this and read multiple times. Enforce a single
read and locally cached value to avoid updates between test and use.

Signed-off-by: Willem de Bruijn 
Suggested-by: Eric Dumazet 
---
 drivers/net/macvtap.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
index 402618565838..c27011bbe30c 100644
--- a/drivers/net/macvtap.c
+++ b/drivers/net/macvtap.c
@@ -681,7 +681,7 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, 
struct msghdr *m,
size_t linear;
 
if (q->flags & IFF_VNET_HDR) {
-   vnet_hdr_len = q->vnet_hdr_sz;
+   vnet_hdr_len = READ_ONCE(q->vnet_hdr_sz);
 
err = -EINVAL;
if (len < vnet_hdr_len)
@@ -820,7 +820,7 @@ static ssize_t macvtap_put_user(struct macvtap_queue *q,
 
if (q->flags & IFF_VNET_HDR) {
struct virtio_net_hdr vnet_hdr;
-   vnet_hdr_len = q->vnet_hdr_sz;
+   vnet_hdr_len = READ_ONCE(q->vnet_hdr_sz);
if (iov_iter_count(iter) < vnet_hdr_len)
return -EINVAL;
 
-- 
2.11.0.483.g087da7b7c-goog



Re: [next-net 00/19][pull request] Intel Wired LAN Driver Updates 2017-02-02

2017-02-03 Thread Jeff Kirsher
On Fri, 2017-02-03 at 16:28 -0500, David Miller wrote:
> Pretty sloppy submission Jeff.
> 
> First of all, things are getting backlogged way too much.  19 patches
> is too large, you need to keep it closer down to 10 and the very very
> low teens.  You need to send me pull requests more often so that this
> does not happen.
> 
> Second of all, all of your Subjects are messed up and have "queue"
> misspelled and things like this.
> 
> Finally, there is nothing to even pull at the URL you provided:
> 
> [davem@dhcp-10-15-49-210 net-next]$ git pull --no-ff
> git://git.kernel.org/pub/scm/linux/kernel/git/jkirsher/next-queue master
> From git://git.kernel.org/pub/scm/linux/kernel/git/jkirsher/next-queue
>  * branch    master -> FETCH_HEAD
> Already up-to-date.

I truly apologize, I had to move my kernel patch scripts to a new system
and did not catch the mis-spelled tree, which caused the mis-spellings in
the patches and the reason that it did not appear on my kernel.org tree.

I am also sorry for sending 19 patches in an attempt to "catch up" for my
being incommunicado over the last couple of months.  This is all my fault
and I am not going to go into all the personal crap I have been going
through which caused me to drop the ball over the last several months.

I will fix up this submission and reduce the number of patches.

signature.asc
Description: This is a digitally signed message part


Re: [PATCH net] bpf: expose netns inode to bpf programs

2017-02-03 Thread Alexei Starovoitov
On Fri, Feb 03, 2017 at 01:00:47PM -0800, Andy Lutomirski wrote:
> 
> ISTM any ability to migrate namespaces and to migrate eBPF programs
> that know about namespaces needs to have the eBPF program firmly
> rooted in some namespace (or perhaps cgroup in this case) so that it

programs are already global. We cannot break that.

> can see a namespaced view of the world.  For this to work, presumably
> we need to make sure that eBPF programs that are installed by programs
> that are in a container don't see traffic that isn't in that
> container.

such approach will break existing users.



Re: [PATCH net] bpf: expose netns inode to bpf programs

2017-02-03 Thread Alexei Starovoitov
On Fri, Feb 03, 2017 at 10:56:43PM +0100, Daniel Borkmann wrote:
> On 01/26/2017 04:27 AM, Alexei Starovoitov wrote:
> >in cases where bpf programs are looking at sockets and packets
> >that belong to different netns, it could be useful to read netns inode,
> >so that programs can make intelligent decisions.
> >For example to disallow raw sockets in all non-init netns the program can do:
> >if (sk->type == SOCK_RAW && sk->netns_inum != 0xf075)
> >   return 0;
> >where 0xf075 inode comes from /proc/pid/ns/net
> >
> >Similarly TC cls_bpf/act_bpf and socket filters can do
> >if (skb->netns_inum == expected_inode)
> >
> >The lack of netns awareness was a concern even for socket filters,
> >since the application can attach the same bpf program to sockets
> >in a different netns. Just like tc cls_bpf program can work in
> >different netns as well, so it has to be addressed uniformly
> >across all types of bpf programs.
> 
> Sorry for jumping in late, but my question is, isn't this helper
> really only relevant for BPF_PROG_TYPE_CGROUP_* typed programs?
> Thus other prog types making use of bpf_convert_ctx_access()
> should probably reject that in .is_valid_access() callback?
> 
> Reason why I'm asking is that for sockets or tc progs, you
> already have a netns context where you're attached to, and f.e.
> skbs leaving that netns context will be orphaned. Thus, why
> would tc or sock filter tailor a program with such a check,
> if it can only match/mismatch its own netns inum eventually?

Please see the example I provided earlier.
We can have the same cls_bpf attached to all netns-es.
Same for socket filters and everything else.
All bpf programs are global.
They can all share info via maps and so on.

> When making this effort to lookup and hardcode the dev/inode
> num into the prog, wouldn't it be easier for these types if

We cannot hardcode dev/inode. They are dynamic and depend on
where the program runs.
I'll send a patch shortly that exposes both.



Re: [PATCH net-next] net: remove support for per driver ndo_busy_poll()

2017-02-03 Thread Eric Dumazet
On Fri, 2017-02-03 at 17:28 -0500, David Miller wrote:

> Actually, one more driver needs converting, "enic".
> 
> I did a quick and dirty conversion:
> 
> 
> From 7a655c6324a8968ea2f027bf3660c87c42ac3de4 Mon Sep 17 00:00:00 2001
> From: "David S. Miller" 
> Date: Fri, 3 Feb 2017 17:28:21 -0500
> Subject: [PATCH] enic: Remove local ndo_busy_poll() implementation.
> 
> We do polling generically these days.
> 
> Signed-off-by: David S. Miller 

Oh sorry about that. Looks fine, thanks a lot !





Re: [PATCH net-next] net: remove support for per driver ndo_busy_poll()

2017-02-03 Thread Eric Dumazet
On Fri, 2017-02-03 at 17:18 -0500, David Miller wrote:
> From: Eric Dumazet 
> Date: Thu, 02 Feb 2017 18:43:28 -0800
> 
> > From: Eric Dumazet 
> > 
> > We added generic support for busy polling in NAPI layer in linux-4.5
> > 
> > No network driver uses ndo_busy_poll() anymore, we can get rid
> > of the pointer in struct net_device_ops, and its use in sk_busy_loop()
> > 
> > Saves NETIF_F_BUSY_POLL features bit.
> > 
> > Signed-off-by: Eric Dumazet 
> 
> Applied.

Nice ! Thanks !




[PATCH net] tcp: avoid infinite loop in tcp_splice_read()

2017-02-03 Thread Eric Dumazet
From: Eric Dumazet 

Splicing from TCP socket is vulnerable when a packet with URG flag is
received and stored into receive queue.

__tcp_splice_read() returns 0, and sk_wait_data() immediately
returns since there is the problematic skb in queue.

This is a nice way to burn cpu (aka infinite loop) and trigger
soft lockups.

Again, this gem was found by syzkaller tool.

Fixes: 9c55e01c0cc8 ("[TCP]: Splice receive support.")
Signed-off-by: Eric Dumazet 
Reported-by: Dmitry Vyukov  
Cc: Willy Tarreau 
---
 net/ipv4/tcp.c |6 ++
 1 file changed, 6 insertions(+)

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 
4a044964da6670829e5c47fef52d2cd76360b59f..0efb4c7f6704f662b6c762e48698a41564add2a4
 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -770,6 +770,12 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos,
ret = -EAGAIN;
break;
}
+   /* if __tcp_splice_read() got nothing while we have
+* an skb in receive queue, we do not want to loop.
+* This might happen with URG data.
+*/
+   if (!skb_queue_empty(&sk->sk_receive_queue))
+   break;
sk_wait_data(sk, &timeo, NULL);
if (signal_pending(current)) {
ret = sock_intr_errno(timeo);




Re: [PATCH v2 net-next] bpf: enable verifier to add 0 to packet ptr

2017-02-03 Thread Alexei Starovoitov
On Fri, Feb 03, 2017 at 11:29:19PM +0100, Daniel Borkmann wrote:
> On 02/03/2017 10:10 PM, William Tu wrote:
> >Hi Alexei,
> >
> >why it is bogus? on my system, it fails without the patch applied.
> >
> >--William
> >
> >On Fri, Feb 3, 2017 at 12:55 PM, Alexei Starovoitov
> > wrote:
> >>On Fri, Feb 03, 2017 at 09:22:45AM -0800, William Tu wrote:
> >>>The patch fixes the case when adding a zero value to the packet
> >>>pointer.  The verifier reports the following error:
> >>>   [...]
> >>> R0=imm0,min_value=0,max_value=0
> >>> R1=pkt(id=0,off=0,r=4)
> >>> R2=pkt_end R3=fp-12
> >>> R4=imm4,min_value=4,max_value=4
> >>> R5=pkt(id=0,off=4,r=4)
> >>>   269: (bf) r2 = r0   // r2 becomes imm0
> >>>   270: (77) r2 >>= 3
> >>>   271: (bf) r4 = r1   // r4 becomes pkt ptr
> >>>   272: (0f) r4 += r2  // r4 += 0
> >>>   addition of negative constant to packet pointer is not allowed
> >>>
> >>>Signed-off-by: William Tu 
> >>>Signed-off-by: Mihai Budiu 
> [...]
> >>>   {
> >>>+ "direct packet access: test14 (pkt_ptr += 0, good access)",
> >>>+ .insns = {
> >>>+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
> >>>+ offsetof(struct __sk_buff, data)),
> >>>+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
> >>>+ offsetof(struct __sk_buff, data_end)),
> >>>+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
> >>>+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0),
> >>
> >>wait. the test is bogus.
> >>please write the proper test for the feature
> >>and check that it fails before the patch and passes afterwards.
> 
> But still same code path that is executed in verifier as BPF_K and
> CONST_IMM tracked reg both share the same path under add_imm label
> in check_packet_ptr_add(), no? So it becomes r2=pkt(id=0,off=0,r=0);
> r0 = r2; r0 += 0 here in this test. Probably okay as well, though
> there could be risk that in future both don't share the same path
> for some reason. I guess you were referring to either adding tests
> for BPF_K /and/ CONST_IMM reg or just the latter, right?

yes. Sorry I wasn't clear.
imo the 'r0 += 0' is not something that verifier should recognize,
since such nop insns shouldn't be generated by the compiler.
It happened that the code path in verifier covers that case
as well, but I think we really need to test 'rX += rY' case
where rY is recognized as imm0, since that what the original
use case was about.




Re: [PATCH v2 1/2] net: ethernet: bgmac: init sequence bug

2017-02-03 Thread Jon Mason
On Fri, Feb 3, 2017 at 4:41 PM, Rafał Miłecki  wrote:
> On 02/03/2017 10:08 PM, Jon Mason wrote:
>>
>> @@ -61,15 +60,20 @@ static bool platform_bgmac_clk_enabled(struct bgmac
>> *bgmac)
>>
>>  static void platform_bgmac_clk_enable(struct bgmac *bgmac, u32 flags)
>>  {
>> -   bgmac_idm_write(bgmac, BCMA_IOCTL,
>> -   (BCMA_IOCTL_CLK | BCMA_IOCTL_FGC | flags));
>> +   u32 val;
>> +
>> +   val = bgmac_idm_read(bgmac, BCMA_IOCTL);
>> +   /* Some bits of BCMA_IOCTL set by HW/ATF and should not change */
>> +   val |= flags & ~(BGMAC_AWCACHE | BGMAC_ARCACHE | BGMAC_AWUSER |
>> +BGMAC_ARUSER);
>> +   val |= BGMAC_CLK_EN;
>> bgmac_idm_read(bgmac, BCMA_IOCTL);
>
>
> This read was previously following write op most likely to flush it or
> something. I don't think it makes any sense to read after read.

Actually, that is sloppy coding on my part.  It should have a write
prior to the read to match what was there before.

I find it odd that it worked when I tested this patch.  It makes me
wonder if this "modify, reset, modify" series is really necessary
after all.  The docs indicate that writing a 0 to the reset brings it
out of reset.  I do not see any code that puts the HW in reset.  So,
unless the bootloader puts the HW in reset or it is in the reset state
by default, this seems like unnecessary code.  I can add some CYA
logic to read and see if it is in reset, toggle the bit, and then just
do the CLK enable.  Thoughts?

Thanks,
Jon


Re: [PATCH net-next] net: skb_needs_check() accepts CHECKSUM_NONE for tx

2017-02-03 Thread David Miller
From: Eric Dumazet 
Date: Fri, 03 Feb 2017 14:29:42 -0800

> From: Eric Dumazet 
> 
> My recent change missed fact that UFO would perform a complete
> UDP checksum before segmenting in frags.
> 
> In this case skb->ip_summed is set to CHECKSUM_NONE.
> 
> We need to add this valid case to skb_needs_check()
> 
> Fixes: b2504a5dbef3 ("net: reduce skb_warn_bad_offload() noise")
> Signed-off-by: Eric Dumazet 

Applied.


Re: [PATCH 2/3] net: ethernet: bgmac: unify code of the same family

2017-02-03 Thread Jon Mason
On Fri, Feb 3, 2017 at 4:48 PM, Rafał Miłecki  wrote:
> On 2017-02-03 22:39, Jon Mason wrote:
>>
>> BCM471X and BCM535X are of the same family (from what I can derive from
>> internal documents).  Group them into the case statement together, which
>> results in more code reuse.
>>
>> Also, use existing helper variables to make the code a little more
>> readable too.
>>
>> Signed-off-by: Jon Mason 
>
>
> I'd like to review it / test it on few devices. Please give me weekend for
> that.

Yes, please test this as much as you can.  The code move was pretty
innocuous, and those are always the times when it comes back to bite
me.

Thanks,
Jon


[PATCH net-next] net: skb_needs_check() accepts CHECKSUM_NONE for tx

2017-02-03 Thread Eric Dumazet
From: Eric Dumazet 

My recent change missed fact that UFO would perform a complete
UDP checksum before segmenting in frags.

In this case skb->ip_summed is set to CHECKSUM_NONE.

We need to add this valid case to skb_needs_check()

Fixes: b2504a5dbef3 ("net: reduce skb_warn_bad_offload() noise")
Signed-off-by: Eric Dumazet 
Cc: Willem de Bruijn 
---
 net/core/dev.c |7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index 
727b6fda0e8c6497ee42dc6d3065e326e9192c21..2ba5ef33e83909871fcf69a036062e00f0cf86c7
 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2637,9 +2637,10 @@ EXPORT_SYMBOL(skb_mac_gso_segment);
 static inline bool skb_needs_check(struct sk_buff *skb, bool tx_path)
 {
if (tx_path)
-   return skb->ip_summed != CHECKSUM_PARTIAL;
-   else
-   return skb->ip_summed == CHECKSUM_NONE;
+   return skb->ip_summed != CHECKSUM_PARTIAL &&
+  skb->ip_summed != CHECKSUM_NONE;
+
+   return skb->ip_summed == CHECKSUM_NONE;
 }
 
 /**




Re: [PATCH v2 net-next] bpf: enable verifier to add 0 to packet ptr

2017-02-03 Thread Daniel Borkmann

On 02/03/2017 10:10 PM, William Tu wrote:

Hi Alexei,

why it is bogus? on my system, it fails without the patch applied.

--William

On Fri, Feb 3, 2017 at 12:55 PM, Alexei Starovoitov
 wrote:

On Fri, Feb 03, 2017 at 09:22:45AM -0800, William Tu wrote:

The patch fixes the case when adding a zero value to the packet
pointer.  The verifier reports the following error:
   [...]
 R0=imm0,min_value=0,max_value=0
 R1=pkt(id=0,off=0,r=4)
 R2=pkt_end R3=fp-12
 R4=imm4,min_value=4,max_value=4
 R5=pkt(id=0,off=4,r=4)
   269: (bf) r2 = r0   // r2 becomes imm0
   270: (77) r2 >>= 3
   271: (bf) r4 = r1   // r4 becomes pkt ptr
   272: (0f) r4 += r2  // r4 += 0
   addition of negative constant to packet pointer is not allowed

Signed-off-by: William Tu 
Signed-off-by: Mihai Budiu 

[...]

   {
+ "direct packet access: test14 (pkt_ptr += 0, good access)",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0),


wait. the test is bogus.
please write the proper test for the feature
and check that it fails before the patch and passes afterwards.


But still same code path that is executed in verifier as BPF_K and
CONST_IMM tracked reg both share the same path under add_imm label
in check_packet_ptr_add(), no? So it becomes r2=pkt(id=0,off=0,r=0);
r0 = r2; r0 += 0 here in this test. Probably okay as well, though
there could be risk that in future both don't share the same path
for some reason. I guess you were referring to either adding tests
for BPF_K /and/ CONST_IMM reg or just the latter, right?


Re: [PATCH net-next v2 00/12] bnxt_en: Add XDP support.

2017-02-03 Thread David Miller
From: Tom Herbert 
Date: Fri, 3 Feb 2017 14:25:00 -0800

> On Fri, Feb 3, 2017 at 2:02 PM, David Miller  wrote:
>> From: Tom Herbert 
>> Date: Fri, 3 Feb 2017 13:58:56 -0800
>>
>>> On Fri, Feb 3, 2017 at 1:50 PM, David Miller  wrote:
>>>> From: Michael Chan 
>>>> Date: Fri, 3 Feb 2017 13:13:47 -0800
>>>>
>>>>> On Fri, Feb 3, 2017 at 12:49 PM, David Miller  wrote:
>>>>>>
>>>>>> Please _DO NOT_ guard XDP support with an ifdef the user
>>>>>> can modify.
>>>>>>
>>>>>> Treat it like any other common netdev feature a driver might
>>>>>> support such as checksum offloading or GRO.
>>>>>>
>>>>>
>>>>> David, I want to make sure I understand completely.  Are you saying
>>>>> don't use Kconfig option for XDP?  Have it always available?
>>>>
>>>> Yes.
>>>>
>>>> I don't see a similar config option used in any other driver.
>>>>
>>>> What's really driving me completely mad about driver XDP adoption
>>>> is that there is so much inconsistency.
>>>>
>>>> If you do not see another XDP supporting driver do something, don't be
>>>> tempted to blaze your own trail and handle something in a unique way.
>>>>
>>>> We don't set precedence by one driver saying "hey it's better to do
>>>> things this way, forget what all the other drivers are doing."  Rather
>>>> we have a "discussion" about what the appropriate thing is to do and
>>>> convert all the drivers only after a decision has been made.
>>>>
>>>> Meanwhile we keep the status quo.
>>>
>>> I am working on some API changes that will hopefully get a little
>>> consistency across these drivers (this includes feature flag
>>> NETIF_F_XDP). This will reduce code some and should be good cleanup,
>>> but XDP is currently very intertwined with the critical data path so
>>> we might need to be looking at this for a while. There's now more
>>> drivers with XDP support than when I started this work, so I don't
>>> think bnxt should wait for this cleanup-- it's just one more driver
>>> we'll have to retrofit.
>>
>> Of course.
>>
>> Michael just respin with the Kconfig change and I'll apply your
>> series.  In fact I was about to until I noticed the XDP Kconfig knob
>> :)
> 
> Meaning no Kconfig and no features flag I assume...

For now, yes.


Re: [PATCH net-next] net: remove support for per driver ndo_busy_poll()

2017-02-03 Thread David Miller
From: David Miller 
Date: Fri, 03 Feb 2017 17:18:20 -0500 (EST)

> From: Eric Dumazet 
> Date: Thu, 02 Feb 2017 18:43:28 -0800
> 
>> From: Eric Dumazet 
>> 
>> We added generic support for busy polling in NAPI layer in linux-4.5
>> 
>> No network driver uses ndo_busy_poll() anymore, we can get rid
>> of the pointer in struct net_device_ops, and its use in sk_busy_loop()
>> 
>> Saves NETIF_F_BUSY_POLL features bit.
>> 
>> Signed-off-by: Eric Dumazet 
> 
> Applied.

Actually, one more driver needs converting, "enic".

I did a quick and dirty conversion:


From 7a655c6324a8968ea2f027bf3660c87c42ac3de4 Mon Sep 17 00:00:00 2001
From: "David S. Miller" 
Date: Fri, 3 Feb 2017 17:28:21 -0500
Subject: [PATCH] enic: Remove local ndo_busy_poll() implementation.

We do polling generically these days.

Signed-off-by: David S. Miller 
---
 drivers/net/ethernet/cisco/enic/enic_main.c | 66 ++--
 drivers/net/ethernet/cisco/enic/vnic_rq.h   | 78 -
 2 files changed, 5 insertions(+), 139 deletions(-)

diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c 
b/drivers/net/ethernet/cisco/enic/enic_main.c
index 91e42be..c009f6d 100644
--- a/drivers/net/ethernet/cisco/enic/enic_main.c
+++ b/drivers/net/ethernet/cisco/enic/enic_main.c
@@ -43,10 +43,8 @@
 #ifdef CONFIG_RFS_ACCEL
 #include <linux/cpu_rmap.h>
 #endif
-#ifdef CONFIG_NET_RX_BUSY_POLL
-#include <net/busy_poll.h>
-#endif
 #include <linux/crash_dump.h>
+#include <net/busy_poll.h>
 
 #include "cq_enet_desc.h"
 #include "vnic_dev.h"
@@ -1191,8 +1189,7 @@ static void enic_rq_indicate_buf(struct vnic_rq *rq,
__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), 
vlan_tci);
 
skb_mark_napi_id(skb, &enic->napi[rq->index]);
-   if (enic_poll_busy_polling(rq) ||
-   !(netdev->features & NETIF_F_GRO))
+   if (!(netdev->features & NETIF_F_GRO))
netif_receive_skb(skb);
else
napi_gro_receive(&enic->napi[q_number], skb);
@@ -1296,15 +1293,6 @@ static int enic_poll(struct napi_struct *napi, int 
budget)
wq_work_done = vnic_cq_service(&enic->cq[cq_wq], wq_work_to_do,
   enic_wq_service, NULL);
 
-   if (!enic_poll_lock_napi(>rq[cq_rq])) {
-   if (wq_work_done > 0)
-   vnic_intr_return_credits(>intr[intr],
-wq_work_done,
-0 /* dont unmask intr */,
-0 /* dont reset intr timer */);
-   return budget;
-   }
-
if (budget > 0)
rq_work_done = vnic_cq_service(&enic->cq[cq_rq],
rq_work_to_do, enic_rq_service, NULL);
@@ -1323,7 +1311,6 @@ static int enic_poll(struct napi_struct *napi, int budget)
0 /* don't reset intr timer */);
 
err = vnic_rq_fill(&enic->rq[0], enic_rq_alloc_buf);
-   enic_poll_unlock_napi(>rq[cq_rq], napi);
 
/* Buffer allocation failed. Stay in polling
 * mode so we can try to fill the ring again.
@@ -1390,34 +1377,6 @@ static void enic_set_rx_cpu_rmap(struct enic *enic)
 
 #endif /* CONFIG_RFS_ACCEL */
 
-#ifdef CONFIG_NET_RX_BUSY_POLL
-static int enic_busy_poll(struct napi_struct *napi)
-{
-   struct net_device *netdev = napi->dev;
-   struct enic *enic = netdev_priv(netdev);
-   unsigned int rq = (napi - >napi[0]);
-   unsigned int cq = enic_cq_rq(enic, rq);
-   unsigned int intr = enic_msix_rq_intr(enic, rq);
-   unsigned int work_to_do = -1; /* clean all pkts possible */
-   unsigned int work_done;
-
-   if (!enic_poll_lock_poll(>rq[rq]))
-   return LL_FLUSH_BUSY;
-   work_done = vnic_cq_service(>cq[cq], work_to_do,
-   enic_rq_service, NULL);
-
-   if (work_done > 0)
-   vnic_intr_return_credits(>intr[intr],
-work_done, 0, 0);
-   vnic_rq_fill(>rq[rq], enic_rq_alloc_buf);
-   if (enic->rx_coalesce_setting.use_adaptive_rx_coalesce)
-   enic_calc_int_moderation(enic, >rq[rq]);
-   enic_poll_unlock_poll(>rq[rq]);
-
-   return work_done;
-}
-#endif /* CONFIG_NET_RX_BUSY_POLL */
-
 static int enic_poll_msix_wq(struct napi_struct *napi, int budget)
 {
struct net_device *netdev = napi->dev;
@@ -1459,8 +1418,6 @@ static int enic_poll_msix_rq(struct napi_struct *napi, 
int budget)
unsigned int work_done = 0;
int err;
 
-   if (!enic_poll_lock_napi(>rq[rq]))
-   return budget;
/* Service RQ
 */
 
@@ -1493,7 +1450,6 @@ static int enic_poll_msix_rq(struct napi_struct *napi, 
int budget)
 */
enic_calc_int_moderation(enic, >rq[rq]);
 
-   

Re: [RFC PATCH net-next 1/2] bpf: Save original ebpf instructions

2017-02-03 Thread David Ahern
On 2/3/17 2:09 PM, Daniel Borkmann wrote:
> On 02/03/2017 09:38 PM, David Ahern wrote:
>> Similar to classic bpf, support saving original ebpf instructions
>>
>> Signed-off-by: David Ahern 
> 
> Not convinced that this is in the right direction, this not only 
> *significantly*
> increases mem footprint for each and every program, but also when you dump 
> this,
> then map references from relocs inside the insns are meaningless (f.e. what 
> about
> prog arrays used in tail calls?), so things like criu also won't be able to 
> use
> this kind of interface for dump and restore. If it's just for debugging, then
> why not extend the existing tracing infrastructure around bpf that was started
> with intention to gain more visibility.


Yes, saving the original bpf increases the memory footprint. If you noticed, a 
kmemdup is used for the exact instruction size (no page round up). Right now 
programs are limited to a single page, so worst case  is an extra page per 
program. I am open to other suggestions. For example, bpf_prog is rounded up to 
a page which means there could be room at the end of the page for the original 
instructions. This is definitely true for the ip vrf programs which will be < 
32 instructions even with the namespace checking and the conversions done 
kernel side.

Tracepoints will not solve the problem for me for a number of reasons. 
Tracepoints have to be hit to return data, and there is no way the tracepoint 
can return relevant information for me to verify that the correct filter was 
downloaded. I want the original code. I want to audit what was installed. In my 
case there could be N VRFs, and I want 'ip vrf' or ifupdown2 or any other 
command to be able to verify that each cgroup has the correct program, and to 
verify that the default VRF does *not* have a program installed.

Generically, the bpf code might contain relative data but that's for the user 
or decoder program to deal with. Surely there is no harm in returning the 
original, downloaded bpf code to a properly privileged process. If I am 
debugging some weird network behavior, I want to be able to determine what bpf 
code is running where and to see what it is doing to whatever degree possible. 
Saving the original code is the first part of this.


Re: [PATCH RFC net-next 4/4] bridge: add ability to turn off fdb used updates

2017-02-03 Thread Nikolay Aleksandrov
On 03/02/17 23:24, Stephen Hemminger wrote:
> On Fri, 3 Feb 2017 19:34:19 +0100
> Nikolay Aleksandrov  wrote:
> 
>> On 03/02/17 19:28, Stephen Hemminger wrote:
>>> On Fri, 3 Feb 2017 09:30:37 +0100
>>> Nikolay Aleksandrov  wrote:
>>>   
 On 03/02/17 03:47, David Miller wrote:  
> From: Nikolay Aleksandrov 
> Date: Tue, 31 Jan 2017 16:31:58 +0100
> 
>> @@ -197,7 +197,8 @@ int br_handle_frame_finish(struct net *net, struct 
>> sock *sk, struct sk_buff *skb
>>  if (dst->is_local)
>>  return br_pass_frame_up(skb);
>>  
>> -dst->used = jiffies;
>> +if (br->used_enabled)
>> +dst->used = jiffies;
>
> Have you tried:
>
>   if (dst->used != jiffies)
>   dst->used = jiffies;
>
> If that isn't effective, you can tweak the test to decrease the
> granularity of the value.  Basically, if dst->used is within
> 1 HZ of jiffies, don't do the write.
>
> I suspect this might help a lot, and not require a new bridging
> option.
> 

 Yes, I actually have a patch titled "used granularity". :-) I've tested 
 with different
 values and it does help but it either needs to be paired with another 
 similar test for
 the "updated" field (since they share a write-heavy cache line) or they 
 need to be
 in separate cache lines to avoid that dst's source port from causing the 
 load HitM for
 all who check the value.

 I'll run some more tests and probably go this way for now.

 Thanks,
  Nik
  
>>>
>>> Since used doesn't need HZ granularity, it reports values in clock_t 
>>> resolution so
>>> storing (and doing cmp and set would mean that it would only be 100 HZ
>>>   
>>
>> Yes, exactly what I'm currently testing. Will post the new set soon.
>> Since HZ can be different a generic way to obtain the granularity for
>> both should be clock_t_to_jiffies(1) if I'm not missing something.
>>
>>
> 
> USER_HZ is set by userspace ABI to 100 hz. HZ is configurable when kernel is 
> built.
> 

Yes, the point I was trying to make is that we want to take the number of 
jiffies
we can skip by converting 1 clock_t to X jiffies because the user-space 
granularity
is clock_t and HZ can change, thus clock_t_to_jiffies(1) should give us the 
number
of updates we can skip for "used" and "updated".
By "both" I meant "used" and "updated" fields, not HZ and USER_HZ.





Re: [PATCH net-next v2 00/12] bnxt_en: Add XDP support.

2017-02-03 Thread Tom Herbert
On Fri, Feb 3, 2017 at 2:02 PM, David Miller  wrote:
> From: Tom Herbert 
> Date: Fri, 3 Feb 2017 13:58:56 -0800
>
>> On Fri, Feb 3, 2017 at 1:50 PM, David Miller  wrote:
>>> From: Michael Chan 
>>> Date: Fri, 3 Feb 2017 13:13:47 -0800
>>>
>>>> On Fri, Feb 3, 2017 at 12:49 PM, David Miller  wrote:
>>>>>
>>>>> Please _DO NOT_ guard XDP support with an ifdef the user
>>>>> can modify.
>>>>>
>>>>> Treat it like any other common netdev feature a driver might
>>>>> support such as checksum offloading or GRO.
>>>>>
>>>>
>>>> David, I want to make sure I understand completely.  Are you saying
>>>> don't use Kconfig option for XDP?  Have it always available?
>>>
>>> Yes.
>>>
>>> I don't see a similar config option used in any other driver.
>>>
>>> What's really driving me completely mad about driver XDP adoption
>>> is that there is so much inconsistency.
>>>
>>> If you do not see another XDP supporting driver do something, don't be
>>> tempted to blaze your own trail and handle something in a unique way.
>>>
>>> We don't set precedence by one driver saying "hey it's better to do
>>> things this way, forget what all the other drivers are doing."  Rather
>>> we have a "discussion" about what the appropriate thing is to do and
>>> convert all the drivers only after a decision has been made.
>>>
>>> Meanwhile we keep the status quo.
>>
>> I am working on some API changes that will hopefully get a little
>> consistency across these drivers (this includes feature flag
>> NETIF_F_XDP). This will reduce code some and should be good cleanup,
>> but XDP is currently very intertwined with the critical data path so
>> we might need to be looking at this for a while. There's now more
>> drivers with XDP support than when I started this work, so I don't
>> think bnxt should wait for this cleanup-- it's just one more driver
>> we'll have to retrofit.
>
> Of course.
>
> Michael just respin with the Kconfig change and I'll apply your
> series.  In fact I was about to until I noticed the XDP Kconfig knob
> :)

Meaning no Kconfig and no features flag I assume...


Re: [PATCH RFC net-next 4/4] bridge: add ability to turn off fdb used updates

2017-02-03 Thread Stephen Hemminger
On Fri, 3 Feb 2017 19:34:19 +0100
Nikolay Aleksandrov  wrote:

> On 03/02/17 19:28, Stephen Hemminger wrote:
> > On Fri, 3 Feb 2017 09:30:37 +0100
> > Nikolay Aleksandrov  wrote:
> >   
> >> On 03/02/17 03:47, David Miller wrote:  
> >>> From: Nikolay Aleksandrov 
> >>> Date: Tue, 31 Jan 2017 16:31:58 +0100
> >>> 
>  @@ -197,7 +197,8 @@ int br_handle_frame_finish(struct net *net, struct 
>  sock *sk, struct sk_buff *skb
>   if (dst->is_local)
>   return br_pass_frame_up(skb);
>   
>  -dst->used = jiffies;
>  +if (br->used_enabled)
>  +dst->used = jiffies;
> >>>
> >>> Have you tried:
> >>>
> >>>   if (dst->used != jiffies)
> >>>   dst->used = jiffies;
> >>>
> >>> If that isn't effective, you can tweak the test to decrease the
> >>> granularity of the value.  Basically, if dst->used is within
> >>> 1 HZ of jiffies, don't do the write.
> >>>
> >>> I suspect this might help a lot, and not require a new bridging
> >>> option.
> >>> 
> >>
> >> Yes, I actually have a patch titled "used granularity". :-) I've tested 
> >> with different
> >> values and it does help but it either needs to be paired with another 
> >> similar test for
> >> the "updated" field (since they share a write-heavy cache line) or they 
> >> need to be
> >> in separate cache lines to avoid that dst's source port from causing the 
> >> load HitM for
> >> all who check the value.
> >>
> >> I'll run some more tests and probably go this way for now.
> >>
> >> Thanks,
> >>  Nik
> >>  
> > 
> > Since used doesn't need HZ granularity, it reports values in clock_t 
> > resolution so
> > storing (and doing cmp and set would mean that it would only be 100 HZ
> >   
> 
> Yes, exactly what I'm currently testing. Will post the new set soon.
> Since HZ can be different a generic way to obtain the granularity for
> both should be clock_t_to_jiffies(1) if I'm not missing something.
> 
> 

USER_HZ is set by userspace ABI to 100 hz. HZ is configurable when kernel is 
built.


Re: [PATCH v2 1/2] net: ethernet: bgmac: init sequence bug

2017-02-03 Thread Rafał Miłecki

On 02/03/2017 10:08 PM, Jon Mason wrote:

@@ -61,15 +60,20 @@ static bool platform_bgmac_clk_enabled(struct bgmac *bgmac)

 static void platform_bgmac_clk_enable(struct bgmac *bgmac, u32 flags)
 {
-   bgmac_idm_write(bgmac, BCMA_IOCTL,
-   (BCMA_IOCTL_CLK | BCMA_IOCTL_FGC | flags));
+   u32 val;
+
+   val = bgmac_idm_read(bgmac, BCMA_IOCTL);
+   /* Some bits of BCMA_IOCTL set by HW/ATF and should not change */
+   val |= flags & ~(BGMAC_AWCACHE | BGMAC_ARCACHE | BGMAC_AWUSER |
+BGMAC_ARUSER);
+   val |= BGMAC_CLK_EN;
bgmac_idm_read(bgmac, BCMA_IOCTL);


This read was previously following write op most likely to flush it or
something. I don't think it makes any sense to read after read.


Re: [PATCH net-next] ixgbevf: get rid of custom busy polling code

2017-02-03 Thread David Miller
From: Eric Dumazet 
Date: Thu, 02 Feb 2017 16:59:18 -0800

> From: Eric Dumazet 
> 
> In linux-4.5, busy polling was implemented in core
> NAPI stack, meaning that all custom implementation can
> be removed from drivers.
> 
> Not only we remove lots of code, we also remove one lock
> operation in fast path, and allow GRO to do its job.
> 
> Signed-off-by: Eric Dumazet 

Applied.


Re: [PATCH net-next] net: remove support for per driver ndo_busy_poll()

2017-02-03 Thread David Miller
From: Eric Dumazet 
Date: Thu, 02 Feb 2017 18:43:28 -0800

> From: Eric Dumazet 
> 
> We added generic support for busy polling in NAPI layer in linux-4.5
> 
> No network driver uses ndo_busy_poll() anymore, we can get rid
> of the pointer in struct net_device_ops, and its use in sk_busy_loop()
> 
> Saves NETIF_F_BUSY_POLL features bit.
> 
> Signed-off-by: Eric Dumazet 

Applied.


Re: [PATCH net-next] ixgbe: get rid of custom busy polling code

2017-02-03 Thread David Miller
From: Eric Dumazet 
Date: Thu, 02 Feb 2017 16:26:39 -0800

> From: Eric Dumazet 
> 
> In linux-4.5, busy polling was implemented in core
> NAPI stack, meaning that all custom implementation can
> be removed from drivers.
> 
> Not only we remove lots of code, we also remove one lock
> operation in fast path, and allow GRO to do its job.
> 
> Signed-off-by: Eric Dumazet 

Applied.


Re: [PATCH net-next 5/9] sunvnet: add memory barrier before check for tx enable

2017-02-03 Thread Eric Dumazet
On Fri, 2017-02-03 at 13:20 -0800, Shannon Nelson wrote:
> On 2/3/2017 9:56 AM, Eric Dumazet wrote:
> > On Fri, 2017-02-03 at 09:42 -0800, Shannon Nelson wrote:
> >> In order to allow the underlying LDC and outstanding memory operations
> >> to potentially catch up with the driver's Tx requests, add a memory
> >> barrier before checking again for available tx descriptors.
> >>
> >> Signed-off-by: Shannon Nelson 
> >> ---
> >>  drivers/net/ethernet/sun/sunvnet_common.c |1 +
> >>  1 files changed, 1 insertions(+), 0 deletions(-)
> >>
> >> diff --git a/drivers/net/ethernet/sun/sunvnet_common.c 
> >> b/drivers/net/ethernet/sun/sunvnet_common.c
> >> index 5d0d386..98e758e 100644
> >> --- a/drivers/net/ethernet/sun/sunvnet_common.c
> >> +++ b/drivers/net/ethernet/sun/sunvnet_common.c
> >> @@ -1467,6 +1467,7 @@ ldc_start_done:
> >>dr->prod = (dr->prod + 1) & (VNET_TX_RING_SIZE - 1);
> >>if (unlikely(vnet_tx_dring_avail(dr) < 1)) {
> >>netif_tx_stop_queue(txq);
> >> +  dma_wmb();
> >
> > This does not look right.
> >
> > I believe you need smp_rmb() here.
> 
> Well, it probably should be dma_rmb(), since regardless of the number of 
> cores we think we have, we're communicating with a peer ldom that has 
> its own core(s).  Either way, on sparc they all seem to boil down to the 
> same bit of asm, but using the "rmb" part makes more logical sense. 
> I'll respin with dma_rmb().
> 

Transmit completion might happen on another cpu, regardless of ldom.

Therefore you need smp_rmb() here ( like mellanox/mlx4/en_tx.c) , or
even smp_mb() as bnx2x does.

dma_rmb() is never used in this context.





Re: [PATCH 00/27] Netfilter updates for net-next

2017-02-03 Thread David Miller
From: Pablo Neira Ayuso 
Date: Fri,  3 Feb 2017 13:25:11 +0100

> The following patchset contains Netfilter updates for your net-next
> tree, they are:
 ...
> You can pull these changes from:
> 
>   git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next.git

Pulled, thanks a lot!


Re: [PATCH net-next v2 00/12] bnxt_en: Add XDP support.

2017-02-03 Thread David Miller
From: Tom Herbert 
Date: Fri, 3 Feb 2017 13:58:56 -0800

> On Fri, Feb 3, 2017 at 1:50 PM, David Miller  wrote:
>> From: Michael Chan 
>> Date: Fri, 3 Feb 2017 13:13:47 -0800
>>
>>> On Fri, Feb 3, 2017 at 12:49 PM, David Miller  wrote:

 Please _DO NOT_ guard XDP support with an ifdef the user
 can modify.

 Treat it like any other common netdev feature a driver might
 support such as checksum offloading or GRO.

>>>
>>> David, I want to make sure I understand completely.  Are you saying
>>> don't use Kconfig option for XDP?  Have it always available?
>>
>> Yes.
>>
>> I don't see a similar config option used in any other driver.
>>
>> What's really driving me completely mad about driver XDP adoption
>> is that there is so much inconsistency.
>>
>> If you do not see another XDP supporting driver do something, don't be
>> tempted to blaze your own trail and handle something in a unique way.
>>
>> We don't set precedent by one driver saying "hey it's better to do
>> things this way, forget what all the other drivers are doing."  Rather
>> we have a "discussion" about what the appropriate thing is to do and
>> convert all the drivers only after a decision has been made.
>>
>> Meanwhile we keep the status quo.
> 
> I am working on some API changes that will hopefully get a little
> consistency across these drivers (this includes feature flag
> NETIF_F_XDP). This will reduce code some and should be good cleanup,
> but XDP is currently very intertwined with the critical data path so
> we might need to be looking at this for a while. There's now more
> drivers with XDP support than when I started this work, so I don't
> think bnxt should wait for this cleanup-- it's just one more driver
> we'll have to retrofit.

Of course.

Michael just respin with the Kconfig change and I'll apply your
series.  In fact I was about to until I noticed the XDP Kconfig knob
:)


Re: [PATCH net-next v2 00/12] bnxt_en: Add XDP support.

2017-02-03 Thread Tom Herbert
On Fri, Feb 3, 2017 at 1:50 PM, David Miller  wrote:
> From: Michael Chan 
> Date: Fri, 3 Feb 2017 13:13:47 -0800
>
>> On Fri, Feb 3, 2017 at 12:49 PM, David Miller  wrote:
>>>
>>> Please _DO NOT_ guard XDP support with an ifdef the user
>>> can modify.
>>>
>>> Treat it like any other common netdev feature a driver might
>>> support such as checksum offloading or GRO.
>>>
>>
>> David, I want to make sure I understand completely.  Are you saying
>> don't use Kconfig option for XDP?  Have it always available?
>
> Yes.
>
> I don't see a similar config option used in any other driver.
>
> What's really driving me completely mad about driver XDP adoption
> is that there is so much inconsistency.
>
> If you do not see another XDP supporting driver do something, don't be
> tempted to blaze your own trail and handle something in a unique way.
>
> We don't set precedent by one driver saying "hey it's better to do
> things this way, forget what all the other drivers are doing."  Rather
> we have a "discussion" about what the appropriate thing is to do and
> convert all the drivers only after a decision has been made.
>
> Meanwhile we keep the status quo.

I am working on some API changes that will hopefully get a little
consistency across these drivers (this includes feature flag
NETIF_F_XDP). This will reduce code some and should be good cleanup,
but XDP is currently very intertwined with the critical data path so
we might need to be looking at this for a while. There's now more
drivers with XDP support than when I started this work, so I don't
think bnxt should wait for this cleanup-- it's just one more driver
we'll have to retrofit.

Tom


Re: [PATCH net] bpf: expose netns inode to bpf programs

2017-02-03 Thread Daniel Borkmann

On 01/26/2017 04:27 AM, Alexei Starovoitov wrote:

in cases where bpf programs are looking at sockets and packets
that belong to different netns, it could be useful to read netns inode,
so that programs can make intelligent decisions.
For example to disallow raw sockets in all non-init netns the program can do:
if (sk->type == SOCK_RAW && sk->netns_inum != 0xf075)
   return 0;
where 0xf075 inode comes from /proc/pid/ns/net

Similarly TC cls_bpf/act_bpf and socket filters can do
if (skb->netns_inum == expected_inode)

The lack of netns awareness was a concern even for socket filters,
since the application can attach the same bpf program to sockets
in a different netns. Just like tc cls_bpf program can work in
different netns as well, so it has to be addressed uniformly
across all types of bpf programs.


Sorry for jumping in late, but my question is, isn't this helper
really only relevant for BPF_PROG_TYPE_CGROUP_* typed programs?
Thus other prog types making use of bpf_convert_ctx_access()
should probably reject that in .is_valid_access() callback?

Reason why I'm asking is that for sockets or tc progs, you
already have a netns context where you're attached to, and f.e.
skbs leaving that netns context will be orphaned. Thus, why
would tc or sock filter tailor a program with such a check,
if it can only match/mismatch its own netns inum eventually?
When making this effort to lookup and hardcode the dev/inode
num into the prog, wouldn't it be easier for these types if
the managing app that loads these progs tailors the progs for
a given netns directly, so also such runtime check can generally
be avoided? Am I missing something wrt 'concerns'? The cgroup
ones are global, so there I can see that it could be used in
some way f.e. to restrict access, account, etc.

Thanks,
Daniel


Re: [patch net-next v2 00/19] mlxsw: Introduce TC Flower offload using TCAM

2017-02-03 Thread David Miller
From: Jiri Pirko 
Date: Fri,  3 Feb 2017 10:28:50 +0100

> This patchset introduces support for offloading TC cls_flower and actions
> to Spectrum TCAM-base policy engine.
> 
> The patchset contains patches to allow work with flexible keys and actions
> which are used in Spectrum TCAM.
> 
> It also contains in-driver infrastructure for offloading TC rules to TCAM HW.
> The TCAM management code is simple and limited for now. It is going to be
> extended as a follow-up work.
> 
> The last patch uses the previously introduced infra to allow to implement
> cls_flower offloading. Initially, only limited set of match-keys and only
> a drop and forward actions are supported.
> 
> As a dependency, this patchset introduces parman - priority array
> area manager - as a library.
> 
> ---
> v1->v2:
> - patch11:
>   - use __set_bit and __test_and_clear_bit as suggested by DaveM
> - patch16:
>   - Added documentation to the API functions as suggested by Tom Herbert
> - patch17:
>   - use __set_bit and __clear_bit as suggested by DaveM

Series applied, thanks Jiri.


Re: [RFC PATCH 0/2] mac80211: use crypto shash for AES cmac

2017-02-03 Thread Ard Biesheuvel
On 3 February 2017 at 21:47, Malinen, Jouni  wrote:
> On Fri, Feb 03, 2017 at 07:25:53PM +, Ard Biesheuvel wrote:
>> The mac80211 aes_cmac code reimplements the CMAC algorithm based on the
>> core AES cipher, which is rather restrictive in how platforms can satisfy
>> the dependency on this algorithm. For instance, SIMD implementations may
>> have a considerable setup time, which cannot be amortized over the entire
>> input when calling into the crypto API one block at a time. Also, it prevents
>> the use of more secure fixed time implementations, since not all AES drivers
>> expose the cipher interface.
>>
>> So switch aes_cmac to use a cmac(aes) shash. This requires a preparatory
>> patch so that we can remove the open coded implementation, which it shares
>> with the fils aead driver. That driver could receive the same treatment, in
>> which case we could replace patch #1 with one that carries it over first.
>>
>> Note that this is an RFC. I have no idea how I would go about testing this
>> code, but I am on a mission to remove as many dependencies on the generic
>> AES cipher as I can.
>
> Neither the BIP nor FILS cases have any real speed requirements taken
> into account how rarely they end up being used in practice (there is
> really no use case for BIP today and FILS is used only once per
> association). That said, there should be no issues with moving these to
> a more generic mechanism assuming one is available now (I don't think
> that was the case when I was working on BIP and I was too lazy to figure
> out how to convert it or the newer FILS implementation)..
>
> mac80211_hwsim should allow some of the testing to be done with wlantest
> confirming the results in user space (*). I think that would cover all
> of BIP (net/mac80211/aes_cmac.c), but not FILS.

OK, that looks like something I could figure out how to use. But are
you saying the CMAC code is never called in practice?

> For FILS, we do not
> currently have a convenient mechanism for running two different
> instances of kernel or even just mac80211 in the setup, so that would
> likely need testing with real WLAN hardware. I don't currently have a
> good setup for testing this (was using Backports-based solution in the
> past instead of full kernel build and Backports is a bit behind the
> current state..), but I guess I'll need to build something functional
> for this eventually.. Once that's in working condition on two devices,
> it would be straightforward to run a test (snapshot of hostap.git build
> to enable FILS functionality and go through one FILS authentication
> round)..
>
> Another alternative would be to extend wlantest to decrypt/validate FILS
> AEAD use case based on keys exposed from hostapd or wpa_supplicant.
> There has not been sufficient use case for that so far and I have not
> bothered working on it yet.
>
>
> By the way, FILS AEAD uses SIV mode and I'm not sure it is supported in
> the current crypto code, so that would be one additional piece to take
> care of when considering net/mac80211/fils_aead.c conversion.
>

I did spot something peculiar when looking at the code: if I am
reading the following sequence correctly (from
fils_encrypt_assoc_req())

addr[0] = mgmt->sa;
len[0] = ETH_ALEN;
/* The AP's BSSID */
addr[1] = mgmt->da;
len[1] = ETH_ALEN;
/* The STA's nonce */
addr[2] = assoc_data->fils_nonces;
len[2] = FILS_NONCE_LEN;
/* The AP's nonce */
addr[3] = &assoc_data->fils_nonces[FILS_NONCE_LEN];
len[3] = FILS_NONCE_LEN;
/* The (Re)Association Request frame from the Capability Information
* field to the FILS Session element (both inclusive).
*/
addr[4] = capab;
len[4] = encr - capab;

crypt_len = skb->data + skb->len - encr;
skb_put(skb, AES_BLOCK_SIZE);
return aes_siv_encrypt(assoc_data->fils_kek, assoc_data->fils_kek_len,
  encr, crypt_len, 1, addr, len, encr);

the addr[]/len[] arrays are populated with 5 (addr, len) pairs, but
only one is actually passed into aes_siv_encrypt()? This is actually
the main reason I stopped looking into whether I could convert it to
CMAC, because I couldn't figure it out.


Re: [PATCH net-next 5/9] sunvnet: add memory barrier before check for tx enable

2017-02-03 Thread David Miller
From: Shannon Nelson 
Date: Fri, 3 Feb 2017 13:20:43 -0800

> On 2/3/2017 9:56 AM, Eric Dumazet wrote:
>> On Fri, 2017-02-03 at 09:42 -0800, Shannon Nelson wrote:
>>> In order to allow the underlying LDC and outstanding memory operations
>>> to potentially catch up with the driver's Tx requests, add a memory
>>> barrier before checking again for available tx descriptors.
>>>
>>> Signed-off-by: Shannon Nelson 
>>> ---
>>>  drivers/net/ethernet/sun/sunvnet_common.c |1 +
>>>  1 files changed, 1 insertions(+), 0 deletions(-)
>>>
>>> diff --git a/drivers/net/ethernet/sun/sunvnet_common.c
>>> b/drivers/net/ethernet/sun/sunvnet_common.c
>>> index 5d0d386..98e758e 100644
>>> --- a/drivers/net/ethernet/sun/sunvnet_common.c
>>> +++ b/drivers/net/ethernet/sun/sunvnet_common.c
>>> @@ -1467,6 +1467,7 @@ ldc_start_done:
>>> dr->prod = (dr->prod + 1) & (VNET_TX_RING_SIZE - 1);
>>> if (unlikely(vnet_tx_dring_avail(dr) < 1)) {
>>> netif_tx_stop_queue(txq);
>>> +   dma_wmb();
>>
>> This does not look right.
>>
>> I believe you need smp_rmb() here.
> 
> Well, it probably should be dma_rmb(), since regardless of the number
> of cores we think we have, we're communicating with a peer ldom that
> has its own core(s).  Either way, on sparc they all seem to boil down
> to the same bit of asm, but using the "rmb" part makes more logical
> sense. I'll respin with dma_rmb().

DMA barriers are for ordering between CPUs and devices.

SMP barriers are for ordering between CPUs, which is your situation
here.

It is completely inappropriate to use DMA barriers in a virtualization
device driver.


Re: [PATCH net-next v2 00/12] bnxt_en: Add XDP support.

2017-02-03 Thread David Miller
From: Michael Chan 
Date: Fri, 3 Feb 2017 13:13:47 -0800

> On Fri, Feb 3, 2017 at 12:49 PM, David Miller  wrote:
>>
>> Please _DO NOT_ guard XDP support with an ifdef the user
>> can modify.
>>
>> Treat it like any other common netdev feature a driver might
>> support such as checksum offloading or GRO.
>>
> 
> David, I want to make sure I understand completely.  Are you saying
> don't use Kconfig option for XDP?  Have it always available?

Yes.

I don't see a similar config option used in any other driver.

What's really driving me completely mad about driver XDP adoption
is that there is so much inconsistency.

If you do not see another XDP supporting driver do something, don't be
tempted to blaze your own trail and handle something in a unique way.

We don't set precedent by one driver saying "hey it's better to do
things this way, forget what all the other drivers are doing."  Rather
we have a "discussion" about what the appropriate thing is to do and
convert all the drivers only after a decision has been made.

Meanwhile we keep the status quo.


Re: [PATCH v2 2/2] net: ethernet: bgmac: mac address change bug

2017-02-03 Thread Florian Fainelli
On 02/03/2017 01:44 PM, Rafał Miłecki wrote:
> On 02/03/2017 10:08 PM, Jon Mason wrote:
>> From: Hari Vyas 
>>
>> ndo_set_mac_address() passes struct sockaddr * as 2nd parameter to
>> bgmac_set_mac_address() but code assumed u8 *.  This caused two bytes
>> chopping and the wrong mac address was configured.
>>
>> Signed-off-by: Hari Vyas 
>> Signed-off-by: Jon Mason 
>> Fixes: 4e209001b86 ("bgmac: write mac address to hardware in
>> ndo_set_mac_address")
> 
> I think you were going to Cc stable?

David takes care of queueing patches for -stable trees:

http://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/Documentation/networking/netdev-FAQ.txt#n114
-- 
Florian


Re: [RFC PATCH 0/2] mac80211: use crypto shash for AES cmac

2017-02-03 Thread Malinen, Jouni
On Fri, Feb 03, 2017 at 07:25:53PM +, Ard Biesheuvel wrote:
> The mac80211 aes_cmac code reimplements the CMAC algorithm based on the
> core AES cipher, which is rather restrictive in how platforms can satisfy
> the dependency on this algorithm. For instance, SIMD implementations may
> have a considerable setup time, which cannot be amortized over the entire
> input when calling into the crypto API one block at a time. Also, it prevents
> the use of more secure fixed time implementations, since not all AES drivers
> expose the cipher interface.
> 
> So switch aes_cmac to use a cmac(aes) shash. This requires a preparatory
> patch so that we can remove the open coded implementation, which it shares
> with the fils aead driver. That driver could receive the same treatment, in
> which case we could replace patch #1 with one that carries it over first.
> 
> Note that this is an RFC. I have no idea how I would go about testing this
> code, but I am on a mission to remove as many dependencies on the generic
> AES cipher as I can.

Neither the BIP nor FILS cases have any real speed requirements taken
into account how rarely they end up being used in practice (there is
really no use case for BIP today and FILS is used only once per
association). That said, there should be no issues with moving these to
a more generic mechanism assuming one is available now (I don't think
that was the case when I was working on BIP and I was too lazy to figure
out how to convert it or the newer FILS implementation)..

mac80211_hwsim should allow some of the testing to be done with wlantest
confirming the results in user space (*). I think that would cover all
of BIP (net/mac80211/aes_cmac.c), but not FILS. For FILS, we do not
currently have a convenient mechanism for running two different
instances of kernel or even just mac80211 in the setup, so that would
likely need testing with real WLAN hardware. I don't currently have a
good setup for testing this (was using Backports-based solution in the
past instead of full kernel build and Backports is a bit behind the
current state..), but I guess I'll need to build something functional
for this eventually.. Once that's in working condition on two devices,
it would be straightforward to run a test (snapshot of hostap.git build
to enable FILS functionality and go through one FILS authentication
round)..

Another alternative would be to extend wlantest to decrypt/validate FILS
AEAD use case based on keys exposed from hostapd or wpa_supplicant.
There has not been sufficient use case for that so far and I have not
bothered working on it yet.


By the way, FILS AEAD uses SIV mode and I'm not sure it is supported in
the current crypto code, so that would be one additional piece to take
care of when considering net/mac80211/fils_aead.c conversion.


(*)
http://buildbot.w1.fi/hwsim/
http://w1.fi/cgit/hostap/tree/tests/hwsim/vm/example-vm-setup.txt

-- 
Jouni Malinen                                            PGP id EFC895FA

[PULL] vhost: cleanups and fixes

2017-02-03 Thread Michael S. Tsirkin
The following changes since commit 566cf877a1fcb6d6dc0126b076aad062054c2637:

  Linux 4.10-rc6 (2017-01-29 14:25:17 -0800)

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost.git tags/for_linus

for you to fetch changes up to 79134d11d030b886106bf45a5638c1ccb1f0856c:

  MAINTAINERS: update email address for Amit Shah (2017-02-03 23:40:36 +0200)


virtio, vhost: last minute fixes

ARM DMA fix revert
vhost endian-ness fix
MAINTAINERS: email address change for Amit

Signed-off-by: Michael S. Tsirkin 


Amit Shah (1):
  MAINTAINERS: update email address for Amit Shah

Halil Pasic (1):
  vhost: fix initialization for vq->is_le

Michael S. Tsirkin (1):
  Revert "vring: Force use of DMA API for ARM-based systems with legacy 
devices"

 MAINTAINERS  |  2 +-
 drivers/vhost/vhost.c| 10 --
 drivers/virtio/virtio_ring.c |  7 ---
 3 files changed, 5 insertions(+), 14 deletions(-)


Re: [PATCH net-next] ixgbe: get rid of custom busy polling code

2017-02-03 Thread Alexander Duyck
On Thu, Feb 2, 2017 at 4:26 PM, Eric Dumazet  wrote:
> From: Eric Dumazet 
>
> In linux-4.5, busy polling was implemented in core
> NAPI stack, meaning that all custom implementation can
> be removed from drivers.
>
> Not only we remove lots of code, we also remove one lock
> operation in fast path, and allow GRO to do its job.
>
> Signed-off-by: Eric Dumazet 
> Cc: Jeff Kirsher 

Looks good to me

Acked-by: Alexander Duyck 


Re: [PATCH net-next] ixgbevf: get rid of custom busy polling code

2017-02-03 Thread Alexander Duyck
On Thu, Feb 2, 2017 at 4:59 PM, Eric Dumazet  wrote:
> From: Eric Dumazet 
>
> In linux-4.5, busy polling was implemented in core
> NAPI stack, meaning that all custom implementation can
> be removed from drivers.
>
> Not only we remove lots of code, we also remove one lock
> operation in fast path, and allow GRO to do its job.
>
> Signed-off-by: Eric Dumazet 
> Cc: Jeff Kirsher 

Looks good to me

Acked-by: Alexander Duyck 


[PATCH net-next 0/3] net: ethernet: bgmac: PM support and clean-ups

2017-02-03 Thread Jon Mason
Add code to support Power Management (only tested on NS2), and add some
code clean-ups

Joey Zhong (1):
  net: ethernet: bgmac: driver power management

Jon Mason (2):
  net: ethernet: bgmac: use #defines for MAX size
  net: ethernet: bgmac: unify code of the same family

 drivers/net/ethernet/broadcom/bgmac-bcma.c | 64 +++---
 drivers/net/ethernet/broadcom/bgmac-platform.c | 34 ++
 drivers/net/ethernet/broadcom/bgmac.c  | 53 +
 drivers/net/ethernet/broadcom/bgmac.h  |  4 +-
 4 files changed, 118 insertions(+), 37 deletions(-)

-- 
2.7.4



Re: Understanding mutual exclusion between rtnl_lock and rcu_read_lock

2017-02-03 Thread Cong Wang
On Thu, Feb 2, 2017 at 6:05 PM, Joel Cunningham  wrote:
>
> In the case of SIOCSIFHWADDR, we get a pointer to the net_device through 
> __dev_get_by_name() and then pass it to dev_set_mac_address() to modify 
> through ndo_set_mac_address().  I didn’t see any uses of RCU APIs on the 
> writer side and that’s why I figured there was something going on with 
> rtnl_lock() that I didn’t understand or that the dev_ioctl function wasn’t 
> re-entrant from another CPU
>

You are right, that RCU read lock could merely protect the netdevice from
being unregistered concurrently, can't prevent a concurrent dev_ifsioc().

I don't know why Eric changed it to RCU read lock, it is not a hot path, using
rtnl lock is fine and can guarantee an atomic read.


[PATCH 2/3] net: ethernet: bgmac: unify code of the same family

2017-02-03 Thread Jon Mason
BCM471X and BCM535X are of the same family (from what I can derive from
internal documents).  Group them into the case statement together, which
results in more code reuse.

Also, use existing helper variables to make the code a little more
readable too.

Signed-off-by: Jon Mason 
---
 drivers/net/ethernet/broadcom/bgmac-bcma.c | 64 +-
 1 file changed, 28 insertions(+), 36 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bgmac-bcma.c 
b/drivers/net/ethernet/broadcom/bgmac-bcma.c
index 5ef60d4..f5c27f4 100644
--- a/drivers/net/ethernet/broadcom/bgmac-bcma.c
+++ b/drivers/net/ethernet/broadcom/bgmac-bcma.c
@@ -192,36 +192,50 @@ static int bgmac_probe(struct bcma_device *core)
goto err1;
}
 
-   bgmac->has_robosw = !!(core->bus->sprom.boardflags_lo &
-  BGMAC_BFL_ENETROBO);
+   bgmac->has_robosw = !!(sprom->boardflags_lo & BGMAC_BFL_ENETROBO);
if (bgmac->has_robosw)
dev_warn(bgmac->dev, "Support for Roboswitch not 
implemented\n");
 
-   if (core->bus->sprom.boardflags_lo & BGMAC_BFL_ENETADM)
+   if (sprom->boardflags_lo & BGMAC_BFL_ENETADM)
dev_warn(bgmac->dev, "Support for ADMtek ethernet switch not 
implemented\n");
 
/* Feature Flags */
-   switch (core->bus->chipinfo.id) {
+   switch (ci->id) {
+   /* BCM 471X/535X family */
+   case BCMA_CHIP_ID_BCM4716:
+   bgmac->feature_flags |= BGMAC_FEAT_CLKCTLST;
+   /* fallthrough */
+   case BCMA_CHIP_ID_BCM47162:
+   bgmac->feature_flags |= BGMAC_FEAT_FLW_CTRL2;
+   bgmac->feature_flags |= BGMAC_FEAT_SET_RXQ_CLK;
+   break;
case BCMA_CHIP_ID_BCM5357:
+   case BCMA_CHIP_ID_BCM53572:
bgmac->feature_flags |= BGMAC_FEAT_SET_RXQ_CLK;
bgmac->feature_flags |= BGMAC_FEAT_CLKCTLST;
bgmac->feature_flags |= BGMAC_FEAT_FLW_CTRL1;
bgmac->feature_flags |= BGMAC_FEAT_SW_TYPE_PHY;
-   if (core->bus->chipinfo.pkg == BCMA_PKG_ID_BCM47186) {
-   bgmac->feature_flags |= BGMAC_FEAT_IOST_ATTACHED;
+   if (ci->pkg == BCMA_PKG_ID_BCM47188 ||
+   ci->pkg == BCMA_PKG_ID_BCM47186) {
bgmac->feature_flags |= BGMAC_FEAT_SW_TYPE_RGMII;
+   bgmac->feature_flags |= BGMAC_FEAT_IOST_ATTACHED;
}
-   if (core->bus->chipinfo.pkg == BCMA_PKG_ID_BCM5358)
+   if (ci->pkg == BCMA_PKG_ID_BCM5358)
bgmac->feature_flags |= BGMAC_FEAT_SW_TYPE_EPHYRMII;
break;
-   case BCMA_CHIP_ID_BCM53572:
-   bgmac->feature_flags |= BGMAC_FEAT_SET_RXQ_CLK;
+   case BCMA_CHIP_ID_BCM53573:
bgmac->feature_flags |= BGMAC_FEAT_CLKCTLST;
-   bgmac->feature_flags |= BGMAC_FEAT_FLW_CTRL1;
-   bgmac->feature_flags |= BGMAC_FEAT_SW_TYPE_PHY;
-   if (core->bus->chipinfo.pkg == BCMA_PKG_ID_BCM47188) {
-   bgmac->feature_flags |= BGMAC_FEAT_SW_TYPE_RGMII;
+   bgmac->feature_flags |= BGMAC_FEAT_SET_RXQ_CLK;
+   if (ci->pkg == BCMA_PKG_ID_BCM47189)
bgmac->feature_flags |= BGMAC_FEAT_IOST_ATTACHED;
+   if (core->core_unit == 0) {
+   bgmac->feature_flags |= BGMAC_FEAT_CC4_IF_SW_TYPE;
+   if (ci->pkg == BCMA_PKG_ID_BCM47189)
+   bgmac->feature_flags |=
+   BGMAC_FEAT_CC4_IF_SW_TYPE_RGMII;
+   } else if (core->core_unit == 1) {
+   bgmac->feature_flags |= BGMAC_FEAT_IRQ_ID_OOB_6;
+   bgmac->feature_flags |= BGMAC_FEAT_CC7_IF_TYPE_RGMII;
}
break;
case BCMA_CHIP_ID_BCM4749:
@@ -229,18 +243,11 @@ static int bgmac_probe(struct bcma_device *core)
bgmac->feature_flags |= BGMAC_FEAT_CLKCTLST;
bgmac->feature_flags |= BGMAC_FEAT_FLW_CTRL1;
bgmac->feature_flags |= BGMAC_FEAT_SW_TYPE_PHY;
-   if (core->bus->chipinfo.pkg == 10) {
+   if (ci->pkg == 10) {
bgmac->feature_flags |= BGMAC_FEAT_SW_TYPE_RGMII;
bgmac->feature_flags |= BGMAC_FEAT_IOST_ATTACHED;
}
break;
-   case BCMA_CHIP_ID_BCM4716:
-   bgmac->feature_flags |= BGMAC_FEAT_CLKCTLST;
-   /* fallthrough */
-   case BCMA_CHIP_ID_BCM47162:
-   bgmac->feature_flags |= BGMAC_FEAT_FLW_CTRL2;
-   bgmac->feature_flags |= BGMAC_FEAT_SET_RXQ_CLK;
-   break;
/* bcm4707_family */
case BCMA_CHIP_ID_BCM4707:
case BCMA_CHIP_ID_BCM47094:
@@ -249,21 +256,6 @@ static int bgmac_probe(struct bcma_device *core)

[PATCH 1/3] net: ethernet: bgmac: use #defines for MAX size

2017-02-03 Thread Jon Mason
The maximum frame size is really just the standard ethernet frame size
and FCS.  So use those existing defines to make the code a little more
beautiful.

Signed-off-by: Jon Mason 
---
 drivers/net/ethernet/broadcom/bgmac.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/broadcom/bgmac.h 
b/drivers/net/ethernet/broadcom/bgmac.h
index 6d0b5b3..5a518fe 100644
--- a/drivers/net/ethernet/broadcom/bgmac.h
+++ b/drivers/net/ethernet/broadcom/bgmac.h
@@ -402,7 +402,7 @@
 
 #define BGMAC_WEIGHT   64
 
-#define ETHER_MAX_LEN   1518
+#define ETHER_MAX_LEN  (ETH_FRAME_LEN + ETH_FCS_LEN)
 
 /* Feature Flags */
 #define BGMAC_FEAT_TX_MASK_SETUP   BIT(0)
-- 
2.7.4



[PATCH 3/3] net: ethernet: bgmac: driver power management

2017-02-03 Thread Jon Mason
From: Joey Zhong 

Implements suspend/resume, external phy 54810 is assumed
to remain powered up during deep-sleep for wake-on-lane.

Signed-off-by: Joey Zhong 
Signed-off-by: Jon Mason 
---
 drivers/net/ethernet/broadcom/bgmac-platform.c | 34 +
 drivers/net/ethernet/broadcom/bgmac.c  | 53 ++
 drivers/net/ethernet/broadcom/bgmac.h  |  2 +
 3 files changed, 89 insertions(+)

diff --git a/drivers/net/ethernet/broadcom/bgmac-platform.c 
b/drivers/net/ethernet/broadcom/bgmac-platform.c
index 2d153f7..3df91e7 100644
--- a/drivers/net/ethernet/broadcom/bgmac-platform.c
+++ b/drivers/net/ethernet/broadcom/bgmac-platform.c
@@ -21,8 +21,12 @@
 #include 
 #include "bgmac.h"
 
+#define NICPM_PADRING_CFG  0x0004
 #define NICPM_IOMUX_CTRL   0x0008
 
+#define NICPM_PADRING_CFG_INIT_VAL 0x7400
+#define NICPM_IOMUX_CTRL_INIT_VAL_AX   0x2188
+
 #define NICPM_IOMUX_CTRL_INIT_VAL  0x3196e000
 #define NICPM_IOMUX_CTRL_SPD_SHIFT 10
 #define NICPM_IOMUX_CTRL_SPD_10M   0
@@ -108,6 +112,10 @@ static void bgmac_nicpm_speed_set(struct net_device 
*net_dev)
if (!bgmac->plat.nicpm_base)
return;
 
+   /* SET RGMII IO CONFIG */
+   writel(NICPM_PADRING_CFG_INIT_VAL,
+  bgmac->plat.nicpm_base + NICPM_PADRING_CFG);
+
val = NICPM_IOMUX_CTRL_INIT_VAL;
switch (bgmac->net_dev->phydev->speed) {
default:
@@ -239,6 +247,31 @@ static int bgmac_remove(struct platform_device *pdev)
return 0;
 }
 
+#ifdef CONFIG_PM
+static int bgmac_suspend(struct device *dev)
+{
+   struct bgmac *bgmac = dev_get_drvdata(dev);
+
+   return bgmac_enet_suspend(bgmac);
+}
+
+static int bgmac_resume(struct device *dev)
+{
+   struct bgmac *bgmac = dev_get_drvdata(dev);
+
+   return bgmac_enet_resume(bgmac);
+}
+
+static const struct dev_pm_ops bgmac_pm_ops = {
+   .suspend = bgmac_suspend,
+   .resume = bgmac_resume
+};
+
+#define BGMAC_PM_OPS (&bgmac_pm_ops)
+#else
+#define BGMAC_PM_OPS NULL
+#endif /* CONFIG_PM */
+
 static const struct of_device_id bgmac_of_enet_match[] = {
{.compatible = "brcm,amac",},
{.compatible = "brcm,nsp-amac",},
@@ -252,6 +285,7 @@ static struct platform_driver bgmac_enet_driver = {
.driver = {
.name  = "bgmac-enet",
.of_match_table = bgmac_of_enet_match,
+   .pm = BGMAC_PM_OPS
},
.probe = bgmac_probe,
.remove = bgmac_remove,
diff --git a/drivers/net/ethernet/broadcom/bgmac.c 
b/drivers/net/ethernet/broadcom/bgmac.c
index bd549f8..8d3aada 100644
--- a/drivers/net/ethernet/broadcom/bgmac.c
+++ b/drivers/net/ethernet/broadcom/bgmac.c
@@ -1478,6 +1478,7 @@ int bgmac_enet_probe(struct bgmac *bgmac)
 
net_dev->irq = bgmac->irq;
SET_NETDEV_DEV(net_dev, bgmac->dev);
+   dev_set_drvdata(bgmac->dev, bgmac);
 
if (!is_valid_ether_addr(bgmac->mac_addr)) {
dev_err(bgmac->dev, "Invalid MAC addr: %pM\n",
@@ -1551,5 +1552,57 @@ void bgmac_enet_remove(struct bgmac *bgmac)
 }
 EXPORT_SYMBOL_GPL(bgmac_enet_remove);
 
+int bgmac_enet_suspend(struct bgmac *bgmac)
+{
+   netdev_info(bgmac->net_dev, "Suspending\n");
+
+   if (netif_running(bgmac->net_dev)) {
+   netif_stop_queue(bgmac->net_dev);
+
+   napi_disable(&bgmac->napi);
+
+   netif_tx_lock(bgmac->net_dev);
+   netif_device_detach(bgmac->net_dev);
+   netif_tx_unlock(bgmac->net_dev);
+
+   bgmac_chip_intrs_off(bgmac);
+   bgmac_chip_reset(bgmac);
+   bgmac_dma_cleanup(bgmac);
+   }
+
+   phy_stop(bgmac->net_dev->phydev);
+
+   return 0;
+}
+EXPORT_SYMBOL_GPL(bgmac_enet_suspend);
+
+int bgmac_enet_resume(struct bgmac *bgmac)
+{
+   int rc;
+
+   netdev_info(bgmac->net_dev, "Resuming\n");
+
+   phy_start(bgmac->net_dev->phydev);
+
+   if (netif_running(bgmac->net_dev)) {
+   rc = bgmac_dma_init(bgmac);
+   if (rc)
+   return rc;
+
+   bgmac_chip_init(bgmac);
+
+   napi_enable(&bgmac->napi);
+
+   netif_tx_lock(bgmac->net_dev);
+   netif_device_attach(bgmac->net_dev);
+   netif_tx_unlock(bgmac->net_dev);
+
+   netif_start_queue(bgmac->net_dev);
+   }
+
+   return 0;
+}
+EXPORT_SYMBOL_GPL(bgmac_enet_resume);
+
 MODULE_AUTHOR("Rafał Miłecki");
 MODULE_LICENSE("GPL");
diff --git a/drivers/net/ethernet/broadcom/bgmac.h 
b/drivers/net/ethernet/broadcom/bgmac.h
index 5a518fe..741ca27 100644
--- a/drivers/net/ethernet/broadcom/bgmac.h
+++ b/drivers/net/ethernet/broadcom/bgmac.h
@@ -538,6 +538,8 @@ int bgmac_enet_probe(struct bgmac *bgmac);
 void bgmac_enet_remove(struct bgmac *bgmac);
 void bgmac_adjust_link(struct net_device *net_dev);
 int bgmac_phy_connect_direct(struct 

Re: [next-net 00/19][pull request] Intel Wired LAN Driver Updates 2017-02-02

2017-02-03 Thread David Miller

Pretty sloppy submission Jeff.

First of all, things are getting backlogged way too much.  19 patches
is too large, you need to keep it closer down to 10 and the very very
low teens.  You need to send me pull requests more often so that this
does not happen.

Second of all, all of your Subjects are messed up and have "queue"
misspelled and things like this.

Finally, there is nothing to even pull at the URL you provided:

[davem@dhcp-10-15-49-210 net-next]$ git pull --no-ff 
git://git.kernel.org/pub/scm/linux/kernel/git/jkirsher/next-queue master
From git://git.kernel.org/pub/scm/linux/kernel/git/jkirsher/next-queue
 * branch            master     -> FETCH_HEAD
Already up-to-date.


[PATCH net-next][v2] bpf: test for AND edge cases

2017-02-03 Thread Josef Bacik
These two tests are based on the work done for f23cc643f9ba.  The first test is
just a basic one to make sure we don't allow AND'ing negative values, even if it
would result in a valid index for the array.  The second is a cleaned up version
of the original testcase provided by Jann Horn that resulted in the commit.

Acked-by: Alexei Starovoitov 
Acked-by: Daniel Borkmann 
Signed-off-by: Josef Bacik 
---
v1->v2:
-rebased onto net-next

 tools/testing/selftests/bpf/test_verifier.c | 55 +
 1 file changed, 55 insertions(+)

diff --git a/tools/testing/selftests/bpf/test_verifier.c 
b/tools/testing/selftests/bpf/test_verifier.c
index 0d0912c..df194e1 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -4370,6 +4370,61 @@ static struct bpf_test tests[] = {
.result = ACCEPT,
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
},
+   {
+   "invalid and of negative number",
+   .insns = {
+   BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+   BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+   BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+   BPF_LD_MAP_FD(BPF_REG_1, 0),
+   BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+BPF_FUNC_map_lookup_elem),
+   BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+   BPF_MOV64_IMM(BPF_REG_1, 6),
+   BPF_ALU64_IMM(BPF_AND, BPF_REG_1, -4),
+   BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 2),
+   BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+   BPF_ST_MEM(BPF_DW, BPF_REG_0, 0,
+  offsetof(struct test_val, foo)),
+   BPF_EXIT_INSN(),
+   },
+   .fixup_map2 = { 3 },
+   .errstr_unpriv = "R0 pointer arithmetic prohibited",
+   .errstr = "R0 min value is negative, either use unsigned index 
or do a if (index >=0) check.",
+   .result = REJECT,
+   .result_unpriv = REJECT,
+   },
+   {
+   "invalid range check",
+   .insns = {
+   BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+   BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+   BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+   BPF_LD_MAP_FD(BPF_REG_1, 0),
+   BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+BPF_FUNC_map_lookup_elem),
+   BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 12),
+   BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
+   BPF_MOV64_IMM(BPF_REG_9, 1),
+   BPF_ALU32_IMM(BPF_MOD, BPF_REG_1, 2),
+   BPF_ALU32_IMM(BPF_ADD, BPF_REG_1, 1),
+   BPF_ALU32_REG(BPF_AND, BPF_REG_9, BPF_REG_1),
+   BPF_ALU32_IMM(BPF_ADD, BPF_REG_9, 1),
+   BPF_ALU32_IMM(BPF_RSH, BPF_REG_9, 1),
+   BPF_MOV32_IMM(BPF_REG_3, 1),
+   BPF_ALU32_REG(BPF_SUB, BPF_REG_3, BPF_REG_9),
+   BPF_ALU32_IMM(BPF_MUL, BPF_REG_3, 0x1000),
+   BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_3),
+   BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_3, 0),
+   BPF_MOV64_REG(BPF_REG_0, 0),
+   BPF_EXIT_INSN(),
+   },
+   .fixup_map2 = { 3 },
+   .errstr_unpriv = "R0 pointer arithmetic prohibited",
+   .errstr = "R0 min value is negative, either use unsigned index 
or do a if (index >=0) check.",
+   .result = REJECT,
+   .result_unpriv = REJECT,
+   }
 };
 
 static int probe_filter_length(const struct bpf_insn *fp)
-- 
2.7.4



Re: [PATCH 1/1] bonding: Remove unnecessary returned value check

2017-02-03 Thread David Miller
From: Zhu Yanjun 
Date: Thu,  2 Feb 2017 23:46:21 -0500

> The function bond_info_query always returns 0. As such, in the function
> bond_do_ioctl, it is not necessary to check the returned value. So the
> interface type of the function bond_info_query is changed to void. The
> redundant check is removed.
> 
> Signed-off-by: Zhu Yanjun 

Applied to net-next, thanks.


Re: [PATCH net-next 5/8] net: ethernet: annapurna: add statistics helper

2017-02-03 Thread kbuild test robot
Hi Antoine,

[auto build test WARNING on net-next/master]

url:
https://github.com/0day-ci/linux/commits/Antoine-Tenart/ARM-Alpine-Ethernet-support/20170204-022156
config: arm-allmodconfig (attached as .config)
compiler: arm-linux-gnueabi-gcc (Debian 6.1.1-9) 6.1.1 20160705
reproduce:
wget 
https://git.kernel.org/cgit/linux/kernel/git/wfg/lkp-tests.git/plain/sbin/make.cross
 -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# save the attached .config to linux build tree
make.cross ARCH=arm 

All warnings (new ones prefixed by >>):

   drivers/net/ethernet/annapurna/al_eth.c: In function 
'al_eth_restore_ethtool_params':
   drivers/net/ethernet/annapurna/al_eth.c:2139:15: warning: unused variable 
'rx_usecs' [-Wunused-variable]
 unsigned int rx_usecs = adapter->rx_usecs;
  ^~~~
   drivers/net/ethernet/annapurna/al_eth.c:2138:15: warning: unused variable 
'tx_usecs' [-Wunused-variable]
 unsigned int tx_usecs = adapter->tx_usecs;
  ^~~~
   In file included from include/uapi/linux/posix_types.h:4:0,
from include/uapi/linux/types.h:13,
from include/linux/types.h:5,
from include/linux/list.h:4,
from include/linux/module.h:9,
from drivers/net/ethernet/annapurna/al_eth.c:9:
   drivers/net/ethernet/annapurna/al_eth.c: In function 'al_eth_get_stats64':
>> include/linux/stddef.h:7:14: warning: 'return' with a value, in function 
>> returning void
#define NULL ((void *)0)
 ^
>> drivers/net/ethernet/annapurna/al_eth.c:2398:10: note: in expansion of macro 
>> 'NULL'
  return NULL;
 ^~~~
   drivers/net/ethernet/annapurna/al_eth.c:2391:13: note: declared here
static void al_eth_get_stats64(struct net_device *netdev,
^~
>> drivers/net/ethernet/annapurna/al_eth.c:2423:9: warning: 'return' with a 
>> value, in function returning void
 return stats;
^
   drivers/net/ethernet/annapurna/al_eth.c:2391:13: note: declared here
static void al_eth_get_stats64(struct net_device *netdev,
^~

vim +/NULL +2398 drivers/net/ethernet/annapurna/al_eth.c

  2392 struct rtnl_link_stats64 *stats)
  2393  {
  2394  struct al_eth_adapter *adapter = netdev_priv(netdev);
  2395  struct al_eth_mac_stats *mac_stats = >mac_stats;
  2396  
  2397  if (!adapter->up)
> 2398  return NULL;
  2399  
  2400  al_eth_mac_stats_get(>hw_adapter, mac_stats);
  2401  
  2402  stats->rx_packets = mac_stats->aFramesReceivedOK; /* including 
pause frames */
  2403  stats->tx_packets = mac_stats->aFramesTransmittedOK; /* 
including pause frames */
  2404  stats->rx_bytes = mac_stats->aOctetsReceivedOK;
  2405  stats->tx_bytes = mac_stats->aOctetsTransmittedOK;
  2406  stats->rx_dropped = 0;
  2407  stats->multicast = mac_stats->ifInMulticastPkts;
  2408  stats->collisions = 0;
  2409  
  2410  stats->rx_length_errors = (mac_stats->etherStatsUndersizePkts + 
/* good but short */
  2411 mac_stats->etherStatsFragments + /* 
short and bad*/
  2412 mac_stats->etherStatsJabbers + /* 
with crc errors */
  2413 mac_stats->etherStatsOversizePkts);
  2414  stats->rx_crc_errors = mac_stats->aFrameCheckSequenceErrors;
  2415  stats->rx_frame_errors = mac_stats->aAlignmentErrors;
  2416  stats->rx_fifo_errors = mac_stats->etherStatsDropEvents;
  2417  stats->rx_missed_errors = 0;
  2418  stats->tx_window_errors = 0;
  2419  
  2420  stats->rx_errors = mac_stats->ifInErrors;
  2421  stats->tx_errors = mac_stats->ifOutErrors;
  2422  
> 2423  return stats;
  2424  }
  2425  
  2426  static void al_eth_get_drvinfo(struct net_device *dev,

---
0-DAY kernel test infrastructureOpen Source Technology Center
https://lists.01.org/pipermail/kbuild-all   Intel Corporation


.config.gz
Description: application/gzip


Re: [PATCH net-next] tcp: clear pfmemalloc on outgoing skb

2017-02-03 Thread David Miller
From: Eric Dumazet 
Date: Thu, 02 Feb 2017 20:40:08 -0800

> From: Eric Dumazet 
> 
> Josef Bacik diagnosed following problem :
> 
>I was seeing random disconnects while testing NBD over loopback.
>This turned out to be because NBD sets pfmemalloc on its socket,
>however the receiving side is a user space application so does not
>have pfmemalloc set on its socket. This means that
>sk_filter_trim_cap will simply drop this packet, under the
>assumption that the other side will simply retransmit. Well we do
>retransmit, and then the packet is just dropped again for the same
>reason.
> 
> It seems the better way to address this problem is to clear pfmemalloc
> in the TCP transmit path. pfmemalloc strict control really makes sense
> on the receive path. 
> 
> Signed-off-by: Eric Dumazet 
> Acked-by: Josef Bacik 

Applied.


Re: [PATCH net-next] bpf: test for AND edge cases

2017-02-03 Thread Josef Bacik
On Fri, 2017-02-03 at 16:03 -0500, David Miller wrote:
> From: Josef Bacik 
> Date: Thu, 2 Feb 2017 12:00:38 -0500
> 
> > 
> > These two tests are based on the work done for f23cc643f9ba.  The
> > first test is
> > just a basic one to make sure we don't allow AND'ing negative
> > values, even if it
> > would result in a valid index for the array.  The second is a
> > cleaned up version
> > of the original testcase provided by Jann Horn that resulted in the
> > commit.
> > 
> > Signed-off-by: Josef Bacik 
> This doesn't apply cleanly to net-next, please respin.

Ugh sorry did it on the wrong branch, I'll send an updated one shortly.
 Thanks,

Josef


Re: [PATCH net-next] ixgbe: get rid of custom busy polling code

2017-02-03 Thread David Miller

Intel folks please review these two patches, as I need to apply them in order
to add Eric's final patch which removes the busy polling NDO op altogether.

http://patchwork.ozlabs.org/patch/723356/

Thank you.


Re: [PATCH] r8152: Allocate interrupt buffer as part of struct r8152

2017-02-03 Thread Guenter Roeck
On Tue, Jan 31, 2017 at 02:53:47PM -0500, Alan Stern wrote:
> On Tue, 31 Jan 2017, Guenter Roeck wrote:
> 
> > When unloading the r8152 driver using the 'unbind' sysfs attribute
> > in a system with KASAN enabled, the following error message is seen
> > on a regular basis.
> 
> ...
> 
> > The two-byte allocation in conjunction with code analysis suggests that
> > the interrupt buffer has been overwritten. Added instrumentation in the
> > driver shows that the interrupt handler is called after RTL8152_UNPLUG
> > was set, and that this event is associated with the error message above.
> > This suggests that there are situations where the interrupt buffer is used
> > after it has been freed.
> > 
> > To avoid the problem, allocate the interrupt buffer as part of struct
> > r8152.
> > 
> > Cc: Hayes Wang 
> > Signed-off-by: Guenter Roeck 
> > ---
> > The problem is seen in chromeos-4.4, but there is no reason to believe
> > that it does not occur with the upstream kernel. It is still seen in
> > chromeos-4.4 after all patches from upstream and linux-next have been
> > applied to the driver.
> > 
> > While relatively simple, I am not really convinced that this is the best
> > (or even an acceptable) solution for this problem. I am open to suggestions
> > for a better fix.
> 
> The proper approach is to keep the allocation as it is, but _before_
> deallocating the buffer, make sure that the interrupt buffer won't be
> accessed any more.  This may involve calling usb_kill_urb(), or
> synchronize_irq(), or something similar.
> 

Just to keep everyone up to date, the problem was that the usb subsystem,
due to bad platform code in chromeos-4.4, did not properly stop DMA from
the hardware when the driver was removed. This resulted in a DMA transfer
into the freed buffer. The r8152 driver is completely innocent.

Sorry for the noise.

Guenter


Re: [PATCH net-next 9/9] ldmvsw: disable tso and gso for bridge operations

2017-02-03 Thread Shannon Nelson

On 2/3/2017 9:59 AM, Eric Dumazet wrote:

On Fri, 2017-02-03 at 09:42 -0800, Shannon Nelson wrote:

The ldmvsw driver is specifically for supporting the ldom virtual
networking by running in the primary ldom and using the LDC to connect
the remaining ldoms to the outside world via a bridge.  With TSO and GSO
supported while connected the bridge, things tend to misbehave as seen in
our case by delayed packets, enough to begin triggering retransmits and
affecting overall throughput.  By turning off advertised support for TSO
and GSO we restore stable traffic flow through the bridge.

Orabug: 23293104

Signed-off-by: Shannon Nelson 
---
 drivers/net/ethernet/sun/ldmvsw.c |5 ++---
 drivers/net/ethernet/sun/sunvnet_common.c |3 ++-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/sun/ldmvsw.c 
b/drivers/net/ethernet/sun/ldmvsw.c
index 552c0a9..bd2cfbc 100644
--- a/drivers/net/ethernet/sun/ldmvsw.c
+++ b/drivers/net/ethernet/sun/ldmvsw.c
@@ -299,8 +299,7 @@ static struct net_device *vsw_alloc_netdev(u8 hwaddr[],
dev->ethtool_ops = &vsw_ethtool_ops;
dev->watchdog_timeo = VSW_TX_TIMEOUT;

-   dev->hw_features = NETIF_F_TSO | NETIF_F_GSO | NETIF_F_GSO_SOFTWARE |
-  NETIF_F_HW_CSUM | NETIF_F_SG;
+   dev->hw_features = NETIF_F_HW_CSUM;



You also removed NETIF_F_SG


Hmmm - yep, I'll put that back in for the respin.

Thanks,
sln



[RFC] igmp: address pmc kmemleak from on igmpv3_del_delrec()

2017-02-03 Thread Luis R. Rodriguez
When we igmpv3_add_delrec() we kzalloc the pmc, but when users
call igmpv3_del_delrec() we never free the pmc. This was caught
by the following kmemleak splat:

unreferenced object 0x99666ff43b40 (size 192):
  comm "systemd-resolve", pid 1258, jiffies 4309905600 (age 2138.352s)
  hex dump (first 32 bytes):
00 6a 64 72 66 99 ff ff e0 00 00 fc 00 00 00 00  .jdrf...
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  
  backtrace:
[] kmemleak_alloc+0x4a/0xa0
[] kmem_cache_alloc_trace+0x107/0x240
[] igmp_group_dropped+0xfd/0x270
[] ip_mc_dec_group+0xaf/0x110
[] ip_mc_leave_group+0xb6/0x140
[] do_ip_setsockopt.isra.13+0x4c7/0xed0
[] ip_setsockopt+0x34/0xb0
[] udp_setsockopt+0x1b/0x30
[] sock_common_setsockopt+0x14/0x20
[] SyS_setsockopt+0x80/0xe0
[] do_syscall_64+0x5b/0xc0
[] return_from_SYSCALL_64+0x0/0x6a
[] 0x

Signed-off-by: Luis R. Rodriguez 
---

I can reproduce this over time on a qemu box running next-20170125.
After running this for a while I no longer see the splat. This needs
confirmation from folks more familiar with the code, hence RFC. If
this is a real fix we need appropriate tags for the patch.

 net/ipv4/igmp.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 5b15459955f8..44fd86de2823 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -1172,6 +1172,7 @@ static void igmpv3_del_delrec(struct in_device *in_dev, 
struct ip_mc_list *im)
psf->sf_crcount = im->crcount;
}
in_dev_put(pmc->interface);
+   kfree(pmc);
}
spin_unlock_bh(&im->lock);
 }
-- 
2.11.0



Re: [PATCH net-next 5/9] sunvnet: add memory barrier before check for tx enable

2017-02-03 Thread Shannon Nelson

On 2/3/2017 9:56 AM, Eric Dumazet wrote:

On Fri, 2017-02-03 at 09:42 -0800, Shannon Nelson wrote:

In order to allow the underlying LDC and outstanding memory operations
to potentially catch up with the driver's Tx requests, add a memory
barrier before checking again for available tx descriptors.

Signed-off-by: Shannon Nelson 
---
 drivers/net/ethernet/sun/sunvnet_common.c |1 +
 1 files changed, 1 insertions(+), 0 deletions(-)

diff --git a/drivers/net/ethernet/sun/sunvnet_common.c 
b/drivers/net/ethernet/sun/sunvnet_common.c
index 5d0d386..98e758e 100644
--- a/drivers/net/ethernet/sun/sunvnet_common.c
+++ b/drivers/net/ethernet/sun/sunvnet_common.c
@@ -1467,6 +1467,7 @@ ldc_start_done:
dr->prod = (dr->prod + 1) & (VNET_TX_RING_SIZE - 1);
if (unlikely(vnet_tx_dring_avail(dr) < 1)) {
netif_tx_stop_queue(txq);
+   dma_wmb();


This does not look right.

I believe you need smp_rmb() here.


Well, it probably should be dma_rmb(), since regardless of the number of 
cores we think we have, we're communicating with a peer ldom that has 
its own core(s).  Either way, on sparc they all seem to boil down to the 
same bit of asm, but using the "rmb" part makes more logical sense. 
I'll respin with dma_rmb().


Good catch, thanks,
sln





if (vnet_tx_dring_avail(dr) > VNET_TX_WAKEUP_THRESH(dr))
netif_tx_wake_queue(txq);
}





Re: [PATCH net-next] cxgb4: get rid of custom busy poll code

2017-02-03 Thread David Miller
From: Eric Dumazet 
Date: Thu, 02 Feb 2017 11:44:27 -0800

> From: Eric Dumazet 
> 
> In linux-4.5, busy polling was implemented in core
> NAPI stack, meaning that all custom implementation can
> be removed from drivers.
> 
> Not only we remove lot of code, we also remove one spin_lock()
> from driver fast path.
> 
> Signed-off-by: Eric Dumazet 

Applied.


Re: [PATCH net-next] myri10ge: get rid of custom busy poll code

2017-02-03 Thread David Miller
From: Eric Dumazet 
Date: Thu, 02 Feb 2017 10:50:48 -0800

> From: Eric Dumazet 
> 
> Compared to custom busy_poll, the generic NAPI one is simpler and
> removes a lot of code. It removes one atomic in the fast path (when
> busy poll is not in action) since we do not have to use an extra
> spinlock.
> 
> Signed-off-by: Eric Dumazet 

Applied.


Re: [PATCH net] net: use a work queue to defer net_disable_timestamp() work

2017-02-03 Thread David Miller
From: Eric Dumazet 
Date: Thu, 02 Feb 2017 10:31:35 -0800

> From: Eric Dumazet 
> 
> Dmitry reported a warning [1] showing that we were calling 
> net_disable_timestamp() -> static_key_slow_dec() from a non
> process context.
> 
> Grabbing a mutex while holding a spinlock or rcu_read_lock()
> is not allowed.
> 
> As Cong suggested, we now use a work queue.
> 
> It is possible netstamp_clear() exits while netstamp_needed_deferred
> is not zero, but it is probably not worth trying to do better than that.
> 
> netstamp_needed_deferred atomic tracks the exact number of deferred
> decrements.
 ...
> Fixes: b90e5794c5bd ("net: dont call jump_label_dec from irq context")
> Suggested-by: Cong Wang 
> Reported-by: Dmitry Vyukov 
> Signed-off-by: Eric Dumazet 

Applied and queued up for -stable, thanks Eric.


Re: [PATCH net-next v2 00/12] bnxt_en: Add XDP support.

2017-02-03 Thread Michael Chan
On Fri, Feb 3, 2017 at 12:49 PM, David Miller  wrote:
>
> Please _DO NOT_ guard XDP support with an ifdef the user
> can modify.
>
> Treat it like any other common netdev feature a driver might
> support such as checksum offloading or GRO.
>

David, I want to make sure I understand completely.  Are you saying
don't use Kconfig option for XDP?  Have it always available?


Re: [PATCH net] bpf: expose netns inode to bpf programs

2017-02-03 Thread Eric W. Biederman
Andy Lutomirski  writes:

> On Thu, Feb 2, 2017 at 8:33 PM, Eric W. Biederman  
> wrote:
>> Alexei Starovoitov  writes:
>>
>>> On 1/26/17 11:07 AM, Andy Lutomirski wrote:
 On Thu, Jan 26, 2017 at 10:32 AM, Alexei Starovoitov  wrote:
> On 1/26/17 10:12 AM, Andy Lutomirski wrote:
>>
>> On Thu, Jan 26, 2017 at 9:46 AM, Alexei Starovoitov  wrote:
>>>
>>> On 1/26/17 8:37 AM, Andy Lutomirski wrote:
>
>
> Think of bpf programs as safe kernel modules. They don't have
> confined boundaries and program authors, if not careful, can shoot
> themselves in the foot. We're not trying to prevent that because
> it's impossible to check that the program is sane. Just like
> it's impossible to check that kernel module is sane.
> But in case of bpf we check that bpf program is _safe_ from the kernel
> point of view. If it's doing some garbage, it's program's business.
> Does it make more sense now?
>

 With all due respect, I think this is not an acceptable way to think
 about BPF at all.  If you think of BPF this way, I think there needs
 to be a real discussion at KS or similar as to whether this is okay.
 The reason is simple: the kernel promises a stable ABI to userspace
 but not to kernel modules.  By thinking of BPF as more like a module,
 you're taking a big shortcut that will either result in ABI breakage
 down the road or in committing to a problematic stable ABI.
>>>
>>>
>>>
>>> you misunderstood the analogy.
>>> bpf abi is certainly stable. that's why we were careful of not
>>> exposing anything to it that is not already stable.
>>>
>>
>> In that case I don't understand what you're trying to say.  Eric
>> thinks your patch exposes a bad interface.  A bad interface for
>> userspace is a very different thing from a bad interface available to
>> kernel modules.  Are you saying that BPF is kernel-module-like in that
>> the ABI exposed to BPF programs doesn't need to meet the same quality
>> standards as userspace ABIs?
>
>
> of course not.
> ns.inum is already exposed to user space as a value.
> This patch exposes it to bpf program in a convenient and stable way,

 Here's what I'm imaging Eric is thinking:

 ns.inum is currently exposed to userspace via procfs.  In principle,
 the value could be local to a namespace, though, which would enable
 CRIU to be able to preserve namespace inode numbers across a
 checkpoint+restore operation.  If this happened, the contained and
 restored procfs would see a different inode number than the outermost
 procfs.
>>>
>>> sure. there are many different ways for the program to see inode
>>> that either was already reused or disappeared.
>>> What I'm saying that it is expected. We cannot prevent that from
>>> bpf side. Just like ifindex value read by the program can be bogus
>>> as in the example I just provided.
>>
>> The point is that we can make the inode number stable across migration
>> and the user space API for namespaces has been designed with that
>> possibility in mind.
>
> How does it help if BPF starts exposing both inode number and device
> number?

Adding the device number comparison helps in that it is explicit what is
being compared against.  That gives me at least a bit of a namespace
for the namespaces, and a program from a sufficiently wrong context will
have its comparisons fail rather than having a match.

I think the operation that is exported in the BPF should be a full
comparison operation of device and inode number so that it could be
optimized/compiled to something else depending upon the context.

AKA the compilation of the bpf program would have the opportunity to
remove the namespace dependency and make the program work in a global
context.  So we don't have to carry namespace information around at run
time.

> ISTM any ability to migrate namespaces and to migrate eBPF programs
> that know about namespaces needs to have the eBPF program firmly
> rooted in some namespace (or perhaps cgroup in this case) so that it
> can see a namespaced view of the world.  For this to work, presumably
> we need to make sure that eBPF programs that are installed by programs
> that are in a container don't see traffic that isn't in that
> container.  This is part of why I think that we should consider
> preventing programs that aren't in the root namespace (perhaps *all*
> the root namespaces) from installing bpf+cgroup programs in the first
> place until there's a clearer understanding of how this all fits
> together.

Andy I agree.  At least to the point those programs are
reading attributes that are in a namespace.  Something that should be
straight forward to verify in the bpf checker when installing the

Re: [PATCH v2 net-next] bpf: enable verifier to add 0 to packet ptr

2017-02-03 Thread William Tu
Hi Alexei,

why it is bogus? on my system, it fails without the patch applied.

--William

On Fri, Feb 3, 2017 at 12:55 PM, Alexei Starovoitov
 wrote:
> On Fri, Feb 03, 2017 at 09:22:45AM -0800, William Tu wrote:
>> The patch fixes the case when adding a zero value to the packet
>> pointer.  The verifier reports the following error:
>>   [...]
>> R0=imm0,min_value=0,max_value=0
>> R1=pkt(id=0,off=0,r=4)
>> R2=pkt_end R3=fp-12
>> R4=imm4,min_value=4,max_value=4
>> R5=pkt(id=0,off=4,r=4)
>>   269: (bf) r2 = r0   // r2 becomes imm0
>>   270: (77) r2 >>= 3
>>   271: (bf) r4 = r1   // r4 becomes pkt ptr
>>   272: (0f) r4 += r2  // r4 += 0
>>   addition of negative constant to packet pointer is not allowed
>>
>> Signed-off-by: William Tu 
>> Signed-off-by: Mihai Budiu 
>> ---
>>  kernel/bpf/verifier.c   |  2 +-
>>  tools/testing/selftests/bpf/test_verifier.c | 15 +++
>>  2 files changed, 16 insertions(+), 1 deletion(-)
>>
>> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
>> index fb3513b..1a754e5 100644
>> --- a/kernel/bpf/verifier.c
>> +++ b/kernel/bpf/verifier.c
>> @@ -1397,7 +1397,7 @@ static int check_packet_ptr_add(struct 
>> bpf_verifier_env *env,
>>   imm = insn->imm;
>>
>>  add_imm:
>> - if (imm <= 0) {
>> + if (imm < 0) {
>>   verbose("addition of negative constant to packet 
>> pointer is not allowed\n");
>>   return -EACCES;
>>   }
>> diff --git a/tools/testing/selftests/bpf/test_verifier.c 
>> b/tools/testing/selftests/bpf/test_verifier.c
>> index 0d0912c..a2b5c7e 100644
>> --- a/tools/testing/selftests/bpf/test_verifier.c
>> +++ b/tools/testing/selftests/bpf/test_verifier.c
>> @@ -2404,6 +2404,21 @@ static struct bpf_test tests[] = {
>>   .prog_type = BPF_PROG_TYPE_SCHED_CLS,
>>   },
>>   {
>> + "direct packet access: test14 (pkt_ptr += 0, good access)",
>> + .insns = {
>> + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
>> + offsetof(struct __sk_buff, data)),
>> + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
>> + offsetof(struct __sk_buff, data_end)),
>> + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
>> + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0),
>
> wait. the test is bogus.
> please write the proper test for the feature
> and check that it fails before the patch and passes afterwards.
>
>> + BPF_MOV64_IMM(BPF_REG_0, 1),
>> + BPF_EXIT_INSN(),
>> + },
>> + .result = ACCEPT,
>> + .prog_type = BPF_PROG_TYPE_SCHED_CLS,
>> + },
>> + {
>>   "helper access to packet: test1, valid packet_ptr range",
>>   .insns = {
>>   BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
>> --
>> 2.7.4
>>


Re: [RFC PATCH net-next 1/2] bpf: Save original ebpf instructions

2017-02-03 Thread Daniel Borkmann

On 02/03/2017 09:38 PM, David Ahern wrote:

Similar to classic bpf, support saving original ebpf instructions

Signed-off-by: David Ahern 


Not convinced that this is in the right direction, this not only *significantly*
increases mem footprint for each and every program, but also when you dump this,
then map references from relocs inside the insns are meaningless (f.e. what 
about
prog arrays used in tail calls?), so things like criu also won't be able to use
this kind of interface for dump and restore. If it's just for debugging, then
why not extend the existing tracing infrastructure around bpf that was started
with intention to gain more visibility.

Thanks,
Daniel


[PATCH v2 1/2] net: ethernet: bgmac: init sequence bug

2017-02-03 Thread Jon Mason
From: Zac Schroff 

Fix a bug in the 'bgmac' driver init sequence: it blindly writes the init
values where it should preserve most bits other than the ones it is
deliberately manipulating.

Signed-off-by: Zac Schroff 
Signed-off-by: Jon Mason 
Fixes: f6a95a24957 ("net: ethernet: bgmac: Add platform device support")
---
 drivers/net/ethernet/broadcom/bgmac-platform.c | 14 +-
 drivers/net/ethernet/broadcom/bgmac.h  | 16 
 2 files changed, 25 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bgmac-platform.c 
b/drivers/net/ethernet/broadcom/bgmac-platform.c
index 6f736c1..a626dce 100644
--- a/drivers/net/ethernet/broadcom/bgmac-platform.c
+++ b/drivers/net/ethernet/broadcom/bgmac-platform.c
@@ -51,8 +51,7 @@ static void platform_bgmac_idm_write(struct bgmac *bgmac, u16 
offset, u32 value)
 
 static bool platform_bgmac_clk_enabled(struct bgmac *bgmac)
 {
-   if ((bgmac_idm_read(bgmac, BCMA_IOCTL) &
-(BCMA_IOCTL_CLK | BCMA_IOCTL_FGC)) != BCMA_IOCTL_CLK)
+   if ((bgmac_idm_read(bgmac, BCMA_IOCTL) & BGMAC_CLK_EN) != BGMAC_CLK_EN)
return false;
if (bgmac_idm_read(bgmac, BCMA_RESET_CTL) & BCMA_RESET_CTL_RESET)
return false;
@@ -61,15 +60,20 @@ static bool platform_bgmac_clk_enabled(struct bgmac *bgmac)
 
 static void platform_bgmac_clk_enable(struct bgmac *bgmac, u32 flags)
 {
-   bgmac_idm_write(bgmac, BCMA_IOCTL,
-   (BCMA_IOCTL_CLK | BCMA_IOCTL_FGC | flags));
+   u32 val;
+
+   val = bgmac_idm_read(bgmac, BCMA_IOCTL);
+   /* Some bits of BCMA_IOCTL set by HW/ATF and should not change */
+   val |= flags & ~(BGMAC_AWCACHE | BGMAC_ARCACHE | BGMAC_AWUSER |
+BGMAC_ARUSER);
+   val |= BGMAC_CLK_EN;
bgmac_idm_read(bgmac, BCMA_IOCTL);
 
bgmac_idm_write(bgmac, BCMA_RESET_CTL, 0);
bgmac_idm_read(bgmac, BCMA_RESET_CTL);
udelay(1);
 
-   bgmac_idm_write(bgmac, BCMA_IOCTL, (BCMA_IOCTL_CLK | flags));
+   bgmac_idm_write(bgmac, BCMA_IOCTL, val);
bgmac_idm_read(bgmac, BCMA_IOCTL);
udelay(1);
 }
diff --git a/drivers/net/ethernet/broadcom/bgmac.h 
b/drivers/net/ethernet/broadcom/bgmac.h
index 71f493f..c8d33eb 100644
--- a/drivers/net/ethernet/broadcom/bgmac.h
+++ b/drivers/net/ethernet/broadcom/bgmac.h
@@ -213,6 +213,22 @@
 /* BCMA GMAC core specific IO Control (BCMA_IOCTL) flags */
 #define BGMAC_BCMA_IOCTL_SW_CLKEN  0x0004  /* PHY Clock 
Enable */
 #define BGMAC_BCMA_IOCTL_SW_RESET  0x0008  /* PHY Reset */
+/* The IOCTL values appear to be different in NS, NSP, and NS2, and do not 
match
+ * the values directly above
+ */
+#define BGMAC_CLK_EN   BIT(0)
+#define BGMAC_RESERVED_0   BIT(1)
+#define BGMAC_SOURCE_SYNC_MODE_EN  BIT(2)
+#define BGMAC_DEST_SYNC_MODE_ENBIT(3)
+#define BGMAC_TX_CLK_OUT_INVERT_EN BIT(4)
+#define BGMAC_DIRECT_GMII_MODE BIT(5)
+#define BGMAC_CLK_250_SEL  BIT(6)
+#define BGMAC_AWCACHE  (0xf << 7)
+#define BGMAC_RESERVED_1   (0x1f << 11)
+#define BGMAC_ARCACHE  (0xf << 16)
+#define BGMAC_AWUSER   (0x3f << 20)
+#define BGMAC_ARUSER   (0x3f << 26)
+#define BGMAC_RESERVED BIT(31)
 
 /* BCMA GMAC core specific IO status (BCMA_IOST) flags */
 #define BGMAC_BCMA_IOST_ATTACHED   0x0800
-- 
2.7.4



  1   2   3   >