[PATCH v5 net-next 4/4] ila: Add generic ILA translation facility

2015-12-14 Thread Tom Herbert
This patch implements an ILA translation table. This table can be
configured with identifier to locator mappings, and can be queried
to resolve a mapping. Queries can be parameterized based on interface,
direction (incoming or outgoing), and matching locator.  The table is
implemented using rhashtable and is configured via netlink (through
"ip ila .." in iproute).

The table may be used as an alternative means to do ILA translations
other than the lw tunnels.

Signed-off-by: Tom Herbert 
---
 include/net/ila.h |  18 ++
 include/uapi/linux/ila.h  |  22 ++
 net/ipv6/ila/Makefile |   2 +-
 net/ipv6/ila/ila.h|   2 +
 net/ipv6/ila/ila_common.c |   8 +
 net/ipv6/ila/ila_xlat.c   | 680 ++
 6 files changed, 731 insertions(+), 1 deletion(-)
 create mode 100644 include/net/ila.h
 create mode 100644 net/ipv6/ila/ila_xlat.c

diff --git a/include/net/ila.h b/include/net/ila.h
new file mode 100644
index 000..9f4f43e
--- /dev/null
+++ b/include/net/ila.h
@@ -0,0 +1,18 @@
+/*
+ * ILA kernel interface
+ *
+ * Copyright (c) 2015 Tom Herbert 
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ */
+
+#ifndef _NET_ILA_H
+#define _NET_ILA_H
+
+int ila_xlat_outgoing(struct sk_buff *skb);
+int ila_xlat_incoming(struct sk_buff *skb);
+
+#endif /* _NET_ILA_H */
diff --git a/include/uapi/linux/ila.h b/include/uapi/linux/ila.h
index 7ed9e67..abde7bb 100644
--- a/include/uapi/linux/ila.h
+++ b/include/uapi/linux/ila.h
@@ -3,13 +3,35 @@
 #ifndef _UAPI_LINUX_ILA_H
 #define _UAPI_LINUX_ILA_H
 
+/* NETLINK_GENERIC related info */
+#define ILA_GENL_NAME  "ila"
+#define ILA_GENL_VERSION   0x1
+
 enum {
ILA_ATTR_UNSPEC,
ILA_ATTR_LOCATOR,   /* u64 */
+   ILA_ATTR_IDENTIFIER,/* u64 */
+   ILA_ATTR_LOCATOR_MATCH, /* u64 */
+   ILA_ATTR_IFINDEX,   /* s32 */
+   ILA_ATTR_DIR,   /* u32 */
 
__ILA_ATTR_MAX,
 };
 
 #define ILA_ATTR_MAX   (__ILA_ATTR_MAX - 1)
 
+enum {
+   ILA_CMD_UNSPEC,
+   ILA_CMD_ADD,
+   ILA_CMD_DEL,
+   ILA_CMD_GET,
+
+   __ILA_CMD_MAX,
+};
+
+#define ILA_CMD_MAX(__ILA_CMD_MAX - 1)
+
+#define ILA_DIR_IN (1 << 0)
+#define ILA_DIR_OUT(1 << 1)
+
 #endif /* _UAPI_LINUX_ILA_H */
diff --git a/net/ipv6/ila/Makefile b/net/ipv6/ila/Makefile
index 31d136b..4b32e59 100644
--- a/net/ipv6/ila/Makefile
+++ b/net/ipv6/ila/Makefile
@@ -4,4 +4,4 @@
 
 obj-$(CONFIG_IPV6_ILA) += ila.o
 
-ila-objs := ila_common.o ila_lwt.o
+ila-objs := ila_common.o ila_lwt.o ila_xlat.o
diff --git a/net/ipv6/ila/ila.h b/net/ipv6/ila/ila.h
index b94081f..28542cb 100644
--- a/net/ipv6/ila/ila.h
+++ b/net/ipv6/ila/ila.h
@@ -42,5 +42,7 @@ void update_ipv6_locator(struct sk_buff *skb, struct 
ila_params *p);
 
 int ila_lwt_init(void);
 void ila_lwt_fini(void);
+int ila_xlat_init(void);
+void ila_xlat_fini(void);
 
 #endif /* __ILA_H */
diff --git a/net/ipv6/ila/ila_common.c b/net/ipv6/ila/ila_common.c
index 64e1904..32dc9aa 100644
--- a/net/ipv6/ila/ila_common.c
+++ b/net/ipv6/ila/ila_common.c
@@ -80,12 +80,20 @@ static int __init ila_init(void)
if (ret)
goto fail_lwt;
 
+   ret = ila_xlat_init();
+   if (ret)
+   goto fail_xlat;
+
+   return 0;
+fail_xlat:
+   ila_lwt_fini();
 fail_lwt:
return ret;
 }
 
 static void __exit ila_fini(void)
 {
+   ila_xlat_fini();
ila_lwt_fini();
 }
 
diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c
new file mode 100644
index 000..1e1eaf3
--- /dev/null
+++ b/net/ipv6/ila/ila_xlat.c
@@ -0,0 +1,680 @@
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include "ila.h"
+
+struct ila_xlat_params {
+   struct ila_params ip;
+   __be64 identifier;
+   int ifindex;
+   unsigned int dir;
+};
+
+struct ila_map {
+   struct ila_xlat_params p;
+   struct rhash_head node;
+   struct ila_map __rcu *next;
+   struct rcu_head rcu;
+};
+
+static unsigned int ila_net_id;
+
+struct ila_net {
+   struct rhashtable rhash_table;
+   spinlock_t *locks; /* Bucket locks for entry manipulation */
+   unsigned int locks_mask;
+   bool hooks_registered;
+};
+
+#defineLOCKS_PER_CPU 10
+
+static int alloc_ila_locks(struct ila_net *ilan, gfp_t gfp)
+{
+   unsigned int i, size;
+   unsigned int nr_pcpus = num_possible_cpus();
+
+   nr_pcpus = min_t(unsigned int, nr_pcpus, 32UL);
+   size = roundup_pow_of_two(nr_pcpus * LOCKS_PER_CPU);
+
+   if (sizeof(spinlock_t) != 0) {
+#ifdef CONFIG_NUMA
+   if (size * sizeof(spinlock_t) > PAGE_SIZE &&
+   

[PATCH v5 net-next 1/4] ila: Create net/ipv6/ila directory

2015-12-14 Thread Tom Herbert
Create ila directory in preparation for supporting other hooks in the
kernel than LWT for doing ILA. This includes:
  - Moving ila.c to ila/ila_lwt.c
  - Splitting out some common functions into ila_common.c

Signed-off-by: Tom Herbert 
---
 net/ipv6/Makefile |   2 +-
 net/ipv6/ila.c| 229 --
 net/ipv6/ila/Makefile |   7 ++
 net/ipv6/ila/ila.h|  46 ++
 net/ipv6/ila/ila_common.c |  95 +++
 net/ipv6/ila/ila_lwt.c| 152 ++
 6 files changed, 301 insertions(+), 230 deletions(-)
 delete mode 100644 net/ipv6/ila.c
 create mode 100644 net/ipv6/ila/Makefile
 create mode 100644 net/ipv6/ila/ila.h
 create mode 100644 net/ipv6/ila/ila_common.c
 create mode 100644 net/ipv6/ila/ila_lwt.c

diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index 2c900c7..2fbd90b 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -34,7 +34,7 @@ obj-$(CONFIG_INET6_XFRM_MODE_TUNNEL) += xfrm6_mode_tunnel.o
 obj-$(CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION) += xfrm6_mode_ro.o
 obj-$(CONFIG_INET6_XFRM_MODE_BEET) += xfrm6_mode_beet.o
 obj-$(CONFIG_IPV6_MIP6) += mip6.o
-obj-$(CONFIG_IPV6_ILA) += ila.o
+obj-$(CONFIG_IPV6_ILA) += ila/
 obj-$(CONFIG_NETFILTER)+= netfilter/
 
 obj-$(CONFIG_IPV6_VTI) += ip6_vti.o
diff --git a/net/ipv6/ila.c b/net/ipv6/ila.c
deleted file mode 100644
index 1a6852e..000
--- a/net/ipv6/ila.c
+++ /dev/null
@@ -1,229 +0,0 @@
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-
-struct ila_params {
-   __be64 locator;
-   __be64 locator_match;
-   __wsum csum_diff;
-};
-
-static inline struct ila_params *ila_params_lwtunnel(
-   struct lwtunnel_state *lwstate)
-{
-   return (struct ila_params *)lwstate->data;
-}
-
-static inline __wsum compute_csum_diff8(const __be32 *from, const __be32 *to)
-{
-   __be32 diff[] = {
-   ~from[0], ~from[1], to[0], to[1],
-   };
-
-   return csum_partial(diff, sizeof(diff), 0);
-}
-
-static inline __wsum get_csum_diff(struct ipv6hdr *ip6h, struct ila_params *p)
-{
-   if (*(__be64 *)>daddr == p->locator_match)
-   return p->csum_diff;
-   else
-   return compute_csum_diff8((__be32 *)>daddr,
- (__be32 *)>locator);
-}
-
-static void update_ipv6_locator(struct sk_buff *skb, struct ila_params *p)
-{
-   __wsum diff;
-   struct ipv6hdr *ip6h = ipv6_hdr(skb);
-   size_t nhoff = sizeof(struct ipv6hdr);
-
-   /* First update checksum */
-   switch (ip6h->nexthdr) {
-   case NEXTHDR_TCP:
-   if (likely(pskb_may_pull(skb, nhoff + sizeof(struct tcphdr {
-   struct tcphdr *th = (struct tcphdr *)
-   (skb_network_header(skb) + nhoff);
-
-   diff = get_csum_diff(ip6h, p);
-   inet_proto_csum_replace_by_diff(>check, skb,
-   diff, true);
-   }
-   break;
-   case NEXTHDR_UDP:
-   if (likely(pskb_may_pull(skb, nhoff + sizeof(struct udphdr {
-   struct udphdr *uh = (struct udphdr *)
-   (skb_network_header(skb) + nhoff);
-
-   if (uh->check || skb->ip_summed == CHECKSUM_PARTIAL) {
-   diff = get_csum_diff(ip6h, p);
-   inet_proto_csum_replace_by_diff(>check, skb,
-   diff, true);
-   if (!uh->check)
-   uh->check = CSUM_MANGLED_0;
-   }
-   }
-   break;
-   case NEXTHDR_ICMP:
-   if (likely(pskb_may_pull(skb,
-nhoff + sizeof(struct icmp6hdr {
-   struct icmp6hdr *ih = (struct icmp6hdr *)
-   (skb_network_header(skb) + nhoff);
-
-   diff = get_csum_diff(ip6h, p);
-   inet_proto_csum_replace_by_diff(>icmp6_cksum, skb,
-   diff, true);
-   }
-   break;
-   }
-
-   /* Now change destination address */
-   *(__be64 *)>daddr = p->locator;
-}
-
-static int ila_output(struct net *net, struct sock *sk, struct sk_buff *skb)
-{
-   struct dst_entry *dst = skb_dst(skb);
-
-   if (skb->protocol != htons(ETH_P_IPV6))
-   goto drop;
-
-   update_ipv6_locator(skb, ila_params_lwtunnel(dst->lwtstate));
-
-   return dst->lwtstate->orig_output(net, sk, skb);
-
-drop:
-   kfree_skb(skb);
-   return -EINVAL;
-}
-
-static int ila_input(struct sk_buff *skb)
-{
- 

[PATCH v5 net-next 2/4] rhashtable: add function to replace an element

2015-12-14 Thread Tom Herbert
Add the rhashtable_replace_fast function. This replaces one object in
the table with another atomically. The hashes of the new and old objects
must be equal.

Signed-off-by: Tom Herbert 
---
 include/linux/rhashtable.h | 82 ++
 1 file changed, 82 insertions(+)

diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index 843ceca..77deece 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -819,4 +819,86 @@ out:
return err;
 }
 
+/* Internal function, please use rhashtable_replace_fast() instead */
+static inline int __rhashtable_replace_fast(
+   struct rhashtable *ht, struct bucket_table *tbl,
+   struct rhash_head *obj_old, struct rhash_head *obj_new,
+   const struct rhashtable_params params)
+{
+   struct rhash_head __rcu **pprev;
+   struct rhash_head *he;
+   spinlock_t *lock;
+   unsigned int hash;
+   int err = -ENOENT;
+
+   /* Minimally, the old and new objects must have same hash
+* (which should mean identifiers are the same).
+*/
+   hash = rht_head_hashfn(ht, tbl, obj_old, params);
+   if (hash != rht_head_hashfn(ht, tbl, obj_new, params))
+   return -EINVAL;
+
+   lock = rht_bucket_lock(tbl, hash);
+
+   spin_lock_bh(lock);
+
+   pprev = >buckets[hash];
+   rht_for_each(he, tbl, hash) {
+   if (he != obj_old) {
+   pprev = >next;
+   continue;
+   }
+
+   rcu_assign_pointer(obj_new->next, obj_old->next);
+   rcu_assign_pointer(*pprev, obj_new);
+   err = 0;
+   break;
+   }
+
+   spin_unlock_bh(lock);
+
+   return err;
+}
+
+/**
+ * rhashtable_replace_fast - replace an object in hash table
+ * @ht:hash table
+ * @obj_old:   pointer to hash head inside object being replaced
+ * @obj_new:   pointer to hash head inside object which is new
+ * @params:hash table parameters
+ *
+ * Replacing an object doesn't affect the number of elements in the hash table
+ * or bucket, so we don't need to worry about shrinking or expanding the
+ * table here.
+ *
+ * Returns zero on success, -ENOENT if the entry could not be found,
+ * -EINVAL if hash is not the same for the old and new objects.
+ */
+static inline int rhashtable_replace_fast(
+   struct rhashtable *ht, struct rhash_head *obj_old,
+   struct rhash_head *obj_new,
+   const struct rhashtable_params params)
+{
+   struct bucket_table *tbl;
+   int err;
+
+   rcu_read_lock();
+
+   tbl = rht_dereference_rcu(ht->tbl, ht);
+
+   /* Because we have already taken (and released) the bucket
+* lock in old_tbl, if we find that future_tbl is not yet
+* visible then that guarantees the entry to still be in
+* the old tbl if it exists.
+*/
+   while ((err = __rhashtable_replace_fast(ht, tbl, obj_old,
+   obj_new, params)) &&
+  (tbl = rht_dereference_rcu(tbl->future_tbl, ht)))
+   ;
+
+   rcu_read_unlock();
+
+   return err;
+}
+
 #endif /* _LINUX_RHASHTABLE_H */
-- 
2.4.6

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v5 net-next 3/4] netlink: add a start callback for starting a netlink dump

2015-12-14 Thread Tom Herbert
The start callback allows the caller to set up a context for the
dump callbacks. Presumably, the context can then be destroyed in
the done callback.

Signed-off-by: Tom Herbert 
---
 include/linux/netlink.h  |  2 ++
 include/net/genetlink.h  |  2 ++
 net/netlink/af_netlink.c |  4 
 net/netlink/genetlink.c  | 16 
 4 files changed, 24 insertions(+)

diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index 639e9b8..0b41959 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -131,6 +131,7 @@ netlink_skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
 struct netlink_callback {
struct sk_buff  *skb;
const struct nlmsghdr   *nlh;
+   int (*start)(struct netlink_callback *);
int (*dump)(struct sk_buff * skb,
struct netlink_callback *cb);
int (*done)(struct netlink_callback *cb);
@@ -153,6 +154,7 @@ struct nlmsghdr *
 __nlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, int type, int len, int 
flags);
 
 struct netlink_dump_control {
+   int (*start)(struct netlink_callback *);
int (*dump)(struct sk_buff *skb, struct netlink_callback *);
int (*done)(struct netlink_callback *);
void *data;
diff --git a/include/net/genetlink.h b/include/net/genetlink.h
index 1b6b6dc..43c0e77 100644
--- a/include/net/genetlink.h
+++ b/include/net/genetlink.h
@@ -114,6 +114,7 @@ static inline void genl_info_net_set(struct genl_info 
*info, struct net *net)
  * @flags: flags
  * @policy: attribute validation policy
  * @doit: standard command callback
+ * @start: start callback for dumps
  * @dumpit: callback for dumpers
  * @done: completion callback for dumps
  * @ops_list: operations list
@@ -122,6 +123,7 @@ struct genl_ops {
const struct nla_policy *policy;
int(*doit)(struct sk_buff *skb,
   struct genl_info *info);
+   int(*start)(struct netlink_callback *cb);
int(*dumpit)(struct sk_buff *skb,
 struct netlink_callback *cb);
int(*done)(struct netlink_callback *cb);
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 59651af..81dc1bb 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -2915,6 +2915,7 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff 
*skb,
 
cb = >cb;
memset(cb, 0, sizeof(*cb));
+   cb->start = control->start;
cb->dump = control->dump;
cb->done = control->done;
cb->nlh = nlh;
@@ -2927,6 +2928,9 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff 
*skb,
 
mutex_unlock(nlk->cb_mutex);
 
+   if (cb->start)
+   cb->start(cb);
+
ret = netlink_dump(sk);
sock_put(sk);
 
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index bc0e504..8e63662 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -513,6 +513,20 @@ void *genlmsg_put(struct sk_buff *skb, u32 portid, u32 seq,
 }
 EXPORT_SYMBOL(genlmsg_put);
 
+static int genl_lock_start(struct netlink_callback *cb)
+{
+   /* our ops are always const - netlink API doesn't propagate that */
+   const struct genl_ops *ops = cb->data;
+   int rc = 0;
+
+   if (ops->start) {
+   genl_lock();
+   rc = ops->start(cb);
+   genl_unlock();
+   }
+   return rc;
+}
+
 static int genl_lock_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
 {
/* our ops are always const - netlink API doesn't propagate that */
@@ -577,6 +591,7 @@ static int genl_family_rcv_msg(struct genl_family *family,
.module = family->module,
/* we have const, but the netlink API doesn't */
.data = (void *)ops,
+   .start = genl_lock_start,
.dump = genl_lock_dumpit,
.done = genl_lock_done,
};
@@ -588,6 +603,7 @@ static int genl_family_rcv_msg(struct genl_family *family,
} else {
struct netlink_dump_control c = {
.module = family->module,
+   .start = ops->start,
.dump = ops->dumpit,
.done = ops->done,
};
-- 
2.4.6

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v5 net-next 0/4] ila: Optimization to preserve value of early demux

2015-12-14 Thread Tom Herbert
In the current implementation of ILA, LWT is used to perform
translation on both the input and output paths. This is functional,
however there is a big performance hit in the receive path. Early
demux occurs before the routing lookup (a hit actually obviates the
route lookup). Therefore the stack currently performs early
demux before translation so that a local connection with ILA
addresses is never matched. Note that this issue is not just
with ILA, but pretty much any translated or encapsulated packet
handled by LWT would miss the opportunity for early demux. Solving
the general problem seems non trivial since we would need to move
the route lookup before early demux thereby mitigating the value.

This patch set addresses the issue for ILA by adding a fast locator
lookup that occurs before early demux. This is done by hooking into
NF_INET_PRE_ROUTING.

For the backend we implement an rhashtable that contains identifier
to locator mappings. The table also allows more specific matches
that include original locator and interface.

This patch set:
 - Add an rhashtable function to atomically replace an element.
   This is useful to implement sub-trees from a table entry
   without needing to use a special anchor structure as the
   table entry.
 - Add a start callback for starting a netlink dump.
 - Creates an ila directory under net/ipv6 and moves ila.c to it.
   ila.c is split into ila_common.c and ila_lwt.c.
 - Implement a table to do identifier->locator mapping. This is
   an rhashtable (in ila_xlat.c).
 - Configuration for the table with netlink.
 - Add a hook into NF_INET_PRE_ROUTING to perform ILA translation
   before early demux.

Changes in v2:
 - Use iptables targets instead of a new xfrm function

Changes in v3:
 - Add __rcu to next pointer in struct ila_map

Changes in v4:
 - Use hook for NF_INET_PRE_ROUTING

Changes in v5:
 - Register hooks per namespace using nf_register_net_hooks
 - Only register hooks when first mapping is actually added

Testing:
   Running 200 netperf TCP_RR streams

No ILA, baseline
   79.26% CPU utilization
   1678282 tps
   104/189/390 50/90/99% latencies

ILA before fix (LWT on both input and output)
   81.91% CPU utilization
   1464723 tps (-14.5% from baseline)
   121/215/411 50/90/99% latencies

ILA after fix
   80.62% CPU utilization
   1622985 (-3.4% from baseline)
   110/191/347 50/90/99% latencies

Tom Herbert (4):
  ila: Create net/ipv6/ila directory
  rhashtable: add function to replace an element
  netlink: add a start callback for starting a netlink dump
  ila: Add generic ILA translation facility

 include/linux/netlink.h|   2 +
 include/linux/rhashtable.h |  82 ++
 include/net/genetlink.h|   2 +
 include/net/ila.h  |  18 ++
 include/uapi/linux/ila.h   |  22 ++
 net/ipv6/Makefile  |   2 +-
 net/ipv6/ila.c | 229 ---
 net/ipv6/ila/Makefile  |   7 +
 net/ipv6/ila/ila.h |  48 
 net/ipv6/ila/ila_common.c  | 103 +++
 net/ipv6/ila/ila_lwt.c | 152 ++
 net/ipv6/ila/ila_xlat.c| 680 +
 net/netlink/af_netlink.c   |   4 +
 net/netlink/genetlink.c|  16 ++
 14 files changed, 1137 insertions(+), 230 deletions(-)
 create mode 100644 include/net/ila.h
 delete mode 100644 net/ipv6/ila.c
 create mode 100644 net/ipv6/ila/Makefile
 create mode 100644 net/ipv6/ila/ila.h
 create mode 100644 net/ipv6/ila/ila_common.c
 create mode 100644 net/ipv6/ila/ila_lwt.c
 create mode 100644 net/ipv6/ila/ila_xlat.c

-- 
2.4.6

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] net: emac: emac gigabit ethernet controller driver

2015-12-14 Thread Florian Fainelli
On 14/12/15 16:19, Gilad Avidov wrote:

[snip]

> + "sgmii_irq";
> + qcom,emac-gpio-mdc = < 123 0>;
> + qcom,emac-gpio-mdio = < 124 0>;
> + qcom,emac-tstamp-en;
> + qcom,emac-ptp-frac-ns-adj = <12500 1>;
> + phy-addr = <0>;

Please use the standard Ethernet PHY and MDIO device tree bindings to
describe your MAC to PHY connection here, that includes using a
phy-connection-type property to describe the (x)MII lanes.

[snip]

> +/* EMAC_MAC_CTRL */
> +#define SINGLE_PAUSE_MODE   0x1000
> +#define DEBUG_MODE   0x800
> +#define BROAD_EN 0x400
> +#define MULTI_ALL0x200
> +#define RX_CHKSUM_EN 0x100
> +#define HUGE  0x80
> +#define SPEED_BMSK0x30
> +#define SPEED_SHFT  20
> +#define SIMR   0x8
> +#define TPAUSE 0x1
> +#define PROM_MODE   0x8000
> +#define VLAN_STRIP  0x4000
> +#define PRLEN_BMSK  0x3c00
> +#define PRLEN_SHFT  10
> +#define HUGEN0x200
> +#define FLCHK0x100
> +#define PCRCE 0x80
> +#define CRCE  0x40
> +#define FULLD 0x20
> +#define MAC_LP_EN 0x10
> +#define RXFC   0x8
> +#define TXFC   0x4
> +#define RXEN   0x2
> +#define TXEN   0x1

BIT(x)? which would avoid making this reverse christmas tree, I know
this is the time of year though.

[snip]

> +/* DMA address */
> +#define DMA_ADDR_HI_MASK 0xULL
> +#define DMA_ADDR_LO_MASK 0xULL
> +
> +#define EMAC_DMA_ADDR_HI(_addr)  \
> + ((u32)(((u64)(_addr) & DMA_ADDR_HI_MASK) >> 32))
> +#define EMAC_DMA_ADDR_LO(_addr)  \
> + ((u32)((u64)(_addr) & DMA_ADDR_LO_MASK))

The kernel provides helpers for that: upper_32bits and lower_32bits().

[snip]

> +struct emac_skb_cb {
> + u32   tpd_idx;
> + unsigned long jiffies;
> +};
> +
> +struct emac_tx_ts_cb {
> + u32 sec;
> + u32 ns;
> +};
> +
> +#define EMAC_SKB_CB(skb) ((struct emac_skb_cb *)(skb)->cb)
> +#define EMAC_TX_TS_CB(skb)   ((struct emac_tx_ts_cb *)(skb)->cb)

Should not these two have different offsets within skb->cb in case they
both end-up being added to the same SKB?

[snip]

> +static void emac_mac_irq_enable(struct emac_adapter *adpt)
> +{
> + int i;
> +
> + for (i = 0; i < EMAC_NUM_CORE_IRQ; i++) {
> + struct emac_irq *irq = >irq[i];
> + const struct emac_irq_config*irq_cfg = _irq_cfg_tbl[i];
> +
> + writel_relaxed(~DIS_INT, adpt->base + irq_cfg->status_reg);
> + writel_relaxed(irq->mask, adpt->base + irq_cfg->mask_reg);
> + }
> +
> + wmb(); /* ensure that irq and ptp setting are flushed to HW */

Would using writel() not be the appropriate thing here, instead of
using _relaxed, which has no barrier?

[snip]

> + mta = readl_relaxed(adpt->base + EMAC_HASH_TAB_REG0 + (reg << 2));
> + mta |= (0x1 << bit);
> + writel_relaxed(mta, adpt->base + EMAC_HASH_TAB_REG0 + (reg << 2));
> + wmb(); /* ensure that the mac address is flushed to HW */

This is getting too much here, just use the correct I/O accessor for
your platform, period.

[snip]

> +
> + /* enable RX/TX Flow Control */
> + switch (phy->cur_fc_mode) {
> + case EMAC_FC_FULL:
> + mac |= (TXFC | RXFC);
> + break;
> + case EMAC_FC_RX_PAUSE:
> + mac |= RXFC;
> + break;
> + case EMAC_FC_TX_PAUSE:
> + mac |= TXFC;
> + break;
> + default:
> + break;
> + }
> +
> + /* setup link speed */
> + mac &= ~SPEED_BMSK;
> + switch (phy->link_speed) {
> + case EMAC_LINK_SPEED_1GB_FULL:
> + mac |= ((emac_mac_speed_1000 << SPEED_SHFT) & SPEED_BMSK);
> + csr1 |= FREQ_MODE;
> + 

Re: [PATCH net] pptp: validate sockaddr_len before binding

2015-12-14 Thread Hannes Frederic Sowa
On 14.12.2015 23:58, Cong Wang wrote:
> On Mon, Dec 14, 2015 at 2:45 PM, Hannes Frederic Sowa
>  wrote:
>> diff --git a/drivers/net/ppp/pptp.c b/drivers/net/ppp/pptp.c
>> index fc69e41d09506e..f9ffdf070ad807 100644
>> --- a/drivers/net/ppp/pptp.c
>> +++ b/drivers/net/ppp/pptp.c
>> @@ -419,6 +419,9 @@ static int pptp_bind(struct socket *sock, struct 
>> sockaddr *uservaddr,
>> struct pptp_opt *opt = >proto.pptp;
>> int error = 0;
>>
>> +   if (sockaddr_len < sizeof(*sp))
>> +   return -EINVAL;
>> +
> 
> I sent a very similar patch:
> https://patchwork.ozlabs.org/patch/556663/

Ah, thanks. Did not notice. The connect() part is also already in my
queue, but I don't think it solves the use-after-free. The RCU
implementation of callid_sock seems broken to me.

David, discard my patch.

Thanks,
Hannes


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


WARN trace - skb_warn_bad_offload - vxlan - large udp packet - udp checksum disabled

2015-12-14 Thread Nelson, Shannon
Using a slightly modified version of udpspam (see diff below - hopefully not 
mangled by corporate email servers), where I set the SO_NO_CHECK socket option 
and can specify a large buffer size, I can reliably get the following WARN 
trace.  I have reproduced this on both ixgbe and i40e drivers using 
"udpspam-no-check  6000".

It looks to me like this is in the Tx path before we get to the actual NIC 
drivers, but I may be wrong.


[ 1757.644324] [ cut here ]
[ 1757.644333] WARNING: CPU: 22 PID: 5537 at net/core/dev.c:2423 
skb_warn_bad_offload+0x104/0x111()
[ 1757.644340] ixgbe: caps=(0x080660314bb3, 0x) len=6092 
data_len=6000 gso_size=1528 gso_type=1026 ip_summed=0
[ 1757.644343] Modules linked in: nfnetlink_queue nfnetlink_log nfnetlink vxlan 
ip6_udp_tunnel udp_tunnel rfcomm xt_CHECKSUM bnep bluetooth rfkill tun fuse 
ip6t_rpfilter ip6t_REJECT nf_reject_ipv6 nf_conntrack_ipv6 nf_defrag_ipv6 
nf_conntrack_ipv4 nf_defrag_ipv4 xt_conntrack nf_conntrack ebtable_nat 
ebtable_broute bridge stp llc ebtable_filter ebtables ip6table_mangle 
ip6table_security ip6table_raw ip6table_filter ip6_tables iptable_mangle 
iptable_security iptable_raw x86_pkg_temp_thermal coretemp kvm_intel kvm joydev 
iTCO_wdt ipmi_devintf iTCO_vendor_support irqbypass crct10dif_pclmul ixgbe igb 
crc32_pclmul ipmi_si ptp pps_core sb_edac lpc_ich crc32c_intel pcspkr edac_core 
i2c_i801 mfd_core mdio ipmi_msghandler mei_me shpchp tpm_tis ioatdma mei dca 
wmi tpm binfmt_misc uinput mgag200 i2c_algo_bit drm_kms_helper
[ 1757.63]  ttm drm isci libsas firewire_ohci firewire_core i2c_core 
scsi_transport_sas crc_itu_t
[ 1757.644475] CPU: 22 PID: 5537 Comm: udpspam-no-chec Tainted: GW  
 4.4.0-rc3+ #1
[ 1757.644480] Hardware name: Intel Corporation S2600CO/S2600CO, BIOS 
SE5C600.86B.02.03.0003.041920141333 04/19/2014
[ 1757.644482]   c92b03e4 88081907b410 
8138f918
[ 1757.644488]  88081907b458 88081907b448 8109c036 
8808196e9500
[ 1757.644494]  880816f6 0402  
88080e8eb2ac
[ 1757.644499] Call Trace:
[ 1757.644509]  [] dump_stack+0x44/0x5c
[ 1757.644514]  [] warn_slowpath_common+0x86/0xc0
[ 1757.644518]  [] warn_slowpath_fmt+0x5c/0x80
[ 1757.644523]  [] ? ___ratelimit+0x8c/0xf0
[ 1757.644539]  [] skb_warn_bad_offload+0x104/0x111
[ 1757.644549]  [] __skb_gso_segment+0x7f/0xd0
[ 1757.644563]  [] 
validate_xmit_skb.isra.104.part.105+0x11f/0x2a0
[ 1757.644572]  [] validate_xmit_skb_list+0x3b/0x60
[ 1757.644579]  [] sch_direct_xmit+0xc1/0x1f0
[ 1757.644585]  [] __dev_queue_xmit+0x21b/0x510
[ 1757.644589]  [] dev_queue_xmit+0x10/0x20
[ 1757.644593]  [] ip_finish_output2+0x23f/0x310
[ 1757.644598]  [] ip_finish_output+0x139/0x1f0
[ 1757.644605]  [] ? nf_hook_slow+0x76/0xd0
[ 1757.644610]  [] ip_output+0x6e/0xe0
[ 1757.644615]  [] ? __ip_local_out+0x42/0x100
[ 1757.644620]  [] ? ip_fragment.constprop.49+0x80/0x80
[ 1757.644627]  [] ip_local_out+0x35/0x40
[ 1757.644634]  [] iptunnel_xmit+0x12d/0x150
[ 1757.644640]  [] udp_tunnel_xmit_skb+0xea/0x100 [udp_tunnel]
[ 1757.644648]  [] vxlan_xmit_one+0xac6/0x1280 [vxlan]
[ 1757.644659]  [] ? vprintk_emit+0x2f2/0x4f0
[ 1757.644675]  [] ? printk+0x5d/0x74
[ 1757.644681]  [] ? warn_slowpath_common+0x95/0xc0
[ 1757.644688]  [] vxlan_xmit+0x172/0xd44 [vxlan]
[ 1757.644694]  [] ? inet_gso_segment+0x163/0x360
[ 1757.644711]  [] dev_hard_start_xmit+0x22e/0x3b0
[ 1757.644721]  [] __dev_queue_xmit+0x414/0x510
[ 1757.644734]  [] dev_queue_xmit+0x10/0x20
[ 1757.644747]  [] ip_finish_output2+0x23f/0x310
[ 1757.644758]  [] ip_finish_output+0x139/0x1f0
[ 1757.644763]  [] ? nf_hook_slow+0x76/0xd0
[ 1757.644768]  [] ip_output+0x6e/0xe0
[ 1757.644775]  [] ? __ip_local_out+0x42/0x100
[ 1757.644780]  [] ? ip_fragment.constprop.49+0x80/0x80
[ 1757.644785]  [] ip_local_out+0x35/0x40
[ 1757.644793]  [] ip_send_skb+0x19/0x40
[ 1757.644800]  [] udp_send_skb+0x16d/0x270
[ 1757.644807]  [] udp_sendmsg+0x2c8/0x9a0
[ 1757.644812]  [] ? ip_reply_glue_bits+0x60/0x60
[ 1757.644825]  [] inet_sendmsg+0x67/0xa0
[ 1757.644838]  [] sock_sendmsg+0x38/0x50
[ 1757.644852]  [] SYSC_sendto+0x102/0x190
[ 1757.644860]  [] ? __audit_syscall_entry+0xaf/0x100
[ 1757.644867]  [] ? do_audit_syscall_entry+0x66/0x70
[ 1757.644873]  [] ? syscall_trace_enter_phase1+0x11f/0x140
[ 1757.644879]  [] ? syscall_slow_exit_work+0x3f/0x9f
[ 1757.644883]  [] SyS_sendto+0xe/0x10
[ 1757.644890]  [] entry_SYSCALL_64_fastpath+0x12/0x71
[ 1757.644895] ---[ end trace ec9dfd887c59f41f ]---


Here are the udpspam.c diffs from the original that I found at
http://oss.sgi.com/archives/netdev/2001-10/txtmtEDzF63p0.txt


--- udpspam.c   2015-12-14 16:56:18.287053786 -0800
+++ udpspam-no-check.c  2015-12-14 17:02:21.979047972 -0800
@@ -42,8 +42,8 @@ typedef unsigned long dword;
 
 /* Globals */
 
-#define PAYLOAD_SIZE 4
-static char payload[PAYLOAD_SIZE];
+static int payload_size = 4;
+static char *payload;
 
 /* Socket functions 

RE: [PATCH net-next] net, cgroup: cgroup_sk_updat_lock was missing initializer

2015-12-14 Thread Dexuan Cui
> -Original Message-
> From: David Miller [mailto:da...@davemloft.net]
> Sent: Tuesday, December 15, 2015 3:21
> To: t...@kernel.org
> Cc: Dexuan Cui ; pa...@netfilter.org; ka...@trash.net;
> kad...@blackhole.kfki.hu; dan...@iogearbox.net; daniel.wag...@bmw-carit.de;
> nhor...@tuxdriver.com; lize...@huawei.com; han...@cmpxchg.org;
> netdev@vger.kernel.org; netfilter-de...@vger.kernel.org;
> coret...@netfilter.org; cgro...@vger.kernel.org; linux-
> ker...@vger.kernel.org; kernel-t...@fb.com; nin...@fb.com
> Subject: Re: [PATCH net-next] net, cgroup: cgroup_sk_updat_lock was missing
> initializer
> 
> From: Tejun Heo 
> Date: Mon, 14 Dec 2015 11:24:06 -0500
> 
> > bd1060a1d671 ("sock, cgroup: add sock->sk_cgroup") added global
> > spinlock cgroup_sk_update_lock but erroneously skipped initializer
> > leading to uninitialized spinlock warning.  Fix it by using
> > DEFINE_SPINLOCK().
> >
> > Signed-off-by: Tejun Heo 
> > Reported-by: Dexuan Cui 
> > Fixes: bd1060a1d671 ("sock, cgroup: add sock->sk_cgroup")
> 
> Applied, thanks.

Thanks! I can confirm it fixed the issue.

Thanks,
-- Dexuan
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] gianfar: Don't enable RX Filer if not supported

2015-12-14 Thread Hamish Martin
After commit 15bf176db1fb ("gianfar: Don't enable the Filer w/o the
Parser"), 'TSEC' model controllers (for example as seen on MPC8541E)
always have 8 bytes stripped from the front of received frames.
Only 'eTSEC' gianfar controllers have the RX Filer capability (amongst
other enhancements). Previously this was treated as always enabled
for both 'TSEC' and 'eTSEC' controllers.
In commit 15bf176db1fb ("gianfar: Don't enable the Filer w/o the Parser")
a subtle change was made to the setting of 'uses_rxfcb' to effectively
always set it (since 'rx_filer_enable' was always true). This had the
side-effect of always stripping 8 bytes from the front of received frames
on 'TSEC' type controllers.

We now only enable the RX Filer capability on controller types that
support it, thereby avoiding the issue for 'TSEC' type controllers.

Reviewed-by: Chris Packham 
Reviewed-by: Mark Tomlinson 
Signed-off-by: Hamish Martin 
---
 drivers/net/ethernet/freescale/gianfar.c | 8 +---
 drivers/net/ethernet/freescale/gianfar.h | 1 +
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/freescale/gianfar.c 
b/drivers/net/ethernet/freescale/gianfar.c
index 7cf8984..3e233d9 100644
--- a/drivers/net/ethernet/freescale/gianfar.c
+++ b/drivers/net/ethernet/freescale/gianfar.c
@@ -894,7 +894,8 @@ static int gfar_of_init(struct platform_device *ofdev, 
struct net_device **pdev)
 FSL_GIANFAR_DEV_HAS_VLAN |
 FSL_GIANFAR_DEV_HAS_MAGIC_PACKET |
 FSL_GIANFAR_DEV_HAS_EXTENDED_HASH |
-FSL_GIANFAR_DEV_HAS_TIMER;
+FSL_GIANFAR_DEV_HAS_TIMER |
+FSL_GIANFAR_DEV_HAS_RX_FILER;
 
err = of_property_read_string(np, "phy-connection-type", );
 
@@ -1396,8 +1397,9 @@ static int gfar_probe(struct platform_device *ofdev)
priv->rx_queue[i]->rxic = DEFAULT_RXIC;
}
 
-   /* always enable rx filer */
-   priv->rx_filer_enable = 1;
+   /* Always enable rx filer if available */
+   priv->rx_filer_enable =
+   (priv->device_flags & FSL_GIANFAR_DEV_HAS_RX_FILER) ? 1 : 0;
/* Enable most messages by default */
priv->msg_enable = (NETIF_MSG_IFUP << 1 ) - 1;
/* use pritority h/w tx queue scheduling for single queue devices */
diff --git a/drivers/net/ethernet/freescale/gianfar.h 
b/drivers/net/ethernet/freescale/gianfar.h
index f266b20..cb77667 100644
--- a/drivers/net/ethernet/freescale/gianfar.h
+++ b/drivers/net/ethernet/freescale/gianfar.h
@@ -923,6 +923,7 @@ struct gfar {
 #define FSL_GIANFAR_DEV_HAS_BUF_STASHING   0x0400
 #define FSL_GIANFAR_DEV_HAS_TIMER  0x0800
 #define FSL_GIANFAR_DEV_HAS_WAKE_ON_FILER  0x1000
+#define FSL_GIANFAR_DEV_HAS_RX_FILER   0x2000
 
 #if (MAXGROUPS == 2)
 #define DEFAULT_MAPPING0xAA
-- 
2.6.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


linux-next: manual merge of the net-next tree with the net tree

2015-12-14 Thread Stephen Rothwell
Hi all,

Today's linux-next merge of the net-next tree got a conflict in:

  drivers/net/geneve.c

between commit:

  a322a1bcf329 ("geneve: Fix IPv6 xmit stats update.")

from the net tree and commit:

  abe492b4f50c ("geneve: UDP checksum configuration via netlink")

from the net-next tree.

I fixed it up (see below) and can carry the fix as necessary (no action
is required).

-- 
Cheers,
Stephen Rothwells...@canb.auug.org.au

diff --cc drivers/net/geneve.c
index c2b79f5d1c89,0750d7a93878..
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@@ -966,7 -984,10 +984,8 @@@ static netdev_tx_t geneve6_xmit_skb(str
}
err = udp_tunnel6_xmit_skb(dst, gs6->sock->sk, skb, dev,
   , , prio, ttl,
-  sport, geneve->dst_port, !udp_csum);
+  sport, geneve->dst_port,
+  !!(flags & GENEVE_F_UDP_ZERO_CSUM6_TX));
 -
 -  iptunnel_xmit_stats(err, >stats, dev->tstats);
return NETDEV_TX_OK;
  
  tx_error:
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: [PATCH 1/1] net: sctp: dynamically enable or disable pf state

2015-12-14 Thread David Laight
From: zyjzyj2...@gmail.com
> Sent: 11 December 2015 09:06
...
> +pf_enable - INTEGER
> + Enable or disable pf state. A value of pf_retrans > path_max_retrans
> + also disables pf state. That is, one of both pf_enable and
> + pf_retrans > path_max_retrans can disable pf state. Since pf_retrans
> + and path_max_retrans can be changed by userspace application, sometimes
> + user expects to disable pf state by the value of
> + pf_retrans > path_max_retrans, but occasionally the value of pf_retrans
> + or path_max_retrans is changed by the user application, this pf state is
> + enabled. As such, it is necessary to add this to dynamically enable
> + and disable pf state.
> +
> + 1: Enable pf.
> +
> + 0: Disable pf.
> +
> + Default: 1

You ought to say what 'pf' is short for.

David

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: use-after-free in sctp_do_sm

2015-12-14 Thread David Laight
From: Vlad Yasevich
> Sent: 11 December 2015 18:38
...
> > Found a similar place in abort primitive handling like in this last
> > patch update, it's probably the issue you're still triggering.
> >
> > Also found another place that may lead to this use after free, in case
> > we receive a packet with a chunk that has no data.
> >
> > Oh my.. :)
> 
> Yes.  This is what I was worried about...  Anything that triggers
> a DELETE_TCB command has to return a code that we can trap.
> 
> The other way is to do what Dmitri suggested, but even there, we
> need to be very careful.

I'm always wary of anything that queues actions up for later processing.
It is far too easy (as found here) to end up processing actions
in invalid states, or to process actions in 'unusual' orders when
specific events happen close together.

I wonder how much fallout there'd be from getting the sctp code
to immediately action things, instead of queuing the actions for later.
It would certainly remove a lot of the unusual combinations of events.

David


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] ath9k_htc: fix handling return value of ath9k_hw_calibrate

2015-12-14 Thread Andrzej Hajda
The function can return negative values in case of error.
Its result should be then tested for such case.

The problem has been detected using proposed semantic patch
scripts/coccinelle/tests/assign_signed_to_unsigned.cocci [1].

[1]: http://permalink.gmane.org/gmane.linux.kernel/2046107

Signed-off-by: Andrzej Hajda 
---
 drivers/net/wireless/ath/ath9k/htc_drv_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_main.c 
b/drivers/net/wireless/ath/ath9k/htc_drv_main.c
index a680a97..fe1fd1a 100644
--- a/drivers/net/wireless/ath/ath9k/htc_drv_main.c
+++ b/drivers/net/wireless/ath/ath9k/htc_drv_main.c
@@ -834,7 +834,7 @@ void ath9k_htc_ani_work(struct work_struct *work)
if (longcal || shortcal)
common->ani.caldone =
ath9k_hw_calibrate(ah, ah->curchan,
-  ah->rxchainmask, longcal);
+   ah->rxchainmask, longcal) > 0;
 
ath9k_htc_ps_restore(priv);
}
-- 
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] net/mlx4_core: fix handling return value of mlx4_slave_convert_port

2015-12-14 Thread Andrzej Hajda
The function can return negative values, so its result should
be assigned to signed variable.

The problem has been detected using proposed semantic patch
scripts/coccinelle/tests/assign_signed_to_unsigned.cocci [1].

[1]: http://permalink.gmane.org/gmane.linux.kernel/2046107

Signed-off-by: Andrzej Hajda 
---
 drivers/net/ethernet/mellanox/mlx4/resource_tracker.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c 
b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
index da7f578..b46dbe2 100644
--- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
+++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
@@ -4331,9 +4331,10 @@ int mlx4_QP_FLOW_STEERING_ATTACH_wrapper(struct mlx4_dev 
*dev, int slave,
return -EOPNOTSUPP;
 
ctrl = (struct mlx4_net_trans_rule_hw_ctrl *)inbox->buf;
-   ctrl->port = mlx4_slave_convert_port(dev, slave, ctrl->port);
-   if (ctrl->port <= 0)
+   err = mlx4_slave_convert_port(dev, slave, ctrl->port);
+   if (err <= 0)
return -EINVAL;
+   ctrl->port = err;
qpn = be32_to_cpu(ctrl->qpn) & 0xff;
err = get_res(dev, slave, qpn, RES_QP, &rqp);
if (err) {
-- 
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: sbc8641: drop bogus PHY IRQ entries from DTS file

2015-12-14 Thread Michael Ellerman
On Tue, 2015-08-12 at 22:44:02 UTC, Paul Gortmaker wrote:
> This file was originally cloned off of the MPC8641D-HPCN reference
> platform, which actually had a PHY IRQ line connected.  However
> this board does not.  The bogus entry was largely inert and went
> undetected until commit 321beec5047af83db90c88114b7e664b156f49fe
> ("net: phy: Use interrupts when available in NOLINK state") was
> added to the tree.
> 
> With the above commit, the board fails to NFS boot since it sits
> waiting for a PHY IRQ event that of course never arrives.  Removing
> the bogus entries from the DTS file fixes the issue.
> 
> Cc: Andrew Lunn 
> Signed-off-by: Paul Gortmaker 

Applied to powerpc fixes, thanks.

https://git.kernel.org/powerpc/c/5b01310cfc8d2302dcca1d8d

cheers
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Information leak in pptp_bind

2015-12-14 Thread Dmitry Vyukov
Hello,

The following program leaks various uninitialized garbage, including kernel
addresses and whatever is on the kernel stack, in particular defeating
ASLR. The issue is in pptp_bind, which does not verify sockaddr_len:

#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 

int main(void)
{
struct sockaddr sa;
unsigned len, i, try;
int fd;

for (try = 0; try < 5; try++) {
fd = socket(AF_PPPOX, SOCK_RAW, PX_PROTO_PPTP);
if (fd == -1)
return;
memset(&sa, 0, sizeof(sa));
bind(fd, &sa, 0);
len = sizeof(sa);
getsockname(fd, &sa, &len);
for (i = 0; i < len; i++)
printf("%02x", ((unsigned char*)&sa)[i]);
printf("\n");
}
return 0;
}

# ./a.out
180002004700c012833d0088b002405eddc66d2b
180002004800408bf13a0088b002405eddc66d2b
18000200490080a5f13a0088b002405eddc66d2b
180002004a8ff13a0088b002405eddc66d2b
180002004b008096f13a0088b002405eddc66d2b
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 5/6] netfilter: nf_dup: add missing dependencies with NF_CONNTRACK

2015-12-14 Thread Pablo Neira Ayuso
CONFIG_NF_CONNTRACK=m
CONFIG_NF_DUP_IPV4=y

results in:

   net/built-in.o: In function `nf_dup_ipv4':
>> (.text+0xd434f): undefined reference to `nf_conntrack_untracked'

Reported-by: kbuild test robot 
Signed-off-by: Pablo Neira Ayuso 
---
 net/ipv4/netfilter/Kconfig | 1 +
 net/ipv6/netfilter/Kconfig | 1 +
 2 files changed, 2 insertions(+)

diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index a355841..c187c60 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -60,6 +60,7 @@ config NFT_REJECT_IPV4
 
 config NFT_DUP_IPV4
tristate "IPv4 nf_tables packet duplication support"
+   depends on !NF_CONNTRACK || NF_CONNTRACK
select NF_DUP_IPV4
help
  This module enables IPv4 packet duplication support for nf_tables.
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index f6a024e..e10a04c 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -49,6 +49,7 @@ config NFT_REJECT_IPV6
 
 config NFT_DUP_IPV6
tristate "IPv6 nf_tables packet duplication support"
+   depends on !NF_CONNTRACK || NF_CONNTRACK
select NF_DUP_IPV6
help
  This module enables IPv6 packet duplication support for nf_tables.
-- 
2.1.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 6/6] netfilter: nf_tables: use reverse traversal commit_list in nf_tables_abort

2015-12-14 Thread Pablo Neira Ayuso
From: Xin Long 

When we use 'nft -f' to submit rules, it builds multiple rules into
one netlink skb to send to the kernel, and the kernel processes them one
by one. Meanwhile, it adds each trans to commit_list to record every commit.
If the return value of any of them is -EAGAIN, status |= NFNL_BATCH_REPLAY
is set, and after all the processing is done, all the commits are rolled
back.

Currently the kernel uses list_add_tail to add a trans to commit_list, and
uses list_for_each_entry_safe to roll back, which means the order of adding
and rollback is the same. That causes some cases to not work properly, and
can even trigger a call trace, like:

1. add a set into table foo  [return -EAGAIN]:
   commit_list = 'add set trans'
2. del foo:
   commit_list = 'add set trans' -> 'del set trans' -> 'del tab trans'
then nf_tables_abort will be called to roll back:
firstly process 'add set trans':
   case NFT_MSG_NEWSET:
trans->ctx.table->use--;
list_del_rcu(&nft_trans_set(trans)->list);

  it will delete the set from table foo, but the set was already removed
  when table foo was deleted [step 2], so the kernel will panic.

the right order of rollback should be:
  'del tab trans' -> 'del set trans' -> 'add set trans'.
which is opposite with commit_list order.

so fix it by rolling back commits with reverse order in nf_tables_abort.

Signed-off-by: Xin Long 
Signed-off-by: Pablo Neira Ayuso 
---
 net/netfilter/nf_tables_api.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index f1002dc..2cb429d 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -4024,7 +4024,8 @@ static int nf_tables_abort(struct sk_buff *skb)
struct nft_trans *trans, *next;
struct nft_trans_elem *te;
 
-   list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) {
+   list_for_each_entry_safe_reverse(trans, next, &net->nft.commit_list,
+list) {
switch (trans->msg_type) {
case NFT_MSG_NEWTABLE:
if (nft_trans_table_update(trans)) {
-- 
2.1.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v6 0/4] stmmac: create of compatible mdio bus for stmacc driver

2015-12-14 Thread David Miller
From: Phil Reid 
Date: Mon, 14 Dec 2015 11:31:58 +0800

> Provide ability to specify a fixed phy in the device tree and
> retain the mdio bus if no phy is found. This is needed where 
> a dsa is connected via a fixed phy and uses the mdio bus for config.
> Fixed ptp ref clock calculations for the stmmac when ptp ref clock
> is running at <= 50Mhz. Also add device tree setting to config
> ptp clk source on socfpga platforms.

Series applied to net-next, thanks.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[net-next 7/9] e1000e: Do not read ICR in Other interrupt

2015-12-14 Thread Jeff Kirsher
From: Benjamin Poirier 

Removes the ICR read in the other interrupt handler, uses EIAC to
autoclear the Other bit from ICR and IMS. This allows us to avoid
interference with Rx and Tx interrupts in the Other interrupt handler.

The information read from ICR is not needed. IMS is configured such that
the only interrupt cause that can trigger the Other interrupt is Link
Status Change.

Signed-off-by: Benjamin Poirier 
Tested-by: Aaron Brown 
Signed-off-by: Jeff Kirsher 
---
 drivers/net/ethernet/intel/e1000e/netdev.c | 22 +++---
 1 file changed, 7 insertions(+), 15 deletions(-)

diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c 
b/drivers/net/ethernet/intel/e1000e/netdev.c
index 26cf183..56bc422 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -1905,24 +1905,15 @@ static irqreturn_t e1000_msix_other(int __always_unused 
irq, void *data)
struct net_device *netdev = data;
struct e1000_adapter *adapter = netdev_priv(netdev);
struct e1000_hw *hw = &adapter->hw;
-   u32 icr = er32(ICR);
 
-   if (icr & adapter->eiac_mask)
-   ew32(ICS, (icr & adapter->eiac_mask));
+   hw->mac.get_link_status = true;
 
-   if (icr & E1000_ICR_OTHER) {
-   if (!(icr & E1000_ICR_LSC))
-   goto no_link_interrupt;
-   hw->mac.get_link_status = true;
-   /* guard against interrupt when we're going down */
-   if (!test_bit(__E1000_DOWN, &adapter->state))
-   mod_timer(&adapter->watchdog_timer, jiffies + 1);
+   /* guard against interrupt when we're going down */
+   if (!test_bit(__E1000_DOWN, &adapter->state)) {
+   mod_timer(&adapter->watchdog_timer, jiffies + 1);
+   ew32(IMS, E1000_IMS_OTHER);
}
 
-no_link_interrupt:
-   if (!test_bit(__E1000_DOWN, &adapter->state))
-   ew32(IMS, E1000_IMS_LSC | E1000_IMS_OTHER);
-
return IRQ_HANDLED;
 }
 
@@ -2021,6 +2012,7 @@ static void e1000_configure_msix(struct e1000_adapter 
*adapter)
   hw->hw_addr + E1000_EITR_82574(vector));
else
writel(1, hw->hw_addr + E1000_EITR_82574(vector));
+   adapter->eiac_mask |= E1000_IMS_OTHER;
 
/* Cause Tx interrupts on every write back */
ivar |= (1 << 31);
@@ -2249,7 +2241,7 @@ static void e1000_irq_enable(struct e1000_adapter 
*adapter)
 
if (adapter->msix_entries) {
ew32(EIAC_82574, adapter->eiac_mask & E1000_EIAC_MASK_82574);
-   ew32(IMS, adapter->eiac_mask | E1000_IMS_OTHER | E1000_IMS_LSC);
+   ew32(IMS, adapter->eiac_mask | E1000_IMS_LSC);
} else if ((hw->mac.type == e1000_pch_lpt) ||
   (hw->mac.type == e1000_pch_spt)) {
ew32(IMS, IMS_ENABLE_MASK | E1000_IMS_ECCER);
-- 
2.5.0

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[net-next 0/9][pull request] 1GbE Intel Wired LAN Driver Updates 2015-12-14

2015-12-14 Thread Jeff Kirsher
This series contains updates to e1000e and igb.

Alex Duyck changes e1000_up() to void since it always returned 0, also
by making it void, we can drop some code since we no longer have to worry
about non-zero return values.

Aaron Sierra removes GS40G specific defines and functions since the i210
internal PHY can be accessed with the access functions shared by 82580,
i350 and i354 devices.  Also removes the code to add the PHY address into
the PCDL register address, since there is no real reason to do so.

Joe updates the cable length function to report the true min, max and
average cable length of all four pairs for i210.  Also updates ethtool to
use enum-based labels instead of hard coded values.

Benjamin Poirier cleans up code that is never reachable since MSI-X
interrupts are not shared in e1000e.  Also removes the ICR read in the
other interrupt handler, since the information is not needed and IMS is
configured such that only a link status change can trigger the other
interrupt handler.  Also fixes MSI-X mode so that LSC is no longer written
to ICS: there is no handler for the LSC interrupt, so there is no point in
writing it now that we always assume other interrupts are caused by LSC.

The following are changes since commit cb4396edd84ed73081635fb933d19c1410fafaf4:
  drivers/net: fix eisa_driver probe section mismatch
and are available in the git repository at:
  git://git.kernel.org/pub/scm/linux/kernel/git/jkirsher/next-queue 1GbE

Aaron Sierra (2):
  igb: Remove GS40G specific defines/functions
  igb: Don't add PHY address to PCDL address

Alexander Duyck (1):
  e1000e: Switch e1000e_up to void, drop code checking for error result

Benjamin Poirier (4):
  e1000e: Remove unreachable code
  e1000e: Do not read ICR in Other interrupt
  e1000e: Do not write lsc to ics in msi-x mode
  e1000e: Fix msi-x interrupt automask

Joe Schultz (2):
  igb: Improve cable length function for I210, etc.
  igb: Explicitly label self-test result indices

 drivers/net/ethernet/intel/e1000e/defines.h|   3 +-
 drivers/net/ethernet/intel/e1000e/e1000.h  |   2 +-
 drivers/net/ethernet/intel/e1000e/netdev.c |  80 ++-
 drivers/net/ethernet/intel/igb/e1000_82575.c   |  13 +--
 drivers/net/ethernet/intel/igb/e1000_defines.h |   5 +-
 drivers/net/ethernet/intel/igb/e1000_hw.h  |   1 +
 drivers/net/ethernet/intel/igb/e1000_i210.c|   5 +-
 drivers/net/ethernet/intel/igb/e1000_i210.h|   2 +-
 drivers/net/ethernet/intel/igb/e1000_phy.c | 135 +
 drivers/net/ethernet/intel/igb/e1000_phy.h |  15 +--
 drivers/net/ethernet/intel/igb/igb_ethtool.c   |  38 ---
 11 files changed, 120 insertions(+), 179 deletions(-)

-- 
2.5.0

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH iproute2 -next] bpf: minor fix in api and bpf_dump_error() usage

2015-12-14 Thread Daniel Borkmann
Fix a whitespace in bpf_dump_error() usage, and also a missing closing
bracket in ntohl() macro for eBPF programs.

Signed-off-by: Daniel Borkmann 
---
 include/bpf_api.h | 2 +-
 tc/tc_bpf.c   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/bpf_api.h b/include/bpf_api.h
index 8503b9a..0666a31 100644
--- a/include/bpf_api.h
+++ b/include/bpf_api.h
@@ -53,7 +53,7 @@
 #endif
 
 #ifndef ntohl
-# define ntohl(X)  __constant_ntohl((X)
+# define ntohl(X)  __constant_ntohl((X))
 #endif
 
 /** Section helper macros. */
diff --git a/tc/tc_bpf.c b/tc/tc_bpf.c
index beb74be..f9b2b00 100644
--- a/tc/tc_bpf.c
+++ b/tc/tc_bpf.c
@@ -1042,7 +1042,7 @@ static int bpf_prog_attach(const char *section,
   "license:\'%s\') %s%s (%d)!\n\n",
   section, prog->type,
   prog->size / sizeof(struct bpf_insn),
-  prog->license, fd < 0 ? "rejected :" :
+  prog->license, fd < 0 ? "rejected: " :
   "loaded", fd < 0 ? strerror(errno) : "",
   fd < 0 ? errno : fd);
}
-- 
1.9.3

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v3 1/4] geneve: Add geneve udp port offload for ethernet devices

2015-12-14 Thread Singhai, Anjali



On 12/11/2015 7:11 PM, Tom Herbert wrote:

On Tue, Dec 8, 2015 at 10:12 AM, Anjali Singhai Jain
 wrote:

Add ndo_ops to add/del UDP ports to a device that supports geneve
offload.

v3: Add some more comments about the use of the new ndo ops.

Signed-off-by: Anjali Singhai Jain 
Signed-off-by: Kiran Patil 
---
  drivers/net/geneve.c  | 23 +++
  include/linux/netdevice.h | 21 -
  2 files changed, 43 insertions(+), 1 deletion(-)

diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index de5c30c..b43fd56 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -371,8 +371,11 @@ static struct socket *geneve_create_sock(struct net *net, 
bool ipv6,

  static void geneve_notify_add_rx_port(struct geneve_sock *gs)
  {
+   struct net_device *dev;
 struct sock *sk = gs->sock->sk;
+   struct net *net = sock_net(sk);
 sa_family_t sa_family = sk->sk_family;
+   __be16 port = inet_sk(sk)->inet_sport;
 int err;

 if (sa_family == AF_INET) {
@@ -381,6 +384,14 @@ static void geneve_notify_add_rx_port(struct geneve_sock 
*gs)
 pr_warn("geneve: udp_add_offload failed with status 
%d\n",
 err);
 }
+
+   rcu_read_lock();
+   for_each_netdev_rcu(net, dev) {
+   if (dev->netdev_ops->ndo_add_geneve_port)
+   dev->netdev_ops->ndo_add_geneve_port(dev, sa_family,
+port);
+   }
+   rcu_read_unlock();

What about IPv6 case?


The driver still gets add port calls for IPv6 and can decide to offload 
L4 RX checksum if the HW is capable.


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 0/6] netfilter fixes for net

2015-12-14 Thread David Miller
From: Pablo Neira Ayuso 
Date: Mon, 14 Dec 2015 12:25:40 +0100

> The following patchset contains Netfilter fixes for your net tree,
> specifically for nf_tables and nfnetlink_queue, they are:

Pulled, thanks a lot Pablo.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH net-next] net, cgroup: cgroup_sk_update_lock was missing initializer

2015-12-14 Thread Tejun Heo
bd1060a1d671 ("sock, cgroup: add sock->sk_cgroup") added global
spinlock cgroup_sk_update_lock but erroneously skipped initializer
leading to uninitialized spinlock warning.  Fix it by using
DEFINE_SPINLOCK().

Signed-off-by: Tejun Heo 
Reported-by: Dexuan Cui 
Fixes: bd1060a1d671 ("sock, cgroup: add sock->sk_cgroup")
---
Hello, Dexuan.

Oops, sorry about that.  Somehow thought it was a different problem
which is already fixed.  This should do it.

Thanks.

 kernel/cgroup.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 4f8f792..4466273f 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -5790,7 +5790,7 @@ EXPORT_SYMBOL_GPL(cgroup_get_from_path);
 
 #if defined(CONFIG_CGROUP_NET_PRIO) || defined(CONFIG_CGROUP_NET_CLASSID)
 
-spinlock_t cgroup_sk_update_lock;
+DEFINE_SPINLOCK(cgroup_sk_update_lock);
 static bool cgroup_sk_alloc_disabled __read_mostly;
 
 void cgroup_sk_alloc_disable(void)
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/2] net: vxlan: enable local checksum offload on HW_CSUM devices

2015-12-14 Thread Edward Cree
Signed-off-by: Edward Cree 
---
 drivers/net/vxlan.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 6369a57..c1660d6 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -1785,6 +1785,9 @@ static int vxlan_xmit_skb(struct rtable *rt, struct sock 
*sk, struct sk_buff *sk
bool udp_sum = !!(vxflags & VXLAN_F_UDP_CSUM);
int type = udp_sum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL;
u16 hdrlen = sizeof(struct vxlanhdr);
+   /* Is device able to do the inner checksum? */
+   bool inner_csum = skb_dst(skb) && skb_dst(skb)->dev &&
+   (skb_dst(skb)->dev->features & NETIF_F_HW_CSUM);
 
if ((vxflags & VXLAN_F_REMCSUM_TX) &&
skb->ip_summed == CHECKSUM_PARTIAL) {
@@ -1814,7 +1817,7 @@ static int vxlan_xmit_skb(struct rtable *rt, struct sock 
*sk, struct sk_buff *sk
if (WARN_ON(!skb))
return -ENOMEM;
 
-   skb = iptunnel_handle_offloads(skb, udp_sum, type);
+   skb = iptunnel_handle_offloads(skb, udp_sum && !inner_csum, type);
if (IS_ERR(skb))
return PTR_ERR(skb);
 
-- 
2.4.3

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1/2] net: udp: local checksum offload for encapsulation

2015-12-14 Thread Edward Cree
The arithmetic properties of the ones-complement checksum mean that a
 correctly checksummed inner packet, including its checksum, has a ones
 complement sum depending only on whatever value was used to initialise
 the checksum field before checksumming (in the case of TCP and UDP,
 this is the ones complement sum of the pseudo header, complemented).
Consequently, if we are going to offload the inner checksum with
 CHECKSUM_PARTIAL, we can compute the outer checksum based only on the
 packet data not covered by the inner checksum, and the initial value of
 the inner checksum field.

Signed-off-by: Edward Cree 
---
 net/ipv4/udp.c | 31 +++
 1 file changed, 27 insertions(+), 4 deletions(-)

diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 0c7b0e6..07d679e 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -767,12 +767,35 @@ void udp_set_csum(bool nocheck, struct sk_buff *skb,
 {
struct udphdr *uh = udp_hdr(skb);
 
-   if (nocheck)
+   if (nocheck) {
uh->check = 0;
-   else if (skb_is_gso(skb))
+   } else if (skb_is_gso(skb)) {
uh->check = ~udp_v4_check(len, saddr, daddr, 0);
-   else if (skb_dst(skb) && skb_dst(skb)->dev &&
-(skb_dst(skb)->dev->features & NETIF_F_V4_CSUM)) {
+   } else if (skb->ip_summed == CHECKSUM_PARTIAL &&
+  skb_dst(skb) && skb_dst(skb)->dev &&
+  (skb_dst(skb)->dev->features & NETIF_F_HW_CSUM)) {
+   /* Everything from csum_start onwards will be
+* checksummed and will thus have a sum of whatever
+* we previously put in the checksum field (eg. sum
+* of pseudo-header)
+*/
+   __wsum csum;
+
+   /* Fill in our pseudo-header checksum */
+   uh->check = ~udp_v4_check(len, saddr, daddr, 0);
+   /* Start with complement of inner pseudo-header checksum */
+   csum = ~skb_checksum(skb, skb_checksum_start_offset(skb) + 
skb->csum_offset,
+2, 0);
+   /* Add in checksum of our headers (incl. pseudo-header
+* checksum filled in above)
+*/
+   csum = skb_checksum(skb, 0, skb_checksum_start_offset(skb), 
csum);
+   /* The result is the outer checksum */
+   uh->check = csum_fold(csum);
+   if (uh->check == 0)
+   uh->check = CSUM_MANGLED_0;
+   } else if (skb_dst(skb) && skb_dst(skb)->dev &&
+  (skb_dst(skb)->dev->features & NETIF_F_V4_CSUM)) {
 
BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL);
 
-- 
2.4.3


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH iproute2] ip, route: fix minor compile warning

2015-12-14 Thread Phil Sutter
On Mon, Dec 14, 2015 at 04:34:29PM +0100, Daniel Borkmann wrote:
> Seems like gcc (4.8.3) doesn't catch this false positive, triggering
> after 0f7543322c5f ("route: ignore RTAX_HOPLIMIT of value -1"):
> 
>   iproute.c: In function 'print_route':
>   iproute.c:301:12: warning: 'val' may be used uninitialized in this function 
> [-Wmaybe-uninitialized]
>  features &= ~RTAX_FEATURE_ECN;
>   ^
>   iproute.c:575:10: note: 'val' was declared here
>   __u32 val;
> ^
> So just shut it up by initializing to 0.

Hmm. Interestingly, my patch shouldn't have changed anything relevant
for gcc's decision. OTOH, I don't see a warning using gcc-4.9.3.

Cheers, Phil
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH iproute2] ip, route: fix minor compile warning

2015-12-14 Thread Daniel Borkmann

On 12/14/2015 04:51 PM, Phil Sutter wrote:

On Mon, Dec 14, 2015 at 04:34:29PM +0100, Daniel Borkmann wrote:

Seems like gcc (4.8.3) doesn't catch this false positive, triggering
after 0f7543322c5f ("route: ignore RTAX_HOPLIMIT of value -1"):

   iproute.c: In function 'print_route':
   iproute.c:301:12: warning: 'val' may be used uninitialized in this function 
[-Wmaybe-uninitialized]
  features &= ~RTAX_FEATURE_ECN;
   ^
   iproute.c:575:10: note: 'val' was declared here
   __u32 val;
 ^
So just shut it up by initializing to 0.


Hmm. Interestingly, my patch shouldn't have changed anything relevant
for gcc's decision. OTOH, I don't see a warning using gcc-4.9.3.


If I revert it, the warning is gone for me ;) perhaps some heuristic issue
with that gcc version.

Cheers,
Daniel
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Another pppoe-related crash

2015-12-14 Thread Guillaume Nault
On Fri, Dec 11, 2015 at 05:07:54PM +0200, Andrew wrote:
> I've got another pppoe-related crash on one PPPoE BRAS.
> 
> Kernel is 4.1.13 with patch "pppoe: fix memory corruption in padt work
> structure"
> 
Commit 1acea4f6ce1b ("ppp: fix pppoe_dev deletion condition in
pppoe_release()") is missing from 4.1.13. Can you try with 4.1.14 (or at
least manually apply this patch)?
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[RFC PATCH net-next 0/2] Local checksum offload for VXLAN

2015-12-14 Thread Edward Cree
When the inner packet checksum is offloaded, the outer UDP checksum is easy
 to calculate as it doesn't depend on the payload (because the inner checksum
 cancels out everything from the inner packet except the pseudo header).
Thus, transmit checksums for VXLAN (and in principle other encapsulations,
 but I haven't enabled it for / tested with those) can be offloaded on any
 device supporting NETIF_F_HW_CSUM.  Only the innermost checksum has to be
 offloaded, the rest are filled in by the stack.
Tested by hacking a driver to report NETIF_F_HW_CSUM, call skb_checksum_help
 before transmitting a packet, and not actually offload anything to the hw.
 I did it that way because I don't have any hw that can actually offload the
 inner checksum; but I should be able to get hold of some soon.

Edward Cree (2):
  net: udp: local checksum offload for encapsulation
  net: vxlan: enable local checksum offload on HW_CSUM devices

 drivers/net/vxlan.c |  5 -
 net/ipv4/udp.c  | 31 +++
 2 files changed, 31 insertions(+), 5 deletions(-)

-- 
2.4.3

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH iproute2] ip, route: fix minor compile warning

2015-12-14 Thread Daniel Borkmann
Seems like gcc (4.8.3) doesn't catch this false positive, triggering
after 0f7543322c5f ("route: ignore RTAX_HOPLIMIT of value -1"):

  iproute.c: In function 'print_route':
  iproute.c:301:12: warning: 'val' may be used uninitialized in this function 
[-Wmaybe-uninitialized]
 features &= ~RTAX_FEATURE_ECN;
  ^
  iproute.c:575:10: note: 'val' was declared here
  __u32 val;
^
So just shut it up by initializing to 0.

Signed-off-by: Daniel Borkmann 
---
 ip/iproute.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ip/iproute.c b/ip/iproute.c
index c42ea0b..867c8da 100644
--- a/ip/iproute.c
+++ b/ip/iproute.c
@@ -572,7 +572,7 @@ int print_route(const struct sockaddr_nl *who, struct 
nlmsghdr *n, void *arg)
mxlock = *(unsigned*)RTA_DATA(mxrta[RTAX_LOCK]);
 
for (i=2; i<= RTAX_MAX; i++) {
-   __u32 val;
+   __u32 val = 0;
 
if (mxrta[i] == NULL)
continue;
-- 
1.9.3

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [V2 PATCH 1/1] net: sctp: dynamically enable or disable pf state

2015-12-14 Thread Marcelo Ricardo Leitner
On Mon, Dec 14, 2015 at 02:22:19PM +0800, zyjzyj2...@gmail.com wrote:
> From: Zhu Yanjun 
> 
> As we all know, the value of pf_retrans >= max_retrans_path can
> disable pf state. The variables of pf_retrans and max_retrans_path
> can be changed by the user space application.
> 
> Sometimes the user expects to disable pf state while the 2
> variables are changed to enable pf state. So it is necessary to
> introduce a new variable to disable pf state.
> 
> According to the suggestions from Vlad Yasevich, extra1 and extra2
> are removed. The initialization of pf_enable is added.
> 
> Signed-off-by: Zhu Yanjun 
> ---
>  include/net/netns/sctp.h |7 +++
>  net/sctp/protocol.c  |3 +++
>  net/sctp/sm_sideeffect.c |5 -
>  net/sctp/sysctl.c|7 +++
>  4 files changed, 21 insertions(+), 1 deletion(-)
> 
> diff --git a/include/net/netns/sctp.h b/include/net/netns/sctp.h
> index 8ba379f..c501d67 100644
> --- a/include/net/netns/sctp.h
> +++ b/include/net/netns/sctp.h
> @@ -89,6 +89,13 @@ struct netns_sctp {
>   int pf_retrans;
>  
>   /*
> +  * Disable Potentially-Failed feature, the feature is enabled by default
> +  * pf_enable-  0  : disable pf
> +  *  - >0  : enable pf
> +  */
> + int pf_enable;
> +
> + /*
>* Policy for preforming sctp/socket accounting
>* 0   - do socket level accounting, all assocs share sk_sndbuf
>* 1   - do sctp accounting, each asoc may use sk_sndbuf bytes

Please add this documentation to Documentation/networking/ip-sysctl.txt
too, mentioning the RFC/draft it's about. 
https://datatracker.ietf.org/doc/draft-ietf-tsvwg-sctp-failover/

And update the text on pf_retrans mentioning this new variable as well.

  Marcelo

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH net] net: add validation for the socket syscall protocol argument

2015-12-14 Thread Hannes Frederic Sowa
郭永刚 reported that one could simply crash the kernel as root by
using a simple program:

int socket_fd;
struct sockaddr_in addr;
addr.sin_port = 0;
addr.sin_addr.s_addr = INADDR_ANY;
addr.sin_family = 10;

socket_fd = socket(10,3,0x4000);
connect(socket_fd , &addr ,16);

AF_INET, AF_INET6 sockets actually only support 8-bit protocol
identifiers. inet_sock's skc_protocol field thus is sized accordingly,
thus larger protocol identifiers simply cut off the higher bits and
store a zero in the protocol fields.

This could lead to e.g. a NULL function pointer dereference: as a result
of the cut off, inet_num is zero and we call down to inet_autobind, which
is NULL for raw sockets.

kernel: Call Trace:
kernel:  [] ? inet_autobind+0x2e/0x70
kernel:  [] inet_dgram_connect+0x54/0x80
kernel:  [] SYSC_connect+0xd9/0x110
kernel:  [] ? ptrace_notify+0x5b/0x80
kernel:  [] ? syscall_trace_enter_phase2+0x108/0x200
kernel:  [] SyS_connect+0xe/0x10
kernel:  [] tracesys_phase2+0x84/0x89

I found no particular commit which introduced this problem.

CVE: CVE-2015-8543
Reported-by: 郭永刚 
Signed-off-by: Hannes Frederic Sowa 
---
 net/ipv4/af_inet.c  | 3 +++
 net/ipv6/af_inet6.c | 3 +++
 net/socket.c| 3 +++
 3 files changed, 9 insertions(+)

diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 11c4ca1..cfb4496 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -257,6 +257,9 @@ static int inet_create(struct net *net, struct socket 
*sock, int protocol,
int try_loading_module = 0;
int err;
 
+   if (protocol >= IPPROTO_MAX)
+   return -EINVAL;
+
sock->state = SS_UNCONNECTED;
 
/* Look for the requested type/protocol pair. */
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 8ec0df7..9fb093c 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -109,6 +109,9 @@ static int inet6_create(struct net *net, struct socket 
*sock, int protocol,
int try_loading_module = 0;
int err;
 
+   if (protocol >= IPPROTO_MAX)
+   return -EINVAL;
+
/* Look for the requested type/protocol pair. */
 lookup_protocol:
err = -ESOCKTNOSUPPORT;
diff --git a/net/socket.c b/net/socket.c
index 456fadb..d2f3d49 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -1098,6 +1098,9 @@ int __sock_create(struct net *net, int family, int type, 
int protocol,
return -EAFNOSUPPORT;
if (type < 0 || type >= SOCK_MAX)
return -EINVAL;
+   /* upper bound should be tested by per-protocol .create callbacks */
+   if (protocol < 0)
+   return -EINVAL;
 
/* Compatibility.
 
-- 
2.5.0

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v5 net-next 4/4] ila: Add generic ILA translation facility

2015-12-14 Thread David Miller
From: Tom Herbert 
Date: Mon, 14 Dec 2015 15:56:48 -0800

> +static int alloc_ila_locks(struct ila_net *ilan, gfp_t gfp)

gfp is always GFP_KERNEL, thus this flexibility is never
necessary.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2] net/macb: add support for resetting PHY using GPIO

2015-12-14 Thread Florian Fainelli
On December 14, 2015 2:56:34 PM PST, Rob Herring  wrote:
>On Fri, Dec 11, 2015 at 11:34:53AM +0100, Gregory CLEMENT wrote:
>> With device tree it is no more possible to reset the PHY at board
>> level. Furthermore, doing in the driver allow to power down the PHY
>when
>> the network interface is no more used.
>> 
>> This reset can't be done at the PHY driver level. The PHY must be
>able to
>> answer the to the mii bus scan to let the kernel creating a PHY
>device.
>> 
>> The patch introduces a new optional property "phy-reset-gpios"
>inspired
>> from the one use for the FEC.
>> 
>> Signed-off-by: Gregory CLEMENT 
>> ---
>> 
>> Since the v1, I used the gpiod functions. It allows to simplify the
>> code and to not introduce any #ifdef.
>> 
>> I also rename the property in phy-reset-gpios, even if actually the
>> gpiod will match both phy-reset-gpios and phy-reset-gpio.
>> 
>> 
>>  Documentation/devicetree/bindings/net/macb.txt | 3 +++
>>  drivers/net/ethernet/cadence/macb.c| 8 
>>  drivers/net/ethernet/cadence/macb.h| 1 +
>>  3 files changed, 12 insertions(+)
>> 
>> diff --git a/Documentation/devicetree/bindings/net/macb.txt
>b/Documentation/devicetree/bindings/net/macb.txt
>> index b5d7976..4a7fb6c 100644
>> --- a/Documentation/devicetree/bindings/net/macb.txt
>> +++ b/Documentation/devicetree/bindings/net/macb.txt
>> @@ -19,6 +19,9 @@ Required properties:
>>  Optional elements: 'tx_clk'
>>  - clocks: Phandles to input clocks.
>>  
>> +Optional properties:
>> +- phy-reset-gpios : Should specify the gpio for phy reset
>> +
>
>This alone is simple enough, but I worry that this doesn't really
>scale. 
>What if you need to enable clocks or regulators for the same reason?
>The 
>mmc folks did a pwrseq binding for similar reasons. I don't think I'd 
>recommend that here as I think it is kind of ugly. We really need a 
>pre-probe/scan hook for drivers. This is also needed for USB devices 
>mounted on boards.

In this particular case, the way Ethernet MAC drivers register their MDIO buses 
and therefore PHYs, there is always a good way to deassert the PHY GPIO line 
without requiring major core device driver changes. Worst case, there is the 
MDIO bus reset callback which could be used for that matter.

In the case of PCI, USB etc. I do agree having a way to twiddle things before 
scanning/probing would be awesome. I have some boards here which have GPIO 
controlled regulator and hacking the RC driver to deal with that is 
suboptimal... 

>
>But I'm not going to hold up something simple to do all that, so:
>
>Acked-by: Rob Herring 
>
>___
>linux-arm-kernel mailing list
>linux-arm-ker...@lists.infradead.org
>http://lists.infradead.org/mailman/listinfo/linux-arm-kernel


-- 
Sent from my Android device with K-9 Mail. Please excuse my brevity.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [Patch net] pptp: verify sockaddr_len in pptp_bind() and pptp_connect()

2015-12-14 Thread David Miller
From: Cong Wang 
Date: Mon, 14 Dec 2015 13:48:36 -0800

> Reported-by: Dmitry Vyukov 
> Signed-off-by: Cong Wang 

Applied and queued up for -stable, thanks.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH net] skbuff: Fix offset error in skb_reorder_vlan_header

2015-12-14 Thread David Miller
From: Vladislav Yasevich 
Date: Mon, 14 Dec 2015 17:44:10 -0500

> skb_reorder_vlan_header is called after the vlan header has
> been pulled.  As a result the offset of the beginning of
> the mac header has been increased by 4 bytes (VLAN_HLEN).
> When moving the mac addresses, include this increase in
> the offset calculation so that the mac addresses are
> copied correctly.
> 
> Fixes: a6e18ff1117 (vlan: Fix untag operations of stacked vlans with 
> REORDER_HEADER off)
> CC: Nicolas Dichtel 
> CC: Patrick McHardy 
> Signed-off-by: Vladislav Yasevich 

Applied and queued up for -stable, thanks Vlad.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] sh_eth: fix descriptor access endianness

2015-12-14 Thread David Miller
From: Sergei Shtylyov 
Date: Sun, 13 Dec 2015 23:05:07 +0300

> The driver never  calls cpu_to_edmac() when writing the descriptor address
> and edmac_to_cpu() when reading it, although it should -- fix this.
> 
> Note that the frame/buffer length descriptor field accesses also need fixing
> but since they are both 16-bit we can't  use {cpu|edmac}_to_{edmac|cpu}()...
> 
> Signed-off-by: Sergei Shtylyov 

Applied.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] sh_eth: fix TX buffer byte-swapping

2015-12-14 Thread David Miller
From: Sergei Shtylyov 
Date: Sun, 13 Dec 2015 21:27:04 +0300

> For the little-endian SH771x kernels the driver has to byte-swap the RX/TX
> buffers; however, the not yet set physical address from the TX descriptor is
> used to call sh_eth_soft_swap(). Use 'skb->data' instead...
> 
> Fixes: 31fcb99d9958 ("net: sh_eth: remove __flush_purge_region")
> Signed-off-by: Sergei Shtylyov 

Applied.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH net] ravb: Add disable 10base

2015-12-14 Thread David Miller
From: Yoshihiro Kaneko 
Date: Mon, 14 Dec 2015 00:15:58 +0900

> From: Kazuya Mizuguchi 
> 
> Ethernet AVB does not support 10 Mbps transfer speed.
> 
> Signed-off-by: Kazuya Mizuguchi 
> Signed-off-by: Yoshihiro Kaneko 

Applied.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH RFC 00/26] Phylink & SFP support

2015-12-14 Thread Dustin Byford
On Mon Dec 07 17:35, Russell King - ARM Linux wrote:
> Hi,

Hello.

> SFP modules are hot-pluggable ethernet transceivers; they can be
> detected at runtime and accordingly configured.  There are a range of
> modules offering many different features.
> 
> Some SFP modules have conventional PHYs integrated into them, others
> drive a laser diode from the Serdes bus.  Some have monitoring, others
> do not.
> 
> Some SFP modules want to use SGMII over the Serdes link, others want
> to use 1000base-X over the Serdes link.
> 
> This makes it non-trivial to support with the existing code structure.
> Not wanting to write something specific to the mvneta driver, I decided
> to have a go at coming up with something more generic.
> 
> My initial attempts were to provide a PHY driver, but I found that
> phylib's state machine got in the way, and it was hard to support two
> chained PHYs.  Conversely, having a fixed DT specified setup (via
> the fixed phy infrastructure) would allow some SFP modules to work, but
> not others.  The same is true of the "managed" in-band status (which
> is SGMII.)
> 
> The result is that I came up with phylink - an infrastructure layer
> which sits between the network driver and any attached PHY, and a
> SFP module layer detects the SFP module, and configures phylink
> accordingly.
> 
> Overall, this supports:
> 
> * switching the serdes mode at the NIC driver
> * controlling autonegotiation and autoneg results
> * allowing PHYs to be hotplugged
> * allowing SFP modules to be hotplugged with proper link indication
> * fixed-mode links without involving phylib
> * flow control
> * EEE support
> * reading SFP module EEPROMs
> 
> Overall, phylink supports several link modes, with dynamic switching
> possible between these:
> * A true fixed link mode, where the parameters are set by DT.
> * PHY mode, where we read the negotiation results from the PHY registers
>   and pass them to the NIC driver.
> * SGMII mode, where the in-band status indicates the speed, duplex and
>   flow control settings of the link partner.
> * 1000base-X mode, where the in-band status indicates only duplex and
>   flow control settings (different, incompatible bit layout from SGMII.)

I've been working on some similar code to handle interactions with a
wide range of SFF modules, 1G to 100G, on Linux network switches for
some time.  For practical reasons a lot of that was in userspace but
I've been planning and recently working on an SFF kernel driver that
does some of what's done in this series.  I think the model you're
proposing is right on, and since you're further along in implementation
I'd like to help round out support for the other SFF modules if I can.
Then make this work on the network ASICs I have access to.

Any concrete plans for QSFP or the new 25G modules?

> Ethtool support is included, as well as emulation of the MII registers
> for situations where a PHY is not attached, giving compatible emulation
> of existing user interfaces where required.
> 
> The patches here include modification of mvneta (against 4.4-rc1, so
> probably won't apply to current development tips.)  It basically
> hooks into the places where the phylib would hook into.
> 
> DT wise, the changes needed to support SFP look like this (example
> taken from Clearfog):
> 
>   ethernet@34000 {
> + managed = "in-band-status";
>   phy-mode = "sgmii";
>   status = "okay";
> -
> - fixed-link {
> - speed = <1000>;
> - full-duplex;
> - };
>   };
> ...
> + sfp: sfp {
> + compatible = "sff,sfp";
> + i2c-bus = <>;
> + los-gpio = < 12 GPIO_ACTIVE_HIGH>;
> + moddef0-gpio = < 15 GPIO_ACTIVE_LOW>;
> + sfp,ethernet = <>;

Using the ethernet phandle is unambiguous in this case because there's only
one serdes and one mac involved.  To specify the mac/serdes/cage
associations at the same level of detail as the gpios it might be nice
(at least for some devices) to point to a serdes node (or 4 in the case
of QSFP) instead of the ethernet node.  Any thoughts on that?

Switch ASICs, and I imagine at least some NICs, are really flexible in
terms of how serdes are wired to a cage.  Both in the sense that the
board designer gets to pick which wires route to the cage based on
physical constraints and the user gets to pick which serdes or group of
serdes compose the ethernet device.  For example, using a breakout cable
to get 4xSFP out of a QSFP or the other way around.

Perhaps the simple case (sfp,ethernet -> ) can remain simple, but
I'd be interested in any thoughts you have on introducing a serdes
layer here.

I think adding such a layer would make it easier to 1) make serdes to
cage mappings part of the platform description (DT or ACPI) and 2) allow
automatic reconfiguration of 

[PATCH] 82xx: FCC: Fixing a bug causing to FCC port lock-up

2015-12-14 Thread Martin Roth
The patch fixes an FCC port lock-up, which occurs as a result of a bug
during underrun/collision handling. Within the tx_restart() function
in mac-fcc.c, the address of the last BD is not calculated correctly.
As a result of the wrong calculation of the last BD address, the next
transmitted BD may be set to an area outside the transmit BD ring.
This causes the port to lock up, and it is not recoverable.

Signed-off-by: Martin Roth 
---
 drivers/net/ethernet/freescale/fs_enet/mac-fcc.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/freescale/fs_enet/mac-fcc.c 
b/drivers/net/ethernet/freescale/fs_enet/mac-fcc.c
index 08f5b91..52e0091 100644
--- a/drivers/net/ethernet/freescale/fs_enet/mac-fcc.c
+++ b/drivers/net/ethernet/freescale/fs_enet/mac-fcc.c
@@ -552,7 +552,7 @@ static void tx_restart(struct net_device *dev)
cbd_t __iomem *prev_bd;
cbd_t __iomem *last_tx_bd;
 
-   last_tx_bd = fep->tx_bd_base + (fpi->tx_ring * sizeof(cbd_t));
+   last_tx_bd = fep->tx_bd_base + ((fpi->tx_ring - 1) * sizeof(cbd_t));
 
/* get the current bd held in TBPTR  and scan back from this point */
recheck_bd = curr_tbptr = (cbd_t __iomem *)
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v5 net-next 4/4] ila: Add generic ILA translation facility

2015-12-14 Thread Florian Westphal
Tom Herbert  wrote:
> +static int ila_add_mapping(struct net *net, struct ila_xlat_params *p)
> +{
> + struct ila_net *ilan = net_generic(net, ila_net_id);
> + struct ila_map *ila, *head;
> + spinlock_t *lock = ila_get_lock(ilan, p->identifier);
> + int err = 0, order;
> +
> + if (!ilan->hooks_registered) {
> + /* We defer registering net hooks in the namespace until the
> +  * first mapping is added.
> +  */
> + err = nf_register_net_hooks(net, ila_nf_hook_ops,
> + ARRAY_SIZE(ila_nf_hook_ops));
> + if (err)
> + return err;

Looks as if this misses a
ilan->hooks_registered = true
> + }
> +
> + ila = kzalloc(sizeof(*ila), GFP_KERNEL);
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH net] net: fix IP early demux races

2015-12-14 Thread David Miller
From: Eric Dumazet 
Date: Mon, 14 Dec 2015 14:08:53 -0800

> From: Eric Dumazet 
> 
> David Wilder reported crashes caused by dst reuse.
> 
> 
>   I am seeing a crash on a distro V4.2.3 kernel caused by a double
>   release of a dst_entry.  In ipv4_dst_destroy() the call to  
>   list_empty() finds a poisoned next pointer, indicating the dst_entry  
>   has already been removed from the list and freed. The crash occurs  
>   18 to 24 hours into a run of a network stress exerciser.
> 
> 
> Thanks to his detailed report and analysis, we were able to understand
> the core issue.
> 
> IP early demux can associate a dst to skb, after a lookup in TCP/UDP
> sockets.
> 
> When socket cache is not properly set, we want to store into
> sk->sk_dst_cache the dst for future IP early demux lookups,
> by acquiring a stable refcount on the dst.
> 
> Problem is this acquisition is simply using an atomic_inc(),
> which works well, unless the dst was queued for destruction from
> dst_release() noticing dst refcount went to zero, if DST_NOCACHE
> was set on dst.
> 
> We need to make sure current refcount is not zero before incrementing
> it, or risk double free as David reported.
> 
> This patch, being a stable candidate, adds two new helpers, and use
> them only from IP early demux problematic paths.
> 
> It might be possible to merge in net-next skb_dst_force() and
> skb_dst_force_safe(), but I prefer having the smallest patch for stable
> kernels : Maybe some skb_dst_force() callers do not expect skb->dst
> can suddenly be cleared.
> 
> Can probably be backported back to linux-3.6 kernels
> 
> Reported-by: David J. Wilder 
> Tested-by: David J. Wilder 
> Signed-off-by: Eric Dumazet 

Applied and queued up for -stable, thanks Eric.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Fwd: Query regarding Coverity tool

2015-12-14 Thread pavi1729
Hi,
  May I know if the community uses the Coverity tool and, if so, where
I can find a repo of
Coverity scans of kernels and the IGNORE LIST, because there will obviously
be false positives.

Cheers,
Pavi
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[net-next 6/9] e1000e: Remove unreachable code

2015-12-14 Thread Jeff Kirsher
From: Benjamin Poirier 

msi-x interrupts are not shared so there's no need to check if the
interrupt was really from this adapter.

Signed-off-by: Benjamin Poirier 
Tested-by: Aaron Brown 
Signed-off-by: Jeff Kirsher 
---
 drivers/net/ethernet/intel/e1000e/netdev.c | 6 --
 1 file changed, 6 deletions(-)

diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c 
b/drivers/net/ethernet/intel/e1000e/netdev.c
index 955c8c7..26cf183 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -1907,12 +1907,6 @@ static irqreturn_t e1000_msix_other(int __always_unused 
irq, void *data)
struct e1000_hw *hw = >hw;
u32 icr = er32(ICR);
 
-   if (!(icr & E1000_ICR_INT_ASSERTED)) {
-   if (!test_bit(__E1000_DOWN, >state))
-   ew32(IMS, E1000_IMS_OTHER);
-   return IRQ_NONE;
-   }
-
if (icr & adapter->eiac_mask)
ew32(ICS, (icr & adapter->eiac_mask));
 
-- 
2.5.0

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[net-next 9/9] e1000e: Fix msi-x interrupt automask

2015-12-14 Thread Jeff Kirsher
From: Benjamin Poirier 

Since the introduction of 82574 support in e1000e, the driver has worked
on the assumption that msi-x interrupt generation is automatically
disabled after each irq. As it turns out, this is not the case.
Currently, rx interrupts can fire multiple times before and during napi
processing. This can be a problem for users because frames that arrive
in a certain window (after adapter->clean_rx() but before
napi_complete_done() has cleared NAPI_STATE_SCHED) generate an interrupt
which does not lead to napi_schedule(). These frames sit in the rx queue
until another frame arrives (a tcp retransmit for example).

While the EIAC and CTRL_EXT registers are properly configured for irq
automask, the modification of IAM in e1000_configure_msix() is what
prevents automask from working as intended.

This patch removes that erroneous write and fixes interrupt rearming for
tx interrupts. It also clears IAME from CTRL_EXT. This is not strictly
necessary for operation of the driver but it is to avoid disruption from
potential programs that access the registers directly, like `ethregs -c`.

Reported-by: Frank Steiner 
Signed-off-by: Benjamin Poirier 
Tested-by: Aaron Brown 
Signed-off-by: Jeff Kirsher 
---
 drivers/net/ethernet/intel/e1000e/netdev.c | 11 +--
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c 
b/drivers/net/ethernet/intel/e1000e/netdev.c
index d59c0bc..c71ba1b 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -1931,6 +1931,9 @@ static irqreturn_t e1000_intr_msix_tx(int __always_unused 
irq, void *data)
/* Ring was not completely cleaned, so fire another interrupt */
ew32(ICS, tx_ring->ims_val);
 
+   if (!test_bit(__E1000_DOWN, >state))
+   ew32(IMS, adapter->tx_ring->ims_val);
+
return IRQ_HANDLED;
 }
 
@@ -2020,12 +2023,8 @@ static void e1000_configure_msix(struct e1000_adapter 
*adapter)
ew32(IVAR, ivar);
 
/* enable MSI-X PBA support */
-   ctrl_ext = er32(CTRL_EXT);
-   ctrl_ext |= E1000_CTRL_EXT_PBA_CLR;
-
-   /* Auto-Mask Other interrupts upon ICR read */
-   ew32(IAM, ~E1000_EIAC_MASK_82574 | E1000_IMS_OTHER);
-   ctrl_ext |= E1000_CTRL_EXT_EIAME;
+   ctrl_ext = er32(CTRL_EXT) & ~E1000_CTRL_EXT_IAME;
+   ctrl_ext |= E1000_CTRL_EXT_PBA_CLR | E1000_CTRL_EXT_EIAME;
ew32(CTRL_EXT, ctrl_ext);
e1e_flush();
 }
-- 
2.5.0

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[net-next 4/9] igb: Improve cable length function for I210, etc.

2015-12-14 Thread Jeff Kirsher
From: Joe Schultz 

Previously, the PHY-specific code to get the cable length for the
I210 internal and related PHYs was reading the cable length of a
single pair and reporting it as the min, max, and total cable length.
Update it so that all four pairs are checked so the true min, max,
and average cable lengths are reported.

Signed-off-by: Joe Schultz 
Signed-off-by: Aaron Sierra 
Tested-by: Aaron Brown 
Signed-off-by: Jeff Kirsher 
---
 drivers/net/ethernet/intel/igb/e1000_defines.h |  5 ++-
 drivers/net/ethernet/intel/igb/e1000_hw.h  |  1 +
 drivers/net/ethernet/intel/igb/e1000_phy.c | 54 ++
 3 files changed, 51 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/intel/igb/e1000_defines.h 
b/drivers/net/ethernet/intel/igb/e1000_defines.h
index a61ee94..c3c598c 100644
--- a/drivers/net/ethernet/intel/igb/e1000_defines.h
+++ b/drivers/net/ethernet/intel/igb/e1000_defines.h
@@ -927,7 +927,10 @@
 
 /* Intel i347-AT4 Registers */
 
-#define I347AT4_PCDL   0x10 /* PHY Cable Diagnostics Length */
+#define I347AT4_PCDL0  0x10 /* Pair 0 PHY Cable Diagnostics 
Length */
+#define I347AT4_PCDL1  0x11 /* Pair 1 PHY Cable Diagnostics 
Length */
+#define I347AT4_PCDL2  0x12 /* Pair 2 PHY Cable Diagnostics 
Length */
+#define I347AT4_PCDL3  0x13 /* Pair 3 PHY Cable Diagnostics 
Length */
 #define I347AT4_PCDC   0x15 /* PHY Cable Diagnostics Control */
 #define I347AT4_PAGE_SELECT0x16
 
diff --git a/drivers/net/ethernet/intel/igb/e1000_hw.h 
b/drivers/net/ethernet/intel/igb/e1000_hw.h
index 2003b37..4034207 100644
--- a/drivers/net/ethernet/intel/igb/e1000_hw.h
+++ b/drivers/net/ethernet/intel/igb/e1000_hw.h
@@ -441,6 +441,7 @@ struct e1000_phy_info {
u16 cable_length;
u16 max_cable_length;
u16 min_cable_length;
+   u16 pair_length[4];
 
u8 mdix;
 
diff --git a/drivers/net/ethernet/intel/igb/e1000_phy.c 
b/drivers/net/ethernet/intel/igb/e1000_phy.c
index 8015f3b..5b54254 100644
--- a/drivers/net/ethernet/intel/igb/e1000_phy.c
+++ b/drivers/net/ethernet/intel/igb/e1000_phy.c
@@ -1717,6 +1717,9 @@ s32 igb_get_cable_length_m88_gen2(struct e1000_hw *hw)
struct e1000_phy_info *phy = >phy;
s32 ret_val;
u16 phy_data, phy_data2, index, default_page, is_cm;
+   int len_tot = 0;
+   u16 len_min;
+   u16 len_max;
 
switch (hw->phy.id) {
case M88E1543_E_PHY_ID:
@@ -1733,11 +1736,6 @@ s32 igb_get_cable_length_m88_gen2(struct e1000_hw *hw)
if (ret_val)
goto out;
 
-   /* Get cable length from PHY Cable Diagnostics Control Reg */
-   ret_val = phy->ops.read_reg(hw, I347AT4_PCDL, _data);
-   if (ret_val)
-   goto out;
-
/* Check if the unit of cable length is meters or cm */
ret_val = phy->ops.read_reg(hw, I347AT4_PCDC, _data2);
if (ret_val)
@@ -1745,10 +1743,50 @@ s32 igb_get_cable_length_m88_gen2(struct e1000_hw *hw)
 
is_cm = !(phy_data2 & I347AT4_PCDC_CABLE_LENGTH_UNIT);
 
+   /* Get cable length from Pair 0 length Regs */
+   ret_val = phy->ops.read_reg(hw, I347AT4_PCDL0, _data);
+   if (ret_val)
+   goto out;
+
+   phy->pair_length[0] = phy_data / (is_cm ? 100 : 1);
+   len_tot = phy->pair_length[0];
+   len_min = phy->pair_length[0];
+   len_max = phy->pair_length[0];
+
+   /* Get cable length from Pair 1 length Regs */
+   ret_val = phy->ops.read_reg(hw, I347AT4_PCDL1, _data);
+   if (ret_val)
+   goto out;
+
+   phy->pair_length[1] = phy_data / (is_cm ? 100 : 1);
+   len_tot += phy->pair_length[1];
+   len_min = min(len_min, phy->pair_length[1]);
+   len_max = max(len_max, phy->pair_length[1]);
+
+   /* Get cable length from Pair 2 length Regs */
+   ret_val = phy->ops.read_reg(hw, I347AT4_PCDL2, _data);
+   if (ret_val)
+   goto out;
+
+   phy->pair_length[2] = phy_data / (is_cm ? 100 : 1);
+   len_tot += phy->pair_length[2];
+   len_min = min(len_min, phy->pair_length[2]);
+   len_max = max(len_max, phy->pair_length[2]);
+
+   /* Get cable length from Pair 3 length Regs */
+   ret_val = phy->ops.read_reg(hw, I347AT4_PCDL3, _data);
+   if (ret_val)
+   goto out;
+
+   phy->pair_length[3] = phy_data / (is_cm ? 100 : 1);
+   len_tot += phy->pair_length[3];
+   len_min = min(len_min, phy->pair_length[3]);
+ 

[net-next 8/9] e1000e: Do not write lsc to ics in msi-x mode

2015-12-14 Thread Jeff Kirsher
From: Benjamin Poirier 

In msi-x mode, there is no handler for the lsc interrupt so there is no
point in writing that to ics now that we always assume Other interrupts
are caused by lsc.

Reviewed-by: Jasna Hodzic 
Signed-off-by: Benjamin Poirier 
Tested-by: Aaron Brown 
Signed-off-by: Jeff Kirsher 
---
 drivers/net/ethernet/intel/e1000e/defines.h |  3 ++-
 drivers/net/ethernet/intel/e1000e/netdev.c  | 28 +---
 2 files changed, 19 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/intel/e1000e/defines.h 
b/drivers/net/ethernet/intel/e1000e/defines.h
index 133d407..f7c7804 100644
--- a/drivers/net/ethernet/intel/e1000e/defines.h
+++ b/drivers/net/ethernet/intel/e1000e/defines.h
@@ -441,12 +441,13 @@
 #define E1000_IMS_RXQ1  E1000_ICR_RXQ1  /* Rx Queue 1 Interrupt */
 #define E1000_IMS_TXQ0  E1000_ICR_TXQ0  /* Tx Queue 0 Interrupt */
 #define E1000_IMS_TXQ1  E1000_ICR_TXQ1  /* Tx Queue 1 Interrupt */
-#define E1000_IMS_OTHER E1000_ICR_OTHER /* Other Interrupts */
+#define E1000_IMS_OTHER E1000_ICR_OTHER /* Other Interrupt */
 
 /* Interrupt Cause Set */
 #define E1000_ICS_LSC   E1000_ICR_LSC   /* Link Status Change */
 #define E1000_ICS_RXSEQ E1000_ICR_RXSEQ /* Rx sequence error */
 #define E1000_ICS_RXDMT0E1000_ICR_RXDMT0/* Rx desc min. threshold */
+#define E1000_ICS_OTHER E1000_ICR_OTHER /* Other Interrupt */
 
 /* Transmit Descriptor Control */
 #define E1000_TXDCTL_PTHRESH 0x003F /* TXDCTL Prefetch Threshold */
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c 
b/drivers/net/ethernet/intel/e1000e/netdev.c
index 56bc422..d59c0bc 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -4132,10 +4132,24 @@ void e1000e_reset(struct e1000_adapter *adapter)
 
 }
 
-void e1000e_up(struct e1000_adapter *adapter)
+/**
+ * e1000e_trigger_lsc - trigger an LSC interrupt
+ * @adapter: 
+ *
+ * Fire a link status change interrupt to start the watchdog.
+ **/
+static void e1000e_trigger_lsc(struct e1000_adapter *adapter)
 {
struct e1000_hw *hw = >hw;
 
+   if (adapter->msix_entries)
+   ew32(ICS, E1000_ICS_OTHER);
+   else
+   ew32(ICS, E1000_ICS_LSC);
+}
+
+void e1000e_up(struct e1000_adapter *adapter)
+{
/* hardware has been reset, we need to reload some things */
e1000_configure(adapter);
 
@@ -4147,11 +4161,7 @@ void e1000e_up(struct e1000_adapter *adapter)
 
netif_start_queue(adapter->netdev);
 
-   /* fire a link change interrupt to start the watchdog */
-   if (adapter->msix_entries)
-   ew32(ICS, E1000_ICS_LSC | E1000_ICR_OTHER);
-   else
-   ew32(ICS, E1000_ICS_LSC);
+   e1000e_trigger_lsc(adapter);
 }
 
 static void e1000e_flush_descriptors(struct e1000_adapter *adapter)
@@ -4576,11 +4586,7 @@ static int e1000_open(struct net_device *netdev)
hw->mac.get_link_status = true;
pm_runtime_put(>dev);
 
-   /* fire a link status change interrupt to start the watchdog */
-   if (adapter->msix_entries)
-   ew32(ICS, E1000_ICS_LSC | E1000_ICR_OTHER);
-   else
-   ew32(ICS, E1000_ICS_LSC);
+   e1000e_trigger_lsc(adapter);
 
return 0;
 
-- 
2.5.0

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[net-next 3/9] igb: Don't add PHY address to PCDL address

2015-12-14 Thread Jeff Kirsher
From: Aaron Sierra 

There is no reason to add the PHY address into the PCDL register address.

Signed-off-by: Aaron Sierra 
Tested-by: Aaron Brown 
Signed-off-by: Jeff Kirsher 
---
 drivers/net/ethernet/intel/igb/e1000_phy.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/intel/igb/e1000_phy.c 
b/drivers/net/ethernet/intel/igb/e1000_phy.c
index c906826..8015f3b 100644
--- a/drivers/net/ethernet/intel/igb/e1000_phy.c
+++ b/drivers/net/ethernet/intel/igb/e1000_phy.c
@@ -1734,8 +1734,7 @@ s32 igb_get_cable_length_m88_gen2(struct e1000_hw *hw)
goto out;
 
/* Get cable length from PHY Cable Diagnostics Control Reg */
-   ret_val = phy->ops.read_reg(hw, (I347AT4_PCDL + phy->addr),
-   _data);
+   ret_val = phy->ops.read_reg(hw, I347AT4_PCDL, _data);
if (ret_val)
goto out;
 
-- 
2.5.0

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[net-next 2/9] igb: Remove GS40G specific defines/functions

2015-12-14 Thread Jeff Kirsher
From: Aaron Sierra 

The I210 internal PHY can be accessed just as well with the access
functions shared by 82580, I350, and I354 devices. A side effect of
relying on the common functions, is that I210 cable length support
is folded back into the common case which effectively reverts the
following commit:

commit 59f301046b276f87483b3afa3201a4273def06a9
Author: Carolyn Wyborny 
Date:   Wed Oct 10 04:42:59 2012 +

igb: Update get cable length function for i210/i211

Cc: Carolyn Wyborny 
Signed-off-by: Aaron Sierra 
Tested-by: Aaron Brown 
Signed-off-by: Jeff Kirsher 
---
 drivers/net/ethernet/intel/igb/e1000_82575.c | 13 ++---
 drivers/net/ethernet/intel/igb/e1000_i210.c  |  5 +-
 drivers/net/ethernet/intel/igb/e1000_i210.h  |  2 +-
 drivers/net/ethernet/intel/igb/e1000_phy.c   | 82 +---
 drivers/net/ethernet/intel/igb/e1000_phy.h   | 15 +
 5 files changed, 11 insertions(+), 106 deletions(-)

diff --git a/drivers/net/ethernet/intel/igb/e1000_82575.c 
b/drivers/net/ethernet/intel/igb/e1000_82575.c
index 362911d..adb33e2 100644
--- a/drivers/net/ethernet/intel/igb/e1000_82575.c
+++ b/drivers/net/ethernet/intel/igb/e1000_82575.c
@@ -45,8 +45,6 @@ static s32  igb_get_cfg_done_82575(struct e1000_hw *);
 static s32  igb_init_hw_82575(struct e1000_hw *);
 static s32  igb_phy_hw_reset_sgmii_82575(struct e1000_hw *);
 static s32  igb_read_phy_reg_sgmii_82575(struct e1000_hw *, u32, u16 *);
-static s32  igb_read_phy_reg_82580(struct e1000_hw *, u32, u16 *);
-static s32  igb_write_phy_reg_82580(struct e1000_hw *, u32, u16);
 static s32  igb_reset_hw_82575(struct e1000_hw *);
 static s32  igb_reset_hw_82580(struct e1000_hw *);
 static s32  igb_set_d0_lplu_state_82575(struct e1000_hw *, bool);
@@ -205,13 +203,10 @@ static s32 igb_init_phy_params_82575(struct e1000_hw *hw)
case e1000_82580:
case e1000_i350:
case e1000_i354:
-   phy->ops.read_reg = igb_read_phy_reg_82580;
-   phy->ops.write_reg = igb_write_phy_reg_82580;
-   break;
case e1000_i210:
case e1000_i211:
-   phy->ops.read_reg = igb_read_phy_reg_gs40g;
-   phy->ops.write_reg = igb_write_phy_reg_gs40g;
+   phy->ops.read_reg = igb_read_phy_reg_82580;
+   phy->ops.write_reg = igb_write_phy_reg_82580;
break;
default:
phy->ops.read_reg = igb_read_phy_reg_igp;
@@ -2153,7 +2148,7 @@ void igb_vmdq_set_replication_pf(struct e1000_hw *hw, 
bool enable)
  *  Reads the MDI control register in the PHY at offset and stores the
  *  information read to data.
  **/
-static s32 igb_read_phy_reg_82580(struct e1000_hw *hw, u32 offset, u16 *data)
+s32 igb_read_phy_reg_82580(struct e1000_hw *hw, u32 offset, u16 *data)
 {
s32 ret_val;
 
@@ -2177,7 +2172,7 @@ out:
  *
  *  Writes data to MDI control register in the PHY at offset.
  **/
-static s32 igb_write_phy_reg_82580(struct e1000_hw *hw, u32 offset, u16 data)
+s32 igb_write_phy_reg_82580(struct e1000_hw *hw, u32 offset, u16 data)
 {
s32 ret_val;
 
diff --git a/drivers/net/ethernet/intel/igb/e1000_i210.c 
b/drivers/net/ethernet/intel/igb/e1000_i210.c
index 29f59c7..8aa7987 100644
--- a/drivers/net/ethernet/intel/igb/e1000_i210.c
+++ b/drivers/net/ethernet/intel/igb/e1000_i210.c
@@ -861,10 +861,10 @@ s32 igb_pll_workaround_i210(struct e1000_hw *hw)
if (ret_val)
nvm_word = E1000_INVM_DEFAULT_AL;
tmp_nvm = nvm_word | E1000_INVM_PLL_WO_VAL;
+   igb_write_phy_reg_82580(hw, I347AT4_PAGE_SELECT, 
E1000_PHY_PLL_FREQ_PAGE);
for (i = 0; i < E1000_MAX_PLL_TRIES; i++) {
/* check current state directly from internal PHY */
-   igb_read_phy_reg_gs40g(hw, (E1000_PHY_PLL_FREQ_PAGE |
-E1000_PHY_PLL_FREQ_REG), _word);
+   igb_read_phy_reg_82580(hw, E1000_PHY_PLL_FREQ_REG, _word);
if ((phy_word & E1000_PHY_PLL_UNCONF)
!= E1000_PHY_PLL_UNCONF) {
ret_val = 0;
@@ -896,6 +896,7 @@ s32 igb_pll_workaround_i210(struct e1000_hw *hw)
/* restore WUC register */
wr32(E1000_WUC, wuc);
}
+   igb_write_phy_reg_82580(hw, I347AT4_PAGE_SELECT, 0);
/* restore MDICNFG setting */
wr32(E1000_MDICNFG, mdicnfg);
return ret_val;
diff --git a/drivers/net/ethernet/intel/igb/e1000_i210.h 
b/drivers/net/ethernet/intel/igb/e1000_i210.h
index eaa68a5..b2964a2 100644
--- a/drivers/net/ethernet/intel/igb/e1000_i210.h
+++ b/drivers/net/ethernet/intel/igb/e1000_i210.h
@@ -85,7 +85,7 @@ enum E1000_INVM_STRUCTURE_TYPE {
 #define E1000_PCI_PMCSR_D3   

[net-next 5/9] igb: Explicitly label self-test result indices

2015-12-14 Thread Jeff Kirsher
From: Joe Schultz 

Previously, the ethtool self-test gstrings/data arrays were accessed via
hardcoded indices, which made the code difficult to follow. This patch
replaces the hardcoded values with enum-based labels.

Signed-off-by: Joe Schultz 
Signed-off-by: Aaron Sierra 
Tested-by: Aaron Brown 
Signed-off-by: Jeff Kirsher 
---
 drivers/net/ethernet/intel/igb/igb_ethtool.c | 38 ++--
 1 file changed, 24 insertions(+), 14 deletions(-)

diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c 
b/drivers/net/ethernet/intel/igb/igb_ethtool.c
index 2529bc6..1d329f1 100644
--- a/drivers/net/ethernet/intel/igb/igb_ethtool.c
+++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c
@@ -127,10 +127,20 @@ static const struct igb_stats igb_gstrings_net_stats[] = {
 #define IGB_STATS_LEN \
(IGB_GLOBAL_STATS_LEN + IGB_NETDEV_STATS_LEN + IGB_QUEUE_STATS_LEN)
 
+enum igb_diagnostics_results {
+   TEST_REG = 0,
+   TEST_EEP,
+   TEST_IRQ,
+   TEST_LOOP,
+   TEST_LINK
+};
+
 static const char igb_gstrings_test[][ETH_GSTRING_LEN] = {
-   "Register test  (offline)", "Eeprom test(offline)",
-   "Interrupt test (offline)", "Loopback test  (offline)",
-   "Link test   (on/offline)"
+   [TEST_REG]  = "Register test  (offline)",
+   [TEST_EEP]  = "Eeprom test(offline)",
+   [TEST_IRQ]  = "Interrupt test (offline)",
+   [TEST_LOOP] = "Loopback test  (offline)",
+   [TEST_LINK] = "Link test   (on/offline)"
 };
 #define IGB_TEST_LEN (sizeof(igb_gstrings_test) / ETH_GSTRING_LEN)
 
@@ -2002,7 +2012,7 @@ static void igb_diag_test(struct net_device *netdev,
/* Link test performed before hardware reset so autoneg doesn't
 * interfere with test result
 */
-   if (igb_link_test(adapter, [4]))
+   if (igb_link_test(adapter, [TEST_LINK]))
eth_test->flags |= ETH_TEST_FL_FAILED;
 
if (if_running)
@@ -2011,21 +2021,21 @@ static void igb_diag_test(struct net_device *netdev,
else
igb_reset(adapter);
 
-   if (igb_reg_test(adapter, [0]))
+   if (igb_reg_test(adapter, [TEST_REG]))
eth_test->flags |= ETH_TEST_FL_FAILED;
 
igb_reset(adapter);
-   if (igb_eeprom_test(adapter, [1]))
+   if (igb_eeprom_test(adapter, [TEST_EEP]))
eth_test->flags |= ETH_TEST_FL_FAILED;
 
igb_reset(adapter);
-   if (igb_intr_test(adapter, [2]))
+   if (igb_intr_test(adapter, [TEST_IRQ]))
eth_test->flags |= ETH_TEST_FL_FAILED;
 
igb_reset(adapter);
/* power up link for loopback test */
igb_power_up_link(adapter);
-   if (igb_loopback_test(adapter, [3]))
+   if (igb_loopback_test(adapter, [TEST_LOOP]))
eth_test->flags |= ETH_TEST_FL_FAILED;
 
/* restore speed, duplex, autoneg settings */
@@ -2045,16 +2055,16 @@ static void igb_diag_test(struct net_device *netdev,
dev_info(>pdev->dev, "online testing starting\n");
 
/* PHY is powered down when interface is down */
-   if (if_running && igb_link_test(adapter, [4]))
+   if (if_running && igb_link_test(adapter, [TEST_LINK]))
eth_test->flags |= ETH_TEST_FL_FAILED;
else
-   data[4] = 0;
+   data[TEST_LINK] = 0;
 
/* Online tests aren't run; pass by default */
-   data[0] = 0;
-   data[1] = 0;
-   data[2] = 0;
-   data[3] = 0;
+   data[TEST_REG] = 0;
+   data[TEST_EEP] = 0;
+   data[TEST_IRQ] = 0;
+   data[TEST_LOOP] = 0;
 
clear_bit(__IGB_TESTING, >state);
}
-- 
2.5.0

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[net-next 1/9] e1000e: Switch e1000e_up to void, drop code checking for error result

2015-12-14 Thread Jeff Kirsher
From: Alexander Duyck 

The function e1000e_up always returns 0.  As such we can convert it to a
void and just ignore the results.  This allows us to drop some code in a
couple spots as we no longer need to worry about non-zero return values.

Signed-off-by: Alexander Duyck 
Tested-by: Aaron Brown 
Signed-off-by: Jeff Kirsher 
---
 drivers/net/ethernet/intel/e1000e/e1000.h  |  2 +-
 drivers/net/ethernet/intel/e1000e/netdev.c | 15 ---
 2 files changed, 5 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/intel/e1000e/e1000.h 
b/drivers/net/ethernet/intel/e1000e/e1000.h
index 0b748d1..1dc293b 100644
--- a/drivers/net/ethernet/intel/e1000e/e1000.h
+++ b/drivers/net/ethernet/intel/e1000e/e1000.h
@@ -480,7 +480,7 @@ extern const char e1000e_driver_version[];
 void e1000e_check_options(struct e1000_adapter *adapter);
 void e1000e_set_ethtool_ops(struct net_device *netdev);
 
-int e1000e_up(struct e1000_adapter *adapter);
+void e1000e_up(struct e1000_adapter *adapter);
 void e1000e_down(struct e1000_adapter *adapter, bool reset);
 void e1000e_reinit_locked(struct e1000_adapter *adapter);
 void e1000e_reset(struct e1000_adapter *adapter);
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c 
b/drivers/net/ethernet/intel/e1000e/netdev.c
index 775e389..955c8c7 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -4146,7 +4146,7 @@ void e1000e_reset(struct e1000_adapter *adapter)
 
 }
 
-int e1000e_up(struct e1000_adapter *adapter)
+void e1000e_up(struct e1000_adapter *adapter)
 {
struct e1000_hw *hw = >hw;
 
@@ -4166,8 +4166,6 @@ int e1000e_up(struct e1000_adapter *adapter)
ew32(ICS, E1000_ICS_LSC | E1000_ICR_OTHER);
else
ew32(ICS, E1000_ICS_LSC);
-
-   return 0;
 }
 
 static void e1000e_flush_descriptors(struct e1000_adapter *adapter)
@@ -6633,7 +6631,7 @@ static int e1000e_pm_runtime_resume(struct device *dev)
return rc;
 
if (netdev->flags & IFF_UP)
-   rc = e1000e_up(adapter);
+   e1000e_up(adapter);
 
return rc;
 }
@@ -6824,13 +6822,8 @@ static void e1000_io_resume(struct pci_dev *pdev)
 
e1000_init_manageability_pt(adapter);
 
-   if (netif_running(netdev)) {
-   if (e1000e_up(adapter)) {
-   dev_err(>dev,
-   "can't bring device back up after reset\n");
-   return;
-   }
-   }
+   if (netif_running(netdev))
+   e1000e_up(adapter);
 
netif_device_attach(netdev);
 
-- 
2.5.0

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: [PATCHSET v4] netfilter, cgroup: implement cgroup2 path match in xt_cgroup

2015-12-14 Thread Dexuan Cui
iptables extension as a reply.  diffstat follows.  Thanks.
> 
>  fs/kernfs/dir.c  |   46 +++
>  include/linux/cgroup-defs.h  |  126 
> +++
>  include/linux/cgroup.h   |   66 +++-
>  include/linux/kernfs.h   |   12 ++
>  include/net/cls_cgroup.h |   11 +-
>  include/net/netprio_cgroup.h |   16 +++
>  include/net/sock.h   |   13 ---
>  include/uapi/linux/netfilter/xt_cgroup.h |   15 +++
>  kernel/cgroup.c  |  126 
> ---
>  net/Kconfig  |6 +
>  net/core/dev.c   |3
>  net/core/netclassid_cgroup.c |   11 +-
>  net/core/netprio_cgroup.c|   19 
>  net/core/scm.c   |4
>  net/core/sock.c  |   17 
>  net/netfilter/nft_meta.c |2
>  net/netfilter/xt_cgroup.c|  108 ++
>  17 files changed, 513 insertions(+), 88 deletions(-)
> 
> --
> tejun

Hi Tejun,
With today's linux-next (next-20151214), I still got the same back trace, which
was previously reported at http://lists.openwall.net/netdev/2015/11/23/80:

[   15.129701] BUG: spinlock bad magic on CPU#6, (systemd)/1012
[   15.129701]  lock: cgroup_sk_update_lock+0x0/0x40, .magic: , .owner: 
/-1, .owner_cpu: 0
[   15.129701] CPU: 6 PID: 1012 Comm: (systemd) Not tainted 
4.4.0-rc4-next-20151214+ #3
[   15.129701] Hardware name: Microsoft Corporation Virtual Machine/Virtual 
Machine, BIOS 090006  05/23/2012
[   15.129701]  ae6cddc0 8800e158bab0 ad317212 

[   15.129701]  8800e158bad0 ad0a1b8c ae6cddc0 
ad800ee6
[   15.129701]  8800e158baf0 ad0a1c06 ae6cddc0 
8800ead9f080
[   15.129701] Call Trace:
[   15.129701]  [] dump_stack+0x44/0x62
[   15.129701]  [] spin_dump+0x7c/0xd0
[   15.129701]  [] spin_bug+0x26/0x30
[   15.129701]  [] do_raw_spin_lock+0xe5/0x120
[   15.129701]  [] _raw_spin_lock+0x39/0x40
[   15.129701]  [] ? update_classid_sock+0x33/0x80
[   15.129701]  [] update_classid_sock+0x33/0x80
[   15.129701]  [] ? write_classid+0x30/0x30
[   15.129701]  [] iterate_fd+0x5a/0x90
[   15.129701]  [] update_classid+0x47/0x80
[   15.129701]  [] cgrp_attach+0x25/0x30
[   15.129701]  [] cgroup_taskset_migrate+0x14b/0x280
[   15.129701]  [] cgroup_migrate+0xbf/0x100
[   15.129701]  [] ? cgroup_migrate+0x5/0x100
[   15.129701]  [] cgroup_attach_task+0xb5/0x100
[   15.129701]  [] ? cgroup_attach_task+0x5/0x100
[   15.129701]  [] __cgroup_procs_write+0x1da/0x310
[   15.129701]  [] ? __cgroup_procs_write+0x5e/0x310
[   15.129701]  [] cgroup_procs_write+0x14/0x20
[   15.129701]  [] cgroup_file_write+0x40/0x130
[   15.129701]  [] kernfs_fop_write+0x130/0x180
[   15.129701]  [] __vfs_write+0x28/0xe0
[   15.129701]  [] ? percpu_down_read+0x3c/0x90
[   15.129701]  [] ? __sb_start_write+0xdc/0xf0
[   15.129701]  [] ? __sb_start_write+0xdc/0xf0
[   15.129701]  [] vfs_write+0xa9/0x190
[   15.129701]  [] SyS_write+0x49/0xa0
[   15.129701]  [] entry_SYSCALL_64_fastpath+0x16/0x7a

My kernel config is attached FYI.

Thanks,
-- Dexuan


kernnel.config
Description: kernnel.config


[RFCv4 bluetooth-next 2/2] ipv6: add 6co as icmpv6 userspace option

2015-12-14 Thread Alexander Aring
This patch adds the 6LoWPAN Context Option (6CO) as a userspace option to
allow processing such options inside RA messages in userspace.

Cc: David S. Miller 
Cc: Alexey Kuznetsov 
Cc: James Morris 
Cc: Hideaki YOSHIFUJI 
Cc: Patrick McHardy 
Signed-off-by: Alexander Aring 
---
 include/net/ndisc.h | 1 +
 net/ipv6/ndisc.c| 3 ++-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/include/net/ndisc.h b/include/net/ndisc.h
index 2d8edaa..944258d 100644
--- a/include/net/ndisc.h
+++ b/include/net/ndisc.h
@@ -35,6 +35,7 @@ enum {
ND_OPT_ROUTE_INFO = 24, /* RFC4191 */
ND_OPT_RDNSS = 25,  /* RFC5006 */
ND_OPT_DNSSL = 31,  /* RFC6106 */
+   ND_OPT_6CO = 34,/* RFC6775 */
__ND_OPT_MAX
 };
 
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index d6161e1..bed154e 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -188,7 +188,8 @@ static struct nd_opt_hdr *ndisc_next_option(struct 
nd_opt_hdr *cur,
 static inline int ndisc_is_useropt(struct nd_opt_hdr *opt)
 {
return opt->nd_opt_type == ND_OPT_RDNSS ||
-   opt->nd_opt_type == ND_OPT_DNSSL;
+   opt->nd_opt_type == ND_OPT_DNSSL ||
+   opt->nd_opt_type == ND_OPT_6CO;
 }
 
 static struct nd_opt_hdr *ndisc_next_useropt(struct nd_opt_hdr *cur,
-- 
2.6.1

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[RFCv4 bluetooth-next 0/2] 6lowpan: 6co and stateful compression support

2015-12-14 Thread Alexander Aring
Hi,

this patch series adds stateful compression support and adds the 6CO option as
a new userspace option for processing RA messages inside userspace.

I am not sure if "6CO" handling inside userspace is the best option here.

I will also send "radvd" patches which introduce very "basic" support for
processing (non-6LBR) / managing (6LBR) 6CO option fields. These patches don't
support lifetime handling of contexts. There is also the question of whether
we should do the lifetime handling inside userspace or kernelspace.

I am currently following this approach:
If we don't need it inside the kernelspace, then we should handle it in
userspace.

It's difficult to figure out if we really can handle it inside userspace only.

RFC6775 describes some different roles inside the network:

 - 6LN (6LoWPAN Node)
 - 6LR (Router inside 6LoWPAN network)
 - 6LBR ($IP_NETWORK <-> 6LoWPAN network)

Processing ICMPv6 (RA/RS, NA/NS) messages may be different for each role. I
currently do not have a full overview of RFC6775, and sometimes (as an example,
for the ABRO field, another option field for 6LoWPAN) it says:

8.1.3.  Routers Processing Router Advertisements
Note: (I suppose this is for 6LR only!)
If a received RA does not contain an ABRO, then the RA MUST be silently
ignored.

---

To my knowledge such handling needs to be inside kernelspace. This is filter
functionality only; processing can be handled inside userspace (which needs ABRO
as a userspace option first as well), but then the kernel needs to know which
"role (6LN, 6LR, 6LBR)" the interface has.

- Alex

changes since v4:
 - remove patches for adding debugfs which are already upstream.
 - add "ipv6: add 6co as icmpv6 userspace option"
 - fix transmit check on (cid) instead (sci || dci) for adding CID inline
   data. If CID is zero it will be compressed.
 - remove "dci_table, sci_table, mcast_table" we have "ctx_table" only.
 - Change enabled with "u32 flags" since we need more information than
   "enabled" only. We handle also "compression flag" now.

Alexander Aring (2):
  6lowpan: iphc: add support for stateful compression
  ipv6: add 6co as icmpv6 userspace option

 include/net/6lowpan.h |  31 
 include/net/ndisc.h   |   1 +
 net/6lowpan/core.c|   6 +-
 net/6lowpan/debugfs.c |  97 
 net/6lowpan/iphc.c| 420 +++---
 net/ipv6/ndisc.c  |   3 +-
 6 files changed, 499 insertions(+), 59 deletions(-)

-- 
2.6.1

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: use-after-free in sctp_do_sm

2015-12-14 Thread Vlad Yasevich
On 12/14/2015 04:50 AM, David Laight wrote:
> From: Vlad Yasevich
>> Sent: 11 December 2015 18:38
> ...
>>> Found a similar place in abort primitive handling like in this last
>>> patch update, it's probably the issue you're still triggering.
>>>
>>> Also found another place that may lead to this use after free, in case
>>> we receive a packet with a chunk that has no data.
>>>
>>> Oh my.. :)
>>
>> Yes.  This is what I was worried about...  Anything that triggers
>> a DELETE_TCB command has to return a code that we can trap.
>>
>> The other way is to do what Dmitri suggested, but even there, we
>> need to be very careful.
> 
> I'm always wary of anything that queues actions up for later processing.
> It is far too easy (as found here) to end up processing actions
> in invalid states, or to process actions in 'unusual' orders when
> specific events happen close together.
> 
> I wonder how much fallout there'd be from getting the sctp code
> to immediately action things, instead of queuing the actions for later.
> It would certainly remove a lot of the unusual combinations of events.
> 

We've bandied this idea around for a while, but no one has had the time
to tackle this.  This would be a rather time-consuming task, but in the end
might be a good idea.

-vlad

>   David
> 
> 

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[RFCv4 bluetooth-next 1/2] 6lowpan: iphc: add support for stateful compression

2015-12-14 Thread Alexander Aring
This patch introduces support for IPHC stateful address compression. It
will offer the context table via one debugfs entry.

Example to setup a context id:

A "cat /sys/kernel/debug/6lowpan/lowpan0/ctx_table" will display all
contexts which are available. Example:

ID ipv6-address/prefix-length  flags
0  :::::::/0   0
1  :::::::/0   0
2  :::::::/0   0
3  :::::::/0   0
4  :::::::/0   0
5  :::::::/0   0
6  :::::::/0   0
7  :::::::/0   0
8  :::::::/0   0
9  :::::::/0   0
10 :::::::/0   0
11 :::::::/0   0
12 :::::::/0   0
13 :::::::/0   0
14 :::::::/0   0
15 :::::::/0   0

For setting a context e.g. context id 0, context 2001::, prefix-length
64.

Hint: Simply copy one line and then manipulate it.

echo "0 2001:::::::/64 3" >
/sys/kernel/debug/6lowpan/lowpan0/ctx_table

The flags are currently two:

 - BIT(0) - active: entry is added or deleted to the ctx_table.
 - BIT(1) - c: compression flag according rfc6775.

On transmit side:

The IPHC code will automatically search for a context which matches
the address. Then it will use the context with the best compression
method. This means the longest matching prefix will be used.

Example:

2001::/126 vs 2001::/127 - the 2001::/127 can be fully compressed if the
last bit of the address which has the prefix 2001::/127 is the same as
the IID from the Encapsulating Header. A context ID can also be a
2001::1/128, which is then a full ipv6 address.

On Receive side:

If there is a context defined (when the CID is not available then it's the
default context 0) then it will be used; if the header doesn't set the
SAC or DAC bit then it will be dropped.

Signed-off-by: Alexander Aring 
---
 include/net/6lowpan.h |  31 
 net/6lowpan/core.c|   6 +-
 net/6lowpan/debugfs.c |  97 
 net/6lowpan/iphc.c| 420 +++---
 4 files changed, 496 insertions(+), 58 deletions(-)

diff --git a/include/net/6lowpan.h b/include/net/6lowpan.h
index 2f6a3f2..db636c8 100644
--- a/include/net/6lowpan.h
+++ b/include/net/6lowpan.h
@@ -75,6 +75,8 @@
 #define LOWPAN_IPHC_MAX_HC_BUF_LEN (sizeof(struct ipv6hdr) +   \
 LOWPAN_IPHC_MAX_HEADER_LEN +   \
 LOWPAN_NHC_MAX_HDR_LEN)
+/* SCI/DCI is 4 bit width, so we have maximum 16 entries */
+#define LOWPAN_IPHC_CI_TABLE_SIZE  (1 << 4)
 
 #define LOWPAN_DISPATCH_IPV6   0x41 /* 0101 = 65 */
 #define LOWPAN_DISPATCH_IPHC   0x60 /* 011x = ... */
@@ -98,9 +100,38 @@ enum lowpan_lltypes {
LOWPAN_LLTYPE_IEEE802154,
 };
 
+enum lowpan_iphc_ctx_flags {
+   LOWPAN_IPHC_CTX_FLAG_ACTIVE = BIT(0),
+   LOWPAN_IPHC_CTX_FLAG_C  = BIT(1),
+};
+
+struct lowpan_iphc_ctx {
+   u8 id;
+   struct in6_addr pfx;
+   u8 plen;
+   u32 flags;
+};
+
+struct lowpan_iphc_ctx_table {
+   spinlock_t lock;
+   const struct lowpan_iphc_ctx_ops *ops;
+   struct lowpan_iphc_ctx table[LOWPAN_IPHC_CI_TABLE_SIZE];
+};
+
+static inline bool lowpan_iphc_ctx_is_active(const struct lowpan_iphc_ctx *ctx)
+{
+   return ctx->flags & LOWPAN_IPHC_CTX_FLAG_ACTIVE;
+}
+
+static inline bool lowpan_iphc_ctx_is_c(const struct lowpan_iphc_ctx *ctx)
+{
+   return ctx->flags & LOWPAN_IPHC_CTX_FLAG_C;
+}
+
 struct lowpan_priv {
enum lowpan_lltypes lltype;
struct dentry *iface_debugfs;
+   struct lowpan_iphc_ctx_table ctx;
 
/* must be last */
u8 priv[0] __aligned(sizeof(void *));
diff --git a/net/6lowpan/core.c b/net/6lowpan/core.c
index c7f06f5..772f51c 100644
--- a/net/6lowpan/core.c
+++ b/net/6lowpan/core.c
@@ -20,7 +20,7 @@
 int lowpan_register_netdevice(struct net_device *dev,
  enum lowpan_lltypes lltype)
 {
-   int ret;
+   int i, ret;
 
dev->addr_len = EUI64_ADDR_LEN;
dev->type = ARPHRD_6LOWPAN;
@@ -29,6 +29,10 @@ int lowpan_register_netdevice(struct net_device *dev,
 
lowpan_priv(dev)->lltype = lltype;
 
+   spin_lock_init(_priv(dev)->ctx.lock);
+   for (i = 0; i < LOWPAN_IPHC_CI_TABLE_SIZE; i++)
+   lowpan_priv(dev)->ctx.table[i].id = i;
+
ret = lowpan_dev_debugfs_init(dev);
if (ret < 0)
return ret;
diff --git a/net/6lowpan/debugfs.c b/net/6lowpan/debugfs.c
index 88eef84..5270fa1 100644
--- a/net/6lowpan/debugfs.c
+++ b/net/6lowpan/debugfs.c
@@ 

Re: [V2 PATCH 1/1] net: sctp: dynamically enable or disable pf state

2015-12-14 Thread Vlad Yasevich
On 12/14/2015 01:22 AM, zyjzyj2...@gmail.com wrote:
> From: Zhu Yanjun 
> 
> As we all know, the value of pf_retrans >= max_retrans_path can
> disable pf state. The variables of pf_retrans and max_retrans_path
> can be changed by the user space application.
> 
> Sometimes the user expects to disable pf state while the 2
> variables are changed to enable pf state. So it is necessary to
> introduce a new variable to disable pf state.
> 
> According to the suggestions from Vlad Yasevich, extra1 and extra2
> are removed. The initialization of pf_enable is added.
> 
> Signed-off-by: Zhu Yanjun 

Acked-by: Vlad Yasevich 

-vlad

> ---
>  include/net/netns/sctp.h |7 +++
>  net/sctp/protocol.c  |3 +++
>  net/sctp/sm_sideeffect.c |5 -
>  net/sctp/sysctl.c|7 +++
>  4 files changed, 21 insertions(+), 1 deletion(-)
> 
> diff --git a/include/net/netns/sctp.h b/include/net/netns/sctp.h
> index 8ba379f..c501d67 100644
> --- a/include/net/netns/sctp.h
> +++ b/include/net/netns/sctp.h
> @@ -89,6 +89,13 @@ struct netns_sctp {
>   int pf_retrans;
>  
>   /*
> +  * Disable Potentially-Failed feature, the feature is enabled by default
> +  * pf_enable-  0  : disable pf
> +  *  - >0  : enable pf
> +  */
> + int pf_enable;
> +
> + /*
>* Policy for preforming sctp/socket accounting
>* 0   - do socket level accounting, all assocs share sk_sndbuf
>* 1   - do sctp accounting, each asoc may use sk_sndbuf bytes
> diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
> index 4d9912f..571a631 100644
> --- a/net/sctp/protocol.c
> +++ b/net/sctp/protocol.c
> @@ -1223,6 +1223,9 @@ static int __net_init sctp_defaults_init(struct net 
> *net)
>   /* Max.Burst- 4 */
>   net->sctp.max_burst = SCTP_DEFAULT_MAX_BURST;
>  
> + /* Enable pf state by default */
> + net->sctp.pf_enable = 1;
> +
>   /* Association.Max.Retrans  - 10 attempts
>* Path.Max.Retrans - 5  attempts (per destination address)
>* Max.Init.Retransmits - 8  attempts
> diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
> index 6098d4c..05cd164 100644
> --- a/net/sctp/sm_sideeffect.c
> +++ b/net/sctp/sm_sideeffect.c
> @@ -477,6 +477,8 @@ static void sctp_do_8_2_transport_strike(sctp_cmd_seq_t 
> *commands,
>struct sctp_transport *transport,
>int is_hb)
>  {
> + struct net *net = sock_net(asoc->base.sk);
> +
>   /* The check for association's overall error counter exceeding the
>* threshold is done in the state function.
>*/
> @@ -503,7 +505,8 @@ static void sctp_do_8_2_transport_strike(sctp_cmd_seq_t 
> *commands,
>* is SCTP_ACTIVE, then mark this transport as Partially Failed,
>* see SCTP Quick Failover Draft, section 5.1
>*/
> - if ((transport->state == SCTP_ACTIVE) &&
> + if (net->sctp.pf_enable &&
> +(transport->state == SCTP_ACTIVE) &&
>  (asoc->pf_retrans < transport->pathmaxrxt) &&
>  (transport->error_count > asoc->pf_retrans)) {
>  
> diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c
> index 26d50c5..ccbfc93 100644
> --- a/net/sctp/sysctl.c
> +++ b/net/sctp/sysctl.c
> @@ -308,6 +308,13 @@ static struct ctl_table sctp_net_table[] = {
>   .extra1 = _autoclose_min,
>   .extra2 = _autoclose_max,
>   },
> + {
> + .procname   = "pf_enable",
> + .data   = _net.sctp.pf_enable,
> + .maxlen = sizeof(int),
> + .mode   = 0644,
> + .proc_handler   = proc_dointvec,
> + },
>  
>   { /* sentinel */ }
>  };
> 

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] net: phy: mdio-mux: Check return value of mdiobus_alloc()

2015-12-14 Thread Tobias Klauser
mdiobus_alloc() might return NULL, but its return value is not
checked in mdio_mux_init(). This could potentially lead to a NULL
pointer dereference. Fix it by checking the return value.

Fixes: 0ca2997d1452 ("netdev/of/phy: Add MDIO bus multiplexer support.")
Signed-off-by: Tobias Klauser 
---
 drivers/net/phy/mdio-mux.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/net/phy/mdio-mux.c b/drivers/net/phy/mdio-mux.c
index 908e8d486342..7f8e7662e28c 100644
--- a/drivers/net/phy/mdio-mux.c
+++ b/drivers/net/phy/mdio-mux.c
@@ -149,9 +149,14 @@ int mdio_mux_init(struct device *dev,
}
cb->bus_number = v;
cb->parent = pb;
+
cb->mii_bus = mdiobus_alloc();
+   if (!cb->mii_bus) {
+   ret_val = -ENOMEM;
+   of_node_put(child_bus_node);
+   break;
+   }
cb->mii_bus->priv = cb;
-
cb->mii_bus->irq = cb->phy_irq;
cb->mii_bus->name = "mdio_mux";
snprintf(cb->mii_bus->id, MII_BUS_ID_SIZE, "%x.%x",
-- 
2.6.3.368.gf34be46.dirty


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH net] openvswitch: fix trivial comment typo

2015-12-14 Thread Paolo Abeni
The commit 33db4125ec74 ("openvswitch: Rename LABEL->LABELS") left
over an old OVS_CT_ATTR_LABEL instance, fix it.

Fixes: 33db4125ec74 ("openvswitch: Rename LABEL->LABELS")
Signed-off-by: Paolo Abeni 
---
 include/uapi/linux/openvswitch.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
index 28ccedd..a27222d 100644
--- a/include/uapi/linux/openvswitch.h
+++ b/include/uapi/linux/openvswitch.h
@@ -628,7 +628,7 @@ struct ovs_action_hash {
  * @OVS_CT_ATTR_MARK: u32 value followed by u32 mask. For each bit set in the
  * mask, the corresponding bit in the value is copied to the connection
  * tracking mark field in the connection.
- * @OVS_CT_ATTR_LABEL: %OVS_CT_LABELS_LEN value followed by %OVS_CT_LABELS_LEN
+ * @OVS_CT_ATTR_LABELS: %OVS_CT_LABELS_LEN value followed by %OVS_CT_LABELS_LEN
  * mask. For each bit set in the mask, the corresponding bit in the value is
  * copied to the connection tracking label field in the connection.
  * @OVS_CT_ATTR_HELPER: variable length string defining conntrack ALG.
-- 
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[RFC radvd 2/2] radvd: rework 6CO handling

2015-12-14 Thread Alexander Aring
Current issues with the 6CO handling:
 - Doesn't work on little endian on my side because byte-order handling
   for the bitfields was forgotten.
 - There can be multiple 6CO options. Up to 16 6CO options at maximum.
 - It doesn't work as it should. Maybe somebody needs that for some
   use-case, but 6CO contains information for header parsing and this needs
   functionality to tell it to the kernel. Currently we have a debugfs entry
   for that.

As an example, RFC6775 describes that the 6LBR should configure and
manage the context entries of RFC6282.

interface lowpan0
{
Adv6LBR on;
AdvSendAdvert on;
UnicastOnly on;
AdvCurHopLimit 255;

prefix 2001::/64 {
AdvOnLink on;
AdvAutonomous on;
AdvRouterAddr on;
};

lowpanco {
ctx 0 {
AdvContextCompressionFlag on;
AdvContextLength 64;
AdvContextPrefix 2001::;
AdvLifeTime 1000;
};
};
};

If we set "Adv6LBR" to on, then the "lowpanco" contexts will be setup
during startup of radvd, otherwise all contexts are empty (non active).

I changed the parsing of contexts:
 - lowpanco contains up-to 16 contexts with _unique_ id's.
 - The id is after "ctx" specified.

What doesn't work:
 - Lifetime handling.
 - AdvContextCompressionFlag should be 0 at first to propagate "safety"
   the context inside the context. RFC6775 says here:

   New context information SHOULD be introduced into the LoWPAN with C=0,
   to ensure that it is known by all nodes that may have to perform header
   decompression based on this context information. Only when it is
   reasonable to assume that this information was successfully
   disseminated SHOULD an option with C=1 be sent, enabling the actual
   use of the context information for compression

   I know what this means, but then don't know "when" we can do "C=1",
   maybe this is out-of-scope in RFC6775.

Note:
 I ignore the ABRO for now. The ABRO needs to be included, and its version
 field indicates whether the context information is new or old. This is
 just to begin with something to handle 6CO.

Signed-off-by: Alexander Aring 
---
 defaults.h  |  3 +++
 device-bsd44.c  |  6 ++
 device-linux.c  | 35 +++
 gram.y  | 55 ++-
 pathnames.h |  1 +
 privsep-linux.c | 55 ++-
 process.c   | 48 
 radvd.c |  6 ++
 radvd.h | 21 +++--
 scanner.l   |  4 +++-
 send.c  | 33 -
 11 files changed, 245 insertions(+), 22 deletions(-)

diff --git a/defaults.h b/defaults.h
index fedd546..a328793 100644
--- a/defaults.h
+++ b/defaults.h
@@ -125,6 +125,9 @@
 
 #define MAX_PrefixLen  128
 
+/* RFC6282 Constraints */
+#define MAX_CIDLen 16
+
 /* SLAAC (RFC4862) Constants and Derived Values */
 #define MIN_AdvValidLifetime   7200/* 2 hours in secs */
 
diff --git a/device-bsd44.c b/device-bsd44.c
index f1aacca..6d4d838 100644
--- a/device-bsd44.c
+++ b/device-bsd44.c
@@ -143,6 +143,12 @@ int set_interface_retranstimer(const char *iface, uint32_t 
rettimer)
return -1;
 }
 
+int set_interface_6ctx(const struct Interface *iface, struct AdvLowpanCtx ctx)
+{
+   dlog(LOG_DEBUG, 4, "update 6LoWPAN context not supported");
+   return 0;
+}
+
 int check_ip6_forwarding(void)
 {
dlog(LOG_DEBUG, 4, "checking ipv6 forwarding not supported");
diff --git a/device-linux.c b/device-linux.c
index 7301927..c9b516f 100644
--- a/device-linux.c
+++ b/device-linux.c
@@ -86,6 +86,30 @@ int update_device_info(int sock, struct Interface *iface)
case ARPHRD_6LOWPAN:
iface->sllao.if_hwaddr_len = 64;
iface->sllao.if_prefix_len = 64;
+
+   if (iface->state_info.configured)
+   break;
+
+   /* if nothing specified use a empy AdvLowpanCoList as default */
+   if (!iface->AdvLowpanCoList) {
+   iface->AdvLowpanCoList = malloc(sizeof(struct 
AdvLowpanCo));
+   if (iface->AdvLowpanCoList == NULL) {
+   flog(LOG_ERR, "AdvLowpanCo allocation failed");
+   return -2;
+   }
+
+   memset(iface->AdvLowpanCoList, 0, sizeof(struct 
AdvLowpanCo));
+   } else {
+   /* If the LoWPAN uses header compression [RFC6282] with 
context, then
+* the 6LBR must be configured with context information 
and related
+* CIDs. Zero all if non 6LBR.
+*/
+   if (!iface->Adv6LBR)
+  

[RFC radvd 0/2] radvd: 6lowpan 6CO testing patches

2015-12-14 Thread Alexander Aring
Hi,

this patch is for testing 6CO fields in RA messages with the help of radvd.

I tested it with the following configuration and two or more nodes which can
directly reach each other.

On one node (6LBR, if more 6LBR they need to have the same context
information!):

interface lowpan0
{
Adv6LBR on;
AdvSendAdvert on;
UnicastOnly on;
AdvCurHopLimit 255;

prefix 2001::/64 {
AdvOnLink on;
AdvAutonomous on;
AdvRouterAddr on;
};

lowpanco {
ctx 0 {
AdvContextCompressionFlag on;
AdvContextLength 64;
AdvContextPrefix 2001::;
AdvLifeTime 1000;
};
};
};

On other nodes:

interface lowpan0
{
Adv6LBR off;
AdvSendAdvert off;
};

Then you can see that the assigned global link 2001::/64 address will
be compressed. Tested with: ping6 and wireshark on $WPAN interface, when
using 802.15.4 6LoWPAN.

- Alex

Alexander Aring (2):
  device-linux: replace ARPHRD_IEEE802154 to ARPHRD_6LOWPAN
  radvd: rework 6CO handling

 defaults.h  |  3 +++
 device-bsd44.c  |  6 ++
 device-linux.c  | 46 +++---
 gram.y  | 55 ++-
 pathnames.h |  1 +
 privsep-linux.c | 55 ++-
 process.c   | 48 
 radvd.c |  6 ++
 radvd.h | 21 +++--
 scanner.l   |  4 +++-
 send.c  | 33 -
 11 files changed, 253 insertions(+), 25 deletions(-)

-- 
2.6.1

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[RFC radvd 1/2] device-linux: replace ARPHRD_IEEE802154 to ARPHRD_6LOWPAN

2015-12-14 Thread Alexander Aring
This patch changes the ARPHRD_IEEE802154 to ARPHRD_6LOWPAN. The IEEE
802.15.4 6lowpan module changed the ARPHRD_IEEE802154 type to
ARPHRD_6LOWPAN. Nowadays it uses ARPHRD_6LOWPAN, which is also used by
BTLE 6LoWPAN. Both interfaces use an EUI64 address and the handling to
get the link-layer address should be the same.

There is no backward compatibility for 802.15.4 6LoWPAN before we
changed the ARPHRD. Anyway if somebody wants that it should be patched
manually. When the ARPHRD was ARPHRD_IEEE802154 the 802.15.4 6lowpan was
anyway in a somehow unusable state.

Cc: linux-blueto...@vger.kernel.org
Cc: linux-w...@vger.kernel.org
Cc: Oleg Hahm 
Signed-off-by: Alexander Aring 
---
 device-linux.c | 11 ---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/device-linux.c b/device-linux.c
index de83f2e..7301927 100644
--- a/device-linux.c
+++ b/device-linux.c
@@ -22,6 +22,10 @@
 #define IPV6_ADDR_LINKLOCAL   0x0020U
 #endif
 
+#ifndef ARPHRD_6LOWPAN
+#define ARPHRD_6LOWPAN 825 /* IPv6 over LoWPAN */
+#endif
+
 static char const *hwstr(unsigned short sa_family);
 
 /*
@@ -79,12 +83,10 @@ int update_device_info(int sock, struct Interface *iface)
iface->sllao.if_maxmtu = -1;
break;
 #endif /* ARPHDR_ARCNET */
-#ifdef ARPHRD_IEEE802154
-   case ARPHRD_IEEE802154:
+   case ARPHRD_6LOWPAN:
iface->sllao.if_hwaddr_len = 64;
iface->sllao.if_prefix_len = 64;
break;
-#endif
default:
iface->sllao.if_hwaddr_len = -1;
iface->sllao.if_prefix_len = -1;
@@ -382,6 +384,9 @@ static char const *hwstr(unsigned short sa_family)
rc = "ARPHRD_IEEE802154_PHY";
break;
 #endif
+   case ARPHRD_6LOWPAN:
+   rc = "ARPHRD_6LOWPAN";
+   break;
case ARPHRD_VOID:
rc = "ARPHRD_VOID";
break;
-- 
2.6.1

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 1/1] net: bonding: remove redudant brackets

2015-12-14 Thread Nicolas Dichtel

Le 12/12/2015 02:03, David Miller a écrit :

From: 



From: yzhu1 



Signed-off-by: yzhu1 


Does not apply to the net-next tree.

Also three different emails ...
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 1/2] vlan: Fix untag operations of stacked vlans with REORDER_HEADER off

2015-12-14 Thread Nicolas Dichtel

Le 16/11/2015 21:43, Vladislav Yasevich a écrit :

When we have multiple stacked vlan devices all of which have
turned off REORDER_HEADER flag, the untag operation does not
locate the ethernet addresses correctly for nested vlans.
The reason is that in case of REORDER_HEADER flag being off,
the outer vlan headers are put back and the mac_len is adjusted
to account for the presence of the header.  Then, the subsequent
untag operation, for the next level vlan, always uses VLAN_ETH_HLEN
to locate the beginning of the ethernet header and that ends up
being a multiple of 4 bytes short of the actual beginning
of the mac header (the multiple depending on how many vlan
encapsulations there are).

As a result, if there are multiple levels of vlan devices
with REORDER_HEADER being off, the received packets end up
being dropped.

To solve this, we use skb->mac_len as the offset.  The value
is always set on receive path and starts out as a ETH_HLEN.
The value is also updated when the vlan header manipulations occur
so we know it will be correct.

Signed-off-by: Vladislav Yasevich 
---
  net/core/skbuff.c | 3 ++-
  1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index fab4599..160193f 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -4268,7 +4268,8 @@ static struct sk_buff *skb_reorder_vlan_header(struct 
sk_buff *skb)
return NULL;
}

-   memmove(skb->data - ETH_HLEN, skb->data - VLAN_ETH_HLEN, 2 * ETH_ALEN);
+   memmove(skb->data - ETH_HLEN, skb->data - skb->mac_len,
+   2 * ETH_ALEN);
skb->mac_header += VLAN_HLEN;
return skb;
  }


This patch breaks the following test case: a vlan packet is received by an
e1000 interface. Here is the configuration of the interface:
$ ethtool -k ntfp2 | grep "vlan\|offload"
tcp-segmentation-offload: off
udp-fragmentation-offload: off [fixed]
generic-segmentation-offload: on
generic-receive-offload: on
large-receive-offload: off [fixed]
rx-vlan-offload: off
tx-vlan-offload: off [fixed]
rx-vlan-filter: on [fixed]
vlan-challenged: off [fixed]
tx-vlan-stag-hw-insert: off [fixed]
rx-vlan-stag-hw-parse: off [fixed]
rx-vlan-stag-filter: off [fixed]
l2-fwd-offload: off [fixed]

The vlan header is not removed by the driver. It calls dev_gro_receive() which
sets the network header to +14, thus mac_len is also set to 14 and
skb_reorder_vlan_header() does a wrong memmove() (the packet is dropped).
Not sure who is responsible to update mac_len before skb_vlan_untag() is
called. Any suggestions?
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Double free of dst_entry in ipv4_dst_destroy()

2015-12-14 Thread Eric Dumazet
On Mon, 2015-12-14 at 11:28 -0500, dwil...@us.ibm.com wrote:
> Eric -
> With this patch applied the test ran clean for 2 days.
> 
> Thanks for your help.

Excellent ! 

Thanks a lot David, I will submit it formally with your 'Reported-by'
and 'Tested-by'

I have no idea why it took so long to discover this race.


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH net] netlink: fix boolean evaluation on bound

2015-12-14 Thread Hannes Frederic Sowa
portid may be 0, thus bound will set the flag to false for in-kernel
created netlink sockets.

Fixes: da314c9923fed55 ("netlink: Replace rhash_portid with bound")
Cc: Herbert Xu 
Signed-off-by: Hannes Frederic Sowa 
---
This patch should not affect anything and is just meant to close this
loophole in future. I based it on net, but you can also apply it to
net-next.

 net/netlink/af_netlink.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 59651af8cc2705..278e94c3c7f6d1 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1137,7 +1137,7 @@ static int netlink_insert(struct sock *sk, u32 portid)
 
/* We need to ensure that the socket is hashed and visible. */
smp_wmb();
-   nlk_sk(sk)->bound = portid;
+   nlk_sk(sk)->bound = true;
 
 err:
release_sock(sk);
-- 
2.5.0

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 1/2] net: udp: local checksum offload for encapsulation

2015-12-14 Thread Tom Herbert
On Mon, Dec 14, 2015 at 7:13 AM, Edward Cree  wrote:
> The arithmetic properties of the ones-complement checksum mean that a
>  correctly checksummed inner packet, including its checksum, has a ones
>  complement sum depending only on whatever value was used to initialise
>  the checksum field before checksumming (in the case of TCP and UDP,
>  this is the ones complement sum of the pseudo header, complemented).
> Consequently, if we are going to offload the inner checksum with
>  CHECKSUM_PARTIAL, we can compute the outer checksum based only on the
>  packet data not covered by the inner checksum, and the initial value of
>  the inner checksum field.
>
> Signed-off-by: Edward Cree 
> ---
>  net/ipv4/udp.c | 31 +++
>  1 file changed, 27 insertions(+), 4 deletions(-)
>
> diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
> index 0c7b0e6..07d679e 100644
> --- a/net/ipv4/udp.c
> +++ b/net/ipv4/udp.c
> @@ -767,12 +767,35 @@ void udp_set_csum(bool nocheck, struct sk_buff *skb,
>  {
> struct udphdr *uh = udp_hdr(skb);
>
> -   if (nocheck)
> +   if (nocheck) {
> uh->check = 0;
> -   else if (skb_is_gso(skb))
> +   } else if (skb_is_gso(skb)) {
> uh->check = ~udp_v4_check(len, saddr, daddr, 0);
> -   else if (skb_dst(skb) && skb_dst(skb)->dev &&
> -(skb_dst(skb)->dev->features & NETIF_F_V4_CSUM)) {
> +   } else if (skb->ip_summed == CHECKSUM_PARTIAL &&
> +  skb_dst(skb) && skb_dst(skb)->dev &&
> +  (skb_dst(skb)->dev->features & NETIF_F_HW_CSUM)) {
> +   /* Everything from csum_start onwards will be
> +* checksummed and will thus have a sum of whatever
> +* we previously put in the checksum field (eg. sum
> +* of pseudo-header)
> +*/
> +   __wsum csum;
> +
> +   /* Fill in our pseudo-header checksum */
> +   uh->check = ~udp_v4_check(len, saddr, daddr, 0);
> +   /* Start with complement of inner pseudo-header checksum */
> +   csum = ~skb_checksum(skb, skb_checksum_start_offset(skb) + 
> skb->csum_offset,
> +2, 0);
> +   /* Add in checksum of our headers (incl. pseudo-header
> +* checksum filled in above)
> +*/
> +   csum = skb_checksum(skb, 0, skb_checksum_start_offset(skb), 
> csum);
> +   /* The result is the outer checksum */
> +   uh->check = csum_fold(csum);
> +   if (uh->check == 0)
> +   uh->check = CSUM_MANGLED_0;
> +   } else if (skb_dst(skb) && skb_dst(skb)->dev &&
> +  (skb_dst(skb)->dev->features & NETIF_F_V4_CSUM)) {
>
It's clever, but I'm not sure this saves much. The outer checksum
could still be offloaded to the device without the extra work. Where
this technique would be nice is if the device doesn't support checksum
offload at all, then we would definitely avoid doing multiple
checksums. That's going to be harder since we won't see
CHECKSUM_PARTIAL in that case for the inner checksum, but it would get
us to the principle that we only ever calculate the packet checksum
once or zero times.
.

> BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL);
>
> --
> 2.4.3
>
>
> The information contained in this message is confidential and is intended for 
> the addressee(s) only. If you have received this message in error, please 
> notify the sender immediately and delete the message. Unless you are an 
> addressee (or authorized to receive for an addressee), you may not use, copy 
> or disclose to anyone this message or any information contained in this 
> message. The unauthorized use, disclosure, copying or alteration of this 
> message is strictly prohibited.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH net] openvswitch: fix trivial comment typo

2015-12-14 Thread Joe Stringer
On 14 December 2015 at 05:29, Paolo Abeni  wrote:
> The commit 33db4125ec74 ("openvswitch: Rename LABEL->LABELS") left
> over an old OVS_CT_ATTR_LABEL instance, fix it.
>
> Fixes: 33db4125ec74 ("openvswitch: Rename LABEL->LABELS")
> Signed-off-by: Paolo Abeni 

Thanks for the fix.

Acked-by: Joe Stringer 
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v3 net] ravb: clear RIC1 in init instead of stop

2015-12-14 Thread Yoshihiro Kaneko
From: Kazuya Mizuguchi 

AVB-DMAC Receive FIFO Warning interrupt is not enabled, so it is not
necessary to disable the interrupt in ravb_close().
On the other hand, this patch disables the interrupt in ravb_dmac_init() to
prevent the possibility that the interrupt is issued by the state that
a boot loader left.

Signed-off-by: Kazuya Mizuguchi 
Signed-off-by: Yoshihiro Kaneko 
---

This patch is based on the master branch of David Miller's networking
tree.

v3 [Yoshihiro Kaneko]
* compile tested only
* As suggested by Sergei Shtylyov
  - update the subject to reflect what this patch is doing.
  - clear RIC1 in ravb_dmac_init() instead of ravb_open().

v2 [Yoshihiro Kaneko]
* compile tested only
* As suggested by Sergei Shtylyov
  - clear RIC1 in ndo_open() to disable the interrupt regardless of
a left state from a bootloader.

 drivers/net/ethernet/renesas/ravb_main.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/renesas/ravb_main.c 
b/drivers/net/ethernet/renesas/ravb_main.c
index b69e0c2..b76303e 100644
--- a/drivers/net/ethernet/renesas/ravb_main.c
+++ b/drivers/net/ethernet/renesas/ravb_main.c
@@ -405,9 +405,11 @@ static int ravb_dmac_init(struct net_device *ndev)
/* Timestamp enable */
ravb_write(ndev, TCCR_TFEN, TCCR);
 
-   /* Interrupt enable: */
+   /* Interrupt init: */
/* Frame receive */
ravb_write(ndev, RIC0_FRE0 | RIC0_FRE1, RIC0);
+   /* Disable FIFO full warning */
+   ravb_write(ndev, 0, RIC1);
/* Receive FIFO full error, descriptor empty */
ravb_write(ndev, RIC2_QFE0 | RIC2_QFE1 | RIC2_RFFE, RIC2);
/* Frame transmitted, timestamp FIFO updated */
@@ -1471,7 +1473,6 @@ static int ravb_close(struct net_device *ndev)
 
/* Disable interrupts by clearing the interrupt masks. */
ravb_write(ndev, 0, RIC0);
-   ravb_write(ndev, 0, RIC1);
ravb_write(ndev, 0, RIC2);
ravb_write(ndev, 0, TIC);
 
-- 
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH net] netlink: fix boolean evaluation on bound

2015-12-14 Thread Herbert Xu
On Mon, Dec 14, 2015 at 05:55:25PM +0100, Hannes Frederic Sowa wrote:
> portid may be 0, thus bound will set the flag to false for in-kernel
> created netlink sockets.
> 
> Fixes: da314c9923fed55 ("netlink: Replace rhash_portid with bound")
> Cc: Herbert Xu 
> Signed-off-by: Hannes Frederic Sowa 
> ---
> This patch should not affect anything and is just meant to close this
> loophole in future. I based it on net, but you can also apply it to
> net-next.

Nack.  The bound field only needs to be true for user-space sockets.
So please explain why you need it to be true for kernel sockets.

Thanks,
-- 
Email: Herbert Xu 
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Double free of dst_entry in ipv4_dst_destroy()

2015-12-14 Thread dwilder

Eric -
With this patch applied the test ran clean for 2 days.

Thanks for your help.

Quoting Eric Dumazet :


On Fri, 2015-12-11 at 07:48 -0800, Eric Dumazet wrote:

On Fri, 2015-12-11 at 06:23 -0800, Eric Dumazet wrote:
> On Sun, 2015-12-06 at 17:58 -0800, Eric Dumazet wrote:
> > On Sun, 2015-12-06 at 13:03 -0800, Eric Dumazet wrote:
> >
> > > But then when later we promote a skb->dst to a refcounted one
> > > (skb_dst_force()), we might make sure we abort the operation if __refcnt
> > > == 0 (and DST_NOCACHE is in dst->flags)
> > >
> >
> > Minimum patch would be :
> >
>
> Here is a more complete patch, it should fix the issue I think :

Hmm, I'll send a v3, I forgot to test DST_NOCACHE properly.



David, please test the following patch, thanks !

 include/net/dst.h   |   33 +
 include/net/sock.h  |2 +-
 net/ipv4/tcp_ipv4.c |5 ++---
 net/ipv6/tcp_ipv6.c |3 +--
 4 files changed, 37 insertions(+), 6 deletions(-)

diff --git a/include/net/dst.h b/include/net/dst.h
index 1279f9b09791..c7329dcd90cc 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -322,6 +322,39 @@ static inline void skb_dst_force(struct sk_buff *skb)
}
 }

+/**
+ * dst_hold_safe - Take a reference on a dst if possible
+ * @dst: pointer to dst entry
+ *
+ * This helper returns false if it could not safely
+ * take a reference on a dst.
+ */
+static inline bool dst_hold_safe(struct dst_entry *dst)
+{
+   if (dst->flags & DST_NOCACHE)
+   return atomic_inc_not_zero(&dst->__refcnt);
+   dst_hold(dst);
+   return true;
+}
+
+/**
+ * skb_dst_force_safe - makes sure skb dst is refcounted
+ * @skb: buffer
+ *
+ * If dst is not yet refcounted and not destroyed, grab a ref on it.
+ */
+static inline void skb_dst_force_safe(struct sk_buff *skb)
+{
+   if (skb_dst_is_noref(skb)) {
+   struct dst_entry *dst = skb_dst(skb);
+
+   if (!dst_hold_safe(dst))
+   dst = NULL;
+
+   skb->_skb_refdst = (unsigned long)dst;
+   }
+}
+

 /**
  * __skb_tunnel_rx - prepare skb for rx reinsert
diff --git a/include/net/sock.h b/include/net/sock.h
index eaef41433d7a..18322bded064 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -816,7 +816,7 @@ void sk_stream_write_space(struct sock *sk);
 static inline void __sk_add_backlog(struct sock *sk, struct sk_buff *skb)
 {
/* dont let skb dst not refcounted, we are going to leave rcu lock */
-   skb_dst_force(skb);
+   skb_dst_force_safe(skb);

if (!sk->sk_backlog.tail)
sk->sk_backlog.head = skb;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index db003438aaf5..d8841a2f1569 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1493,7 +1493,7 @@ bool tcp_prequeue(struct sock *sk, struct sk_buff *skb)
if (likely(sk->sk_rx_dst))
skb_dst_drop(skb);
else
-   skb_dst_force(skb);
+   skb_dst_force_safe(skb);

__skb_queue_tail(&tp->ucopy.prequeue, skb);
tp->ucopy.memory += skb->truesize;
@@ -1721,8 +1721,7 @@ void inet_sk_rx_dst_set(struct sock *sk, const  
struct sk_buff *skb)

 {
struct dst_entry *dst = skb_dst(skb);

-   if (dst) {
-   dst_hold(dst);
+   if (dst && dst_hold_safe(dst)) {
sk->sk_rx_dst = dst;
inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
}
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index e7aab561b7b4..6b8a8a9091fa 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -93,10 +93,9 @@ static void inet6_sk_rx_dst_set(struct sock *sk,  
const struct sk_buff *skb)

 {
struct dst_entry *dst = skb_dst(skb);

-   if (dst) {
+   if (dst && dst_hold_safe(dst)) {
const struct rt6_info *rt = (const struct rt6_info *)dst;

-   dst_hold(dst);
sk->sk_rx_dst = dst;
inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt);


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html



--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 net] ravb: Remove clear unhandled interrupt

2015-12-14 Thread Yoshihiro Kaneko
Hi,

2015-12-14 1:25 GMT+09:00 Sergei Shtylyov :
> Hello.
>
> On 12/13/2015 06:12 PM, Yoshihiro Kaneko wrote:
>
>The subject doesn't seem to reflect what the patch is doing.
>
>
>> From: Kazuya Mizuguchi 
>>
>> AVB-DMAC Receive FIFO Warning interrupt is not enabled, so it is not
>> necessary to disable the interrupt in ndo_close().
>> On the other hand, this patch disables the interrupt in ndo_open() to
>> prevent the possibility that the interrupt is issued by the state that
>> a boot loader left.
>>
>> Signed-off-by: Kazuya Mizuguchi 
>> Signed-off-by: Yoshihiro Kaneko 
>> ---
>>
>> This patch is based on the master branch of David Miller's networking
>> tree.
>>
>> v2 [Yoshihiro Kaneko]
>> * compile tested only
>> * As suggested by Sergei Shtylyov
>>- clear RIC1 in ndo_open() to disable the interrupt regardless of
>>  a left state from a bootloader.
>>
>>
>>   drivers/net/ethernet/renesas/ravb_main.c | 4 +++-
>>   1 file changed, 3 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/net/ethernet/renesas/ravb_main.c
>> b/drivers/net/ethernet/renesas/ravb_main.c
>> index b69e0c2..5e34a88 100644
>> --- a/drivers/net/ethernet/renesas/ravb_main.c
>> +++ b/drivers/net/ethernet/renesas/ravb_main.c
>> @@ -1228,6 +1228,9 @@ static int ravb_open(struct net_device *ndev)
>> goto out_free_irq2;
>> ravb_emac_init(ndev);
>>
>> +   /* Disable unhandled interrupt */
>> +   ravb_write(ndev, 0, RIC1);
>> +
>
>
>Oh, I thought the interrupt masks are programmed here but they are set in
> ravb_dmac_init() that this function calls. Please do it there instead (where
> it was originally done).

done.

>
> [...]
>
> MBR, Sergei
>

Thanks,
kaneko
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v3 net] ravb: clear RIC1 in init instead of stop

2015-12-14 Thread Sergei Shtylyov

On 12/14/2015 07:24 PM, Yoshihiro Kaneko wrote:


From: Kazuya Mizuguchi 

AVB-DMAC Receive FIFO Warning interrupt is not enabled, so it is not
necessary to disable the interrupt in ravb_close().
On the other hand, this patch disables the interrupt in ravb_dmac_init() to
prevent the possibility that the interrupt is issued by the state that
a boot loader left.

Signed-off-by: Kazuya Mizuguchi 
Signed-off-by: Yoshihiro Kaneko 


Acked-by: Sergei Shtylyov 


---

This patch is based on the master branch of David Miller's networking
tree.


   I think this should rather go into net-next.git though...

MBR, Sergei

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCHv2 net-next] ipv6: allow routes to be configured with expire values

2015-12-14 Thread Hannes Frederic Sowa
Hi,

On 14.12.2015 12:48, Xin Long wrote:
>>
>> This is the wrong way to do this.
>>
>> Currently we only ever dump rta_cacheinfo values to the user.
>>
>> If we use it to set things, we have to completely consider every
>> member of that structure as potentially having meaning either
>> intended by the user or chosen by us in the future.
>>
>> Therefore it is a poor choice to start using for specifying the
>> expires value, and some other mechanism such as a new RTNETLINK
>> attribute, should be used for this.
> 
> we did it like this to avoid adding the new RTNETLINK attribute.
> now I got your meaning, and rta_cacheinfo seems to be designed
> for dumping info, not for setting info. I guess you hope we do it like:
> 
> +   if (tb[RTA_EXPIRES]) {
> +   unsigned long timeout =
> addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);
> +
> +   if (addrconf_finite_timeout(timeout)) {
> +   cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
> +   cfg->fc_flags |= RTF_EXPIRES;
> +   }
> +   }
> 
> hi Hannes, we seem to go back here again, what do you think ?

Albeit I had the same idea and wanted to introduce a new netlink
attribute, I decided to recommend this patch. It aligns with the code we
already have for adding and listing ipv4 and ipv6 addresses
(ifa_cacheinfo) and reporting routing changes for ipv4 and ipv6
(rta_cacheinfo).

We can easily switch to new attributes. Should we introduce a new
interface for this?

Thanks,
Hannes

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH net] netlink: fix boolean evaluation on bound

2015-12-14 Thread Hannes Frederic Sowa
On 14.12.2015 18:06, Herbert Xu wrote:
> On Mon, Dec 14, 2015 at 05:55:25PM +0100, Hannes Frederic Sowa wrote:
>> portid may be 0, thus bound will set the flag to false for in-kernel
>> created netlink sockets.
>>
>> Fixes: da314c9923fed55 ("netlink: Replace rhash_portid with bound")
>> Cc: Herbert Xu 
>> Signed-off-by: Hannes Frederic Sowa 
>> ---
>> This patch should not affect anything and is just meant to close this
>> loophole in future. I based it on net, but you can also apply it to
>> net-next.
> 
> Nack.  The bound field only needs to be true for user-space sockets.
> So please explain why you need it to be true for kernel sockets.

I reviewed this very carefully and think this is currently a matter of
taste as it does not change current logic.

Otherwise I would recommend adding a "!!" to express that we actually
want bound set based on the portid value?

Bye,
Hannes

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v5 2/4] net: diag: split inet_diag_dump_one_icsk into two

2015-12-14 Thread Lorenzo Colitti
Currently, inet_diag_dump_one_icsk finds a socket and then dumps
its information to userspace. Split it into a part that finds the
socket and a part that dumps the information.

Signed-off-by: Lorenzo Colitti 
---
 include/linux/inet_diag.h |  5 +
 net/ipv4/inet_diag.c  | 42 +++---
 2 files changed, 32 insertions(+), 15 deletions(-)

diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h
index 0e707f0..e7032f04 100644
--- a/include/linux/inet_diag.h
+++ b/include/linux/inet_diag.h
@@ -3,6 +3,7 @@
 
 #include 
 
+struct net;
 struct sock;
 struct inet_hashinfo;
 struct nlattr;
@@ -41,6 +42,10 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo,
struct sk_buff *in_skb, const struct nlmsghdr *nlh,
const struct inet_diag_req_v2 *req);
 
+struct sock *inet_diag_find_one_icsk(struct net *net,
+struct inet_hashinfo *hashinfo,
+const struct inet_diag_req_v2 *req);
+
 int inet_diag_bc_sk(const struct nlattr *_bc, struct sock *sk);
 
 extern int  inet_diag_register(const struct inet_diag_handler *handler);
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index ab9f8a6..cfabb8f 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -350,17 +350,12 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff 
*skb,
  nlmsg_flags, unlh);
 }
 
-int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo,
-   struct sk_buff *in_skb,
-   const struct nlmsghdr *nlh,
-   const struct inet_diag_req_v2 *req)
+struct sock *inet_diag_find_one_icsk(struct net *net,
+struct inet_hashinfo *hashinfo,
+const struct inet_diag_req_v2 *req)
 {
-   struct net *net = sock_net(in_skb->sk);
-   struct sk_buff *rep;
struct sock *sk;
-   int err;
 
-   err = -EINVAL;
if (req->sdiag_family == AF_INET)
sk = inet_lookup(net, hashinfo, req->id.idiag_dst[0],
 req->id.idiag_dport, req->id.idiag_src[0],
@@ -375,15 +370,33 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo 
*hashinfo,
  req->id.idiag_if);
 #endif
else
-   goto out_nosk;
+   return ERR_PTR(-EINVAL);
 
-   err = -ENOENT;
if (!sk)
-   goto out_nosk;
+   return ERR_PTR(-ENOENT);
 
-   err = sock_diag_check_cookie(sk, req->id.idiag_cookie);
-   if (err)
-   goto out;
+   if (sock_diag_check_cookie(sk, req->id.idiag_cookie)) {
+   sock_gen_put(sk);
+   return ERR_PTR(-ENOENT);
+   }
+
+   return sk;
+}
+EXPORT_SYMBOL_GPL(inet_diag_find_one_icsk);
+
+int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo,
+   struct sk_buff *in_skb,
+   const struct nlmsghdr *nlh,
+   const struct inet_diag_req_v2 *req)
+{
+   struct net *net = sock_net(in_skb->sk);
+   struct sk_buff *rep;
+   struct sock *sk;
+   int err;
+
+   sk = inet_diag_find_one_icsk(net, hashinfo, req);
+   if (IS_ERR(sk))
+   return PTR_ERR(sk);
 
rep = nlmsg_new(inet_sk_attr_size(), GFP_KERNEL);
if (!rep) {
@@ -409,7 +422,6 @@ out:
if (sk)
sock_gen_put(sk);
 
-out_nosk:
return err;
 }
 EXPORT_SYMBOL_GPL(inet_diag_dump_one_icsk);
-- 
2.6.0.rc2.230.g3dd15c0

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] sh_eth: fix descriptor access endianness

2015-12-14 Thread Sergei Shtylyov

Hello.

On 12/13/2015 11:05 PM, Sergei Shtylyov wrote:


The driver never  calls cpu_to_edmac() when writing the descriptor address
and edmac_to_cpu() when reading it, although it should -- fix this.

Note that the frame/buffer length descriptor field accesses also need fixing
but since they are both 16-bit we can't  use {cpu|edmac}_to_{edmac|cpu}()...


   Changed my mind about this one: I'll add a new pair of functions to deal 
with 16-bit conversions as well.



Signed-off-by: Sergei Shtylyov 


[...]

MBR, Sergei

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v5 1/4] net: diag: Add the ability to destroy a socket.

2015-12-14 Thread Lorenzo Colitti
This adds a diag_destroy pointer to struct proto that allows a
socket to be administratively closed without any action from the
process owning the socket or the socket protocol.

This allows a privileged userspace process, such as a connection
manager or system administration tool, to close sockets belonging
to other apps when the network they were established on has
disconnected. It is needed on laptops and mobile hosts to ensure
that network switches / disconnects do not result in applications
being blocked for long periods of time (minutes) in read or
connect calls on TCP sockets that will never succeed because the
IP address they are bound to is no longer on the system. Closing
the sockets causes these calls to fail fast and allows the apps
to reconnect on another network.

For many years Android kernels have supported this via an
out-of-tree SIOCKILLADDR ioctl that is called on every
RTM_DELADDR event, but this solution is cleaner, more robust and
more flexible: the connection manager can iterate over all
connections on the deleted IP address and close all of them. It
can also be used to close all sockets opened by a given app
process, for example if the user has restricted that app from
using the network.

It also allows in-kernel callers to perform the same sort of
operation by invoking sk->sk_prot->diag_destroy(sk) directly.

This patch adds a SOCK_DESTROY operation, a destroy function
pointer to sock_diag_handler, and a diag_destroy function
pointer.  It does not include any implementation code.

Signed-off-by: Lorenzo Colitti 
---
 include/linux/sock_diag.h  |  2 ++
 include/net/sock.h |  1 +
 include/uapi/linux/sock_diag.h |  1 +
 net/core/sock_diag.c   | 23 ---
 4 files changed, 24 insertions(+), 3 deletions(-)

diff --git a/include/linux/sock_diag.h b/include/linux/sock_diag.h
index fddebc6..15072fc 100644
--- a/include/linux/sock_diag.h
+++ b/include/linux/sock_diag.h
@@ -15,6 +15,7 @@ struct sock_diag_handler {
__u8 family;
int (*dump)(struct sk_buff *skb, struct nlmsghdr *nlh);
int (*get_info)(struct sk_buff *skb, struct sock *sk);
+   int (*destroy)(struct sk_buff *skb, struct nlmsghdr *nlh);
 };
 
 int sock_diag_register(const struct sock_diag_handler *h);
@@ -68,4 +69,5 @@ bool sock_diag_has_destroy_listeners(const struct sock *sk)
 }
 void sock_diag_broadcast_destroy(struct sock *sk);
 
+int sock_diag_destroy(struct sock *sk);
 #endif
diff --git a/include/net/sock.h b/include/net/sock.h
index 0ca22b0..a1b30d7f 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1060,6 +1060,7 @@ struct proto {
void(*destroy_cgroup)(struct mem_cgroup *memcg);
struct cg_proto *(*proto_cgroup)(struct mem_cgroup *memcg);
 #endif
+   int (*diag_destroy)(struct sock *sk);
 };
 
 int proto_register(struct proto *prot, int alloc_slab);
diff --git a/include/uapi/linux/sock_diag.h b/include/uapi/linux/sock_diag.h
index 49230d3..bae2d80 100644
--- a/include/uapi/linux/sock_diag.h
+++ b/include/uapi/linux/sock_diag.h
@@ -4,6 +4,7 @@
 #include 
 
 #define SOCK_DIAG_BY_FAMILY 20
+#define SOCK_DESTROY 21
 
 struct sock_diag_req {
__u8sdiag_family;
diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c
index 0c1d58d..967d89f 100644
--- a/net/core/sock_diag.c
+++ b/net/core/sock_diag.c
@@ -214,7 +214,7 @@ void sock_diag_unregister(const struct sock_diag_handler 
*hnld)
 }
 EXPORT_SYMBOL_GPL(sock_diag_unregister);
 
-static int __sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int __sock_diag_cmd(struct sk_buff *skb, struct nlmsghdr *nlh)
 {
int err;
struct sock_diag_req *req = nlmsg_data(nlh);
@@ -234,8 +234,12 @@ static int __sock_diag_rcv_msg(struct sk_buff *skb, struct 
nlmsghdr *nlh)
hndl = sock_diag_handlers[req->sdiag_family];
if (hndl == NULL)
err = -ENOENT;
-   else
+   else if (nlh->nlmsg_type == SOCK_DIAG_BY_FAMILY)
err = hndl->dump(skb, nlh);
+   else if (nlh->nlmsg_type == SOCK_DESTROY && hndl->destroy)
+   err = hndl->destroy(skb, nlh);
+   else
+   err = -EOPNOTSUPP;
mutex_unlock(_diag_table_mutex);
 
return err;
@@ -261,7 +265,8 @@ static int sock_diag_rcv_msg(struct sk_buff *skb, struct 
nlmsghdr *nlh)
 
return ret;
case SOCK_DIAG_BY_FAMILY:
-   return __sock_diag_rcv_msg(skb, nlh);
+   case SOCK_DESTROY:
+   return __sock_diag_cmd(skb, nlh);
default:
return -EINVAL;
}
@@ -295,6 +300,18 @@ static int sock_diag_bind(struct net *net, int group)
return 0;
 }
 
+int sock_diag_destroy(struct sock *sk)
+{
+   if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
+   return -EPERM;
+
+   if (!sk->sk_prot->diag_destroy)
+   return -EOPNOTSUPP;
+
+   return 

Re: Add a SOCK_DESTROY operation to close sockets from userspace

2015-12-14 Thread Lorenzo Colitti
Here is an updated version. The external behaviour of this
patchset is the same as v4; for more details, see that cover
letter at http://www.spinics.net/lists/netdev/msg354303.html .

This version fixes two bugs spotted by Eric, and implements Tom's
suggestion of making the socket destroy code a per-protocol
function pointer so that in-kernel callers can use it. The
resulting code is a bit longer but a bit more generic, and
exposes fewer TCP implementation details.

The operation is still called SOCK_DESTROY, but given that its
main implementation is the TCP ABORT operation, and that the
word "destroy" is used in the inet_csk code to refer to freeing
a socket, and in the inet_diag code to refer to broadcasts about
sockets being freed, perhaps it could be renamed to SOCK_ABORT.

Tested using net_test. Tests check that TCP resets are sent in
the right states, that accept(), read() and connect() are
interrupted, that closing sockets makes the socket unusable,
and that destroying non-TCP sockets returns EOPNOTSUPP. Tests
at https://android-review.googlesource.com/#/c/187491/ .

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH net] ravb: Add disable 10base

2015-12-14 Thread Sergei Shtylyov

Hello.

On 12/13/2015 06:15 PM, Yoshihiro Kaneko wrote:


From: Kazuya Mizuguchi 

Ethernet AVB does not support 10 Mbps transfer speed.

Signed-off-by: Kazuya Mizuguchi 
Signed-off-by: Yoshihiro Kaneko 
---

This patch is based on the master branch of David Miller's networking
tree.

  drivers/net/ethernet/renesas/ravb_main.c | 3 +++
  1 file changed, 3 insertions(+)

diff --git a/drivers/net/ethernet/renesas/ravb_main.c 
b/drivers/net/ethernet/renesas/ravb_main.c
index b69e0c2..467d416 100644
--- a/drivers/net/ethernet/renesas/ravb_main.c
+++ b/drivers/net/ethernet/renesas/ravb_main.c
@@ -905,6 +905,9 @@ static int ravb_phy_init(struct net_device *ndev)
netdev_info(ndev, "limited PHY to 100Mbit/s\n");
}

+   /* 10BASE is not supported */
+   phydev->supported &= ~PHY_10BT_FEATURES;


   I wonder if we should also modify 'phydev->advertising' like 
phy_set_max_speed() does...


MBR, Sergei

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v5 4/4] net: diag: Support destroying TCP sockets.

2015-12-14 Thread Lorenzo Colitti
This implements SOCK_DESTROY for TCP sockets. It causes all
blocking calls on the socket to fail fast with ECONNABORTED and
causes a protocol close of the socket. It informs the other end
of the connection by sending a RST, i.e., initiating a TCP ABORT
as per RFC 793. ECONNABORTED was chosen for consistency with
FreeBSD.

Signed-off-by: Lorenzo Colitti 
---
 include/net/tcp.h   |  4 
 net/ipv4/Kconfig| 13 +
 net/ipv4/tcp.c  | 34 ++
 net/ipv4/tcp_diag.c | 19 +++
 net/ipv4/tcp_ipv4.c |  3 +++
 net/ipv6/tcp_ipv6.c |  3 +++
 6 files changed, 76 insertions(+)

diff --git a/include/net/tcp.h b/include/net/tcp.h
index f80e74c..505cef5 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1170,6 +1170,10 @@ void tcp_set_state(struct sock *sk, int state);
 
 void tcp_done(struct sock *sk);
 
+#if CONFIG_INET_DIAG_DESTROY
+int tcp_abort(struct sock *sk);
+#endif
+
 static inline void tcp_sack_reset(struct tcp_options_received *rx_opt)
 {
rx_opt->dsack = 0;
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 416dfa0..31c4496 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -436,6 +436,19 @@ config INET_UDP_DIAG
  Support for UDP socket monitoring interface used by the ss tool.
  If unsure, say Y.
 
+config INET_DIAG_DESTROY
+   bool "INET: allow privileged process to administratively close sockets"
+   depends on INET_DIAG && (IPV6 || IPV6=n)
+   default n
+   ---help---
+ Provides a SOCK_DESTROY operation that allows privileged processes
+ (e.g., a connection manager or a network administration tool such as
+ ss) to close sockets opened by other processes. Closing a socket in
+ this way interrupts any blocking read/writes/connect operations on
+ the socket and causes future socket calls to behave as if the socket
+ had been disconnected.
+ If unsure, say N.
+
 menuconfig TCP_CONG_ADVANCED
bool "TCP: advanced congestion control"
---help---
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index c82cca1..fc5068d 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3080,6 +3080,40 @@ void tcp_done(struct sock *sk)
 }
 EXPORT_SYMBOL_GPL(tcp_done);
 
+#ifdef CONFIG_INET_DIAG_DESTROY
+int tcp_abort(struct sock *sk)
+{
+   if (!sk_fullsock(sk)) {
+   sock_gen_put(sk);
+   return -EOPNOTSUPP;
+   }
+
+   /* Don't race with userspace socket closes such as tcp_close. */
+   lock_sock(sk);
+
+   /* Don't race with BH socket closes such as inet_csk_listen_stop. */
+   local_bh_disable();
+   bh_lock_sock(sk);
+
+   if (!sock_flag(sk, SOCK_DEAD)) {
+   sk->sk_err = ECONNABORTED;
+   /* This barrier is coupled with smp_rmb() in tcp_poll() */
+   smp_wmb();
+   sk->sk_error_report(sk);
+   if (tcp_need_reset(sk->sk_state))
+   tcp_send_active_reset(sk, GFP_ATOMIC);
+   tcp_done(sk);
+   }
+
+   bh_unlock_sock(sk);
+   local_bh_enable();
+   release_sock(sk);
+   sock_put(sk);
+   return 0;
+}
+EXPORT_SYMBOL_GPL(tcp_abort);
+#endif
+
 extern struct tcp_congestion_ops tcp_reno;
 
 static __initdata unsigned long thash_entries;
diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c
index b316040..8d435f17 100644
--- a/net/ipv4/tcp_diag.c
+++ b/net/ipv4/tcp_diag.c
@@ -10,6 +10,8 @@
  */
 
 #include 
+#include 
+#include 
 #include 
 
 #include 
@@ -46,12 +48,29 @@ static int tcp_diag_dump_one(struct sk_buff *in_skb, const 
struct nlmsghdr *nlh,
return inet_diag_dump_one_icsk(_hashinfo, in_skb, nlh, req);
 }
 
+#ifdef CONFIG_INET_DIAG_DESTROY
+static int tcp_diag_destroy(struct sk_buff *in_skb,
+   const struct inet_diag_req_v2 *req)
+{
+   struct net *net = sock_net(in_skb->sk);
+   struct sock *sk = inet_diag_find_one_icsk(net, _hashinfo, req);
+
+   if (IS_ERR(sk))
+   return PTR_ERR(sk);
+
+   return sock_diag_destroy(sk);
+}
+#endif
+
 static const struct inet_diag_handler tcp_diag_handler = {
.dump= tcp_diag_dump,
.dump_one= tcp_diag_dump_one,
.idiag_get_info  = tcp_diag_get_info,
.idiag_type  = IPPROTO_TCP,
.idiag_info_size = sizeof(struct tcp_info),
+#ifdef CONFIG_INET_DIAG_DESTROY
+   .destroy = tcp_diag_destroy,
+#endif
 };
 
 static int __init tcp_diag_init(void)
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index db00343..5e28bf1 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2342,6 +2342,9 @@ struct proto tcp_prot = {
.destroy_cgroup = tcp_destroy_cgroup,
.proto_cgroup   = tcp_proto_cgroup,
 #endif
+#ifdef CONFIG_INET_DIAG_DESTROY
+   .diag_destroy   = tcp_abort,
+#endif
 };
 EXPORT_SYMBOL(tcp_prot);
 
diff --git a/net/ipv6/tcp_ipv6.c 

[PATCH v5 3/4] net: diag: Support SOCK_DESTROY for inet sockets.

2015-12-14 Thread Lorenzo Colitti
This passes the SOCK_DESTROY operation to the underlying
protocol diag handler, or returns -EOPNOTSUPP if that handler
does not define a destroy operation.

Most of this patch is just renaming functions. This is not
strictly necessary, but it would be fairly counterintuitive to
have the code to destroy inet sockets be in a function whose name
starts with inet_diag_get.

Signed-off-by: Lorenzo Colitti 
---
 include/linux/inet_diag.h |  4 
 net/ipv4/inet_diag.c  | 23 +++
 2 files changed, 19 insertions(+), 8 deletions(-)

diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h
index e7032f04..7c27fa1 100644
--- a/include/linux/inet_diag.h
+++ b/include/linux/inet_diag.h
@@ -24,6 +24,10 @@ struct inet_diag_handler {
void(*idiag_get_info)(struct sock *sk,
  struct inet_diag_msg *r,
  void *info);
+
+   int (*destroy)(struct sk_buff *in_skb,
+  const struct inet_diag_req_v2 *req);
+
__u16   idiag_type;
__u16   idiag_info_size;
 };
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index cfabb8f..8bb8e7a 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -426,7 +426,7 @@ out:
 }
 EXPORT_SYMBOL_GPL(inet_diag_dump_one_icsk);
 
-static int inet_diag_get_exact(struct sk_buff *in_skb,
+static int inet_diag_cmd_exact(int cmd, struct sk_buff *in_skb,
   const struct nlmsghdr *nlh,
   const struct inet_diag_req_v2 *req)
 {
@@ -436,8 +436,12 @@ static int inet_diag_get_exact(struct sk_buff *in_skb,
handler = inet_diag_lock_handler(req->sdiag_protocol);
if (IS_ERR(handler))
err = PTR_ERR(handler);
-   else
+   else if (cmd == SOCK_DIAG_BY_FAMILY)
err = handler->dump_one(in_skb, nlh, req);
+   else if (cmd == SOCK_DESTROY && handler->destroy)
+   err = handler->destroy(in_skb, req);
+   else
+   err = -EOPNOTSUPP;
inet_diag_unlock_handler(handler);
 
return err;
@@ -950,7 +954,7 @@ static int inet_diag_get_exact_compat(struct sk_buff 
*in_skb,
req.idiag_states = rc->idiag_states;
req.id = rc->id;
 
-   return inet_diag_get_exact(in_skb, nlh, );
+   return inet_diag_cmd_exact(SOCK_DIAG_BY_FAMILY, in_skb, nlh, );
 }
 
 static int inet_diag_rcv_msg_compat(struct sk_buff *skb, struct nlmsghdr *nlh)
@@ -984,7 +988,7 @@ static int inet_diag_rcv_msg_compat(struct sk_buff *skb, 
struct nlmsghdr *nlh)
return inet_diag_get_exact_compat(skb, nlh);
 }
 
-static int inet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h)
+static int inet_diag_handler_cmd(struct sk_buff *skb, struct nlmsghdr *h)
 {
int hdrlen = sizeof(struct inet_diag_req_v2);
struct net *net = sock_net(skb->sk);
@@ -992,7 +996,8 @@ static int inet_diag_handler_dump(struct sk_buff *skb, 
struct nlmsghdr *h)
if (nlmsg_len(h) < hdrlen)
return -EINVAL;
 
-   if (h->nlmsg_flags & NLM_F_DUMP) {
+   if (h->nlmsg_type == SOCK_DIAG_BY_FAMILY &&
+   h->nlmsg_flags & NLM_F_DUMP) {
if (nlmsg_attrlen(h, hdrlen)) {
struct nlattr *attr;
 
@@ -1011,7 +1016,7 @@ static int inet_diag_handler_dump(struct sk_buff *skb, 
struct nlmsghdr *h)
}
}
 
-   return inet_diag_get_exact(skb, h, nlmsg_data(h));
+   return inet_diag_cmd_exact(h->nlmsg_type, skb, h, nlmsg_data(h));
 }
 
 static
@@ -1062,14 +1067,16 @@ int inet_diag_handler_get_info(struct sk_buff *skb, 
struct sock *sk)
 
 static const struct sock_diag_handler inet_diag_handler = {
.family = AF_INET,
-   .dump = inet_diag_handler_dump,
+   .dump = inet_diag_handler_cmd,
.get_info = inet_diag_handler_get_info,
+   .destroy = inet_diag_handler_cmd,
 };
 
 static const struct sock_diag_handler inet6_diag_handler = {
.family = AF_INET6,
-   .dump = inet_diag_handler_dump,
+   .dump = inet_diag_handler_cmd,
.get_info = inet_diag_handler_get_info,
+   .destroy = inet_diag_handler_cmd,
 };
 
 int inet_diag_register(const struct inet_diag_handler *h)
-- 
2.6.0.rc2.230.g3dd15c0

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] sh_eth: uninline sh_eth_{write|read}()

2015-12-14 Thread David Miller
From: Sergei Shtylyov 
Date: Sun, 13 Dec 2015 01:44:50 +0300

> Commit 3365711df024 ("sh_eth: WARN on access to a register not implemented in
> a particular chip") added WARN_ON() to sh_eth_{read|write}(), thus making
> it  unacceptable for these functions to be *inline* anymore. Remove *inline*
> and move the functions from the header to the driver itself. Below   is our
> code economy with ARM gcc 4.7.3:
> 
> $ size drivers/net/ethernet/renesas/sh_eth.o{~,}
>text  data bss dec hex filename
>   32489  1140   0   33629835d 
> drivers/net/ethernet/renesas/sh_eth.o~
>   25413  1140   0   2655367b9 
> drivers/net/ethernet/renesas/sh_eth.o
> 
> Suggested-by: Ben Hutchings 
> Signed-off-by: Sergei Shtylyov 

Applied.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC] ipv6: use a random ifid for headerless devices

2015-12-14 Thread Hannes Frederic Sowa
Hello,

On 08.12.2015 19:57, Bjørn Mork wrote:
> Hannes Frederic Sowa  writes:
>> On 05.12.2015 20:02, Bjørn Mork wrote:
>>> Hannes Frederic Sowa  writes:
 On Thu, Dec 3, 2015, at 20:29, Bjørn Mork wrote:

> After looking more at addrconf, I started wondering if we couldn't abuse
> ipv6_generate_stable_address() for this purpose?  We could add a new
> addr_gen_mode which would trigger automatic generation of a secret if
> stable_secret is uninitialized.  This would be good enough to ensure
> stability until the interface is destroyed.  And it would still allow
> the administrator to select IN6_ADDR_GEN_MODE_STABLE_PRIVACY by entering
> a new secret.

 I am fine with your proposal but I would really like to see it only
 happen on the per-interface stable_secret instance.
>>>
>>> Do you think something like the patch below will be OK?
>>
>> I wouldn't call it IN6_ADDR_GEN_MODE_AUTO, this doesn't say anything.
>> But the idea is already good.
> 
> No, I didn't like that name either.  I just couldn't come up with
> anything descriptive, short and non-redundant. "random", "generated",
> "stable" are even worse.  And that's about where my imagination ended.
> We need a child here :)

Sorry for answering so late...

What do you think about simply using IN6_ADDR_GEN_MODE_RANDOM?

>>> Or would it be better to drop the additional mode and just generate a
>>> random secret if the mode is IN6_ADDR_GEN_MODE_STABLE_PRIVACY and the
>>> secrets are missing?  Or would that be changing the userspace ABI?  This
>>> is not clear to me...
>>
>> I would not like to do that somehow. The problem is that the stable
>> secrets get written by user space probably during boot-up, but we don't
>> know when. That's why I would also not set the ->initialized flag, so
>> user can overwrite it to the final secret later on. We block it otherwise.
> 
> I am not sure I follow...  There is nothing preventing userspace from
> initializing the secret before or after generation of the random secret.

I actually missed that. Shortly before sending the patch I decided to
allow reinitializing the stable_secret. Before that, I had a check in
there that prevented rewriting the stable_secret once it had been
initialized. So we are good here. Sorry for the confusion.

> Writing to /proc/sys/net/ipv6/conf/<interface>/stable_secret will update the
> secret and set the mode to IN6_ADDR_GEN_MODE_STABLE_PRIVACY as before,
> even if we have generated a random secret first.  I have verified that
> this part works as expected.

Thanks!

> I guess we should check &net->ipv6.devconf_dflt->stable_secret too
> before choosing the default mode.  IN6_ADDR_GEN_MODE_STABLE_PRIVACY is a
> more appropriate default if a default secret is set.  IMHO, this should
> really be the case without the proposed change too, but it isn't. The
> current behaviour confuses me: Setting 'default' changes all existing
> interfaces, but does not change the default for new interfaces. Is that
> right?

Nope, that is a good point. I think we should do that unconditionally.
If we have a stable secret set, which we can use, we always should use
this address generation mode. Can you send the addition of this as a
separate patch so we can propose it for stable? Otherwise I can do that,
too.

>> My proposal would be to use the stable privacy generator in case the
>> device does not have a device address for EUI-48 generation with a
>> secret which we simply generate on the stack. Let's factor out the part
>> of the generator which depends on the inet6_dev and cnf bits for that.
> 
> Not sure I get this part either.  The point was to have stable addresses
> for the lifetime of the netdev.  We can generate the secret on the
> stack, but we will still need to stash it somewhere.  That could of
> course be to a new field.  But I don't see the point since there is no
> way you can combine this mode with IN6_ADDR_GEN_MODE_STABLE_PRIVACY.
> Only one mode can be active at a time, and that mode can then own the secret.

Ok, your argument makes sense.

> As long as we can manage to introduce this without changing any existing
> behaviour, of course.

Besides the naming I think your patch looks fine.

Thank you,
Hannes

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC] ipv6: use a random ifid for headerless devices

2015-12-14 Thread Bjørn Mork
Hannes Frederic Sowa  writes:

> Sorry for answering so late...

No problem.  There is no rush here AFAICS.  Thanks for taking the time
to look at this.

> What do you think about simply using IN6_ADDR_GEN_MODE_RANDOM?

Yes, that's fine with me (actually what I first used :)

>> I guess we should check &net->ipv6.devconf_dflt->stable_secret too
>> before choosing the default mode.  IN6_ADDR_GEN_MODE_STABLE_PRIVACY is a
>> more appropriate default if a default secret is set.  IMHO, this should
>> really be the case without the proposed change too, but it isn't. The
>> current behaviour confuses me: Setting 'default' changes all existing
>> interfaces, but does not change the default for new interfaces. Is that
>> right?
>
> Nope, that is a good point. I think we should do that unconditionally.
> If we have a stable secret set, which we can use, we always should use
> this address generation mode. Can you send the addition of this as a
> separate patch so we can propose it for stable? Otherwise I can do that,
> too.

I can do that if it can wait for whenever I get around to actually
submitting this.  No guarantee that will be in time for v4.5.


>>> My proposal would be to use the stable privacy generator in case the
>>> device does not have a device address for EUI-48 generation with a
>>> secret which we simply generate on the stack. Let's factor out the part
>>> of the generator which depends on the inet6_dev and cnf bits for that.
>> 
>> Not sure I get this part either.  The point was to have stable addresses
>> for the lifetime of the netdev.  We can generate the secret on the
>> stack, but we will still need to stash it somewhere.  That could of
>> course be to a new field.  But I don't see the point since there is no
>> way you can combine this mode with IN6_ADDR_GEN_MODE_STABLE_PRIVACY.
>> Only one mode can be active at a time, and that mode can then own the secret.
>
> Ok, your argument makes sense.
>
>> As long as we can manage to introduce this without changing any existing
>> behaviour, of course.
>
> Besides the naming I think your patch looks fine.

Thanks!  Will fixup that and formally submit when I find some time.


Bjørn
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Patch net] pptp: verify sockaddr_len in pptp_bind() and pptp_connect()

2015-12-14 Thread Cong Wang
Reported-by: Dmitry Vyukov 
Signed-off-by: Cong Wang 
---
 drivers/net/ppp/pptp.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/drivers/net/ppp/pptp.c b/drivers/net/ppp/pptp.c
index fc69e41..597c53e 100644
--- a/drivers/net/ppp/pptp.c
+++ b/drivers/net/ppp/pptp.c
@@ -419,6 +419,9 @@ static int pptp_bind(struct socket *sock, struct sockaddr 
*uservaddr,
struct pptp_opt *opt = >proto.pptp;
int error = 0;
 
+   if (sockaddr_len < sizeof(struct sockaddr_pppox))
+   return -EINVAL;
+
lock_sock(sk);
 
opt->src_addr = sp->sa_addr.pptp;
@@ -440,6 +443,9 @@ static int pptp_connect(struct socket *sock, struct 
sockaddr *uservaddr,
struct flowi4 fl4;
int error = 0;
 
+   if (sockaddr_len < sizeof(struct sockaddr_pppox))
+   return -EINVAL;
+
if (sp->sa_protocol != PX_PROTO_PPTP)
return -EINVAL;
 
-- 
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v4 1/5] geneve: Add geneve udp port offload for ethernet devices

2015-12-14 Thread Singhai, Anjali

Resending the series with a  cover-letter that got missed.

Thanks
Anjali

On 12/14/2015 11:57 AM, Anjali Singhai Jain wrote:

Add ndo_ops to add/del UDP ports to a device that supports geneve
offload.

v2: Comment fix.

Signed-off-by: Anjali Singhai Jain 
Signed-off-by: Kiran Patil 
---
  drivers/net/geneve.c  | 23 +++
  include/linux/netdevice.h | 20 +++-
  2 files changed, 42 insertions(+), 1 deletion(-)




--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH net-next] ipv6: addrconf: drop ieee802154 specific things

2015-12-14 Thread Alexander Aring
This patch removes ARPHRD_IEEE802154 from addrconf handling. In the
earlier days of 802.15.4 6LoWPAN, the interface type was ARPHRD_IEEE802154
which introduced several issues, because 802.15.4 interfaces used the
same type.

Since commit 965e613d299c ("ieee802154: 6lowpan: fix ARPHRD to
ARPHRD_6LOWPAN") we use ARPHRD_6LOWPAN for 6LoWPAN interfaces. This
patch will remove ARPHRD_IEEE802154 which is currently dead code, because
ARPHRD_IEEE802154 doesn't reach the minimum 1280 MTU of IPv6.

Also we use 6LoWPAN EUI64 specific defines instead of using link-layer
constants from the 802.15.4 link-layer header.

Cc: David S. Miller 
Cc: Alexey Kuznetsov 
Cc: James Morris 
Cc: Hideaki YOSHIFUJI 
Cc: Patrick McHardy 
Signed-off-by: Alexander Aring 
---
 net/ipv6/addrconf.c | 8 +++-
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 5e9111d..7082fb7 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -70,7 +70,7 @@
 #include 
 #include 
 
-#include 
+#include 
 #include 
 #include 
 #include 
@@ -1947,9 +1947,9 @@ static void addrconf_leave_anycast(struct inet6_ifaddr 
*ifp)
 
 static int addrconf_ifid_eui64(u8 *eui, struct net_device *dev)
 {
-   if (dev->addr_len != IEEE802154_ADDR_LEN)
+   if (dev->addr_len != EUI64_ADDR_LEN)
return -1;
-   memcpy(eui, dev->dev_addr, 8);
+   memcpy(eui, dev->dev_addr, EUI64_ADDR_LEN);
eui[0] ^= 2;
return 0;
 }
@@ -2041,7 +2041,6 @@ static int ipv6_generate_eui64(u8 *eui, struct net_device 
*dev)
case ARPHRD_IPGRE:
return addrconf_ifid_gre(eui, dev);
case ARPHRD_6LOWPAN:
-   case ARPHRD_IEEE802154:
return addrconf_ifid_eui64(eui, dev);
case ARPHRD_IEEE1394:
return addrconf_ifid_ieee1394(eui, dev);
@@ -3066,7 +3065,6 @@ static void addrconf_dev_config(struct net_device *dev)
(dev->type != ARPHRD_FDDI) &&
(dev->type != ARPHRD_ARCNET) &&
(dev->type != ARPHRD_INFINIBAND) &&
-   (dev->type != ARPHRD_IEEE802154) &&
(dev->type != ARPHRD_IEEE1394) &&
(dev->type != ARPHRD_TUNNEL6) &&
(dev->type != ARPHRD_6LOWPAN)) {
-- 
2.6.1

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


re: [PATCH] chelsio: add support for other 10G boards

2015-12-14 Thread Dan Carpenter
Hello Stephen Hemminger,

The patch f1d3d38af757: "[PATCH] chelsio: add support for other 10G
boards" from Dec 1, 2006, leads to the following static checker
warning:

drivers/net/ethernet/chelsio/cxgb/subr.c:630 t1_link_start()
warn: was shift intended here '(mac->adapter->params.nports < 2)'

drivers/net/ethernet/chelsio/cxgb/subr.c
   623  int t1_link_start(struct cphy *phy, struct cmac *mac, struct 
link_config *lc)
   624  {
   625  unsigned int fc = lc->requested_fc & (PAUSE_RX | PAUSE_TX);
   626  
   627  if (lc->supported & SUPPORTED_Autoneg) {
   628  lc->advertising &= ~(ADVERTISED_ASYM_PAUSE | 
ADVERTISED_PAUSE);
   629  if (fc) {
   630  if (fc == ((PAUSE_RX | PAUSE_TX) &
   631 (mac->adapter->params.nports < 2)))

This condition is very weird.  PAUSE_RX is 1.  PAUSE_TX is 2.
The nports < 2 condition is either 0 or 1.  We know fc is in 1-3 range.

We could re-write it as:

if (fc == 1 && mac->adapter->params.nports < 2)

The static checker is suggesting that we could do nports << 2 but then
the condition would never be true so that can't be right.

   632  lc->advertising |= ADVERTISED_PAUSE;
   633  else {
   634  lc->advertising |= 
ADVERTISED_ASYM_PAUSE;
   635  if (fc == PAUSE_RX)
   636  lc->advertising |= 
ADVERTISED_PAUSE;
   637  }
   638  }
   639  phy->ops->advertise(phy, lc->advertising);

regards,
dan carpenter
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v5 4/5] geneve: Add geneve_get_rx_port support

2015-12-14 Thread Anjali Singhai Jain
This patch adds an op that the drivers can call into to get existing
geneve ports.

Signed-off-by: Anjali Singhai Jain 
---
 drivers/net/geneve.c | 24 
 include/net/geneve.h |  8 
 2 files changed, 32 insertions(+)

diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index 89325e4..31b19fd 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -1109,6 +1109,30 @@ static struct device_type geneve_type = {
.name = "geneve",
 };
 
+/* Calls the ndo_add_geneve_port of the caller in order to
+ * supply the listening GENEVE udp ports. Callers are expected
+ * to implement the ndo_add_geneve_port.
+ */
+void geneve_get_rx_port(struct net_device *dev)
+{
+   struct net *net = dev_net(dev);
+   struct geneve_net *gn = net_generic(net, geneve_net_id);
+   struct geneve_sock *gs;
+   sa_family_t sa_family;
+   struct sock *sk;
+   __be16 port;
+
+   rcu_read_lock();
+   list_for_each_entry_rcu(gs, >sock_list, list) {
+   sk = gs->sock->sk;
+   sa_family = sk->sk_family;
+   port = inet_sk(sk)->inet_sport;
+   dev->netdev_ops->ndo_add_geneve_port(dev, sa_family, port);
+   }
+   rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(geneve_get_rx_port);
+
 /* Initialize the device structure. */
 static void geneve_setup(struct net_device *dev)
 {
diff --git a/include/net/geneve.h b/include/net/geneve.h
index 3106ed6..e6c23dc 100644
--- a/include/net/geneve.h
+++ b/include/net/geneve.h
@@ -62,6 +62,14 @@ struct genevehdr {
struct geneve_opt options[];
 };
 
+#if IS_ENABLED(CONFIG_GENEVE)
+void geneve_get_rx_port(struct net_device *netdev);
+#else
+static inline void geneve_get_rx_port(struct net_device *netdev)
+{
+}
+#endif
+
 #ifdef CONFIG_INET
 struct net_device *geneve_dev_create_fb(struct net *net, const char *name,
u8 name_assign_type, u16 dst_port);
-- 
1.8.1.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[ANNOUNCE] Another round of NetDev 1.1 updates

2015-12-14 Thread Pablo Neira Ayuso
Hi!

We would like to share with you another round of incremental updates
on accepted sessions in netdev 1.1, the community-driven Linux
networking conference held back-to-back with netconf in Sevilla,
Spain, February 10-12, 2016.

= Keynote =

* "Hardware Checksumming: Less is More" (David S. Miller)

= BoF =

* "Unlocking SR-IOV in Linux" (John Fastabend)

= Talks =

* "Measuring wifi performance across all Google Fiber customers"
  (Avery Pennarun)

* "Load balancing with nftables" (Laura Garcia)

= Tutorials =

* "Running Cellular Network Infrastructure on Linux" (Harald Welte)

Remember session proposal submission is open until Dec 20th.

Registration is open at: https://www.netdevconf.org/1.1/registration.html

 _
/ If you miss netdev 1.1, \
\ you'll regret!  /
 -
\   ^__^
 \  (oo)\___
(__)\   )\/\
||w |
|| ||

For more info, visit:
* netdev 1.1: https://www.netdevconf.org/1.1/
* netconf 2016: http://vger.kernel.org/netconf2016.html

We would also like to thank those that already confirmed sponsorship:
Cumulus Networks, Facebook, VMware, Google, Mellanox, Mojatatu
Networks, OISF/Suricata and Zen Load Balancer. Thank you.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v5 0/5] Add support for Geneve udp port offload

2015-12-14 Thread Anjali Singhai Jain
This patch series adds new ndo ops for Geneve add/del port, so as
to help offload Geneve tunnel functionalities such as RX checksum,
RSS, filters etc.

i40e driver has been tested with the changes to make sure the offloads
happen.

We do understand that this is not the ideal solution and most likely
will be redone with a more generic offload framework.
But this certainly will enable us to start seeing benefits of the
accelerations for Geneve tunnels.

As a side note, we did find an existing issue in i40e driver where a
service task can modify tunnel data structures with no locks held to
help linearize access. A separate patch will be taking care of that issue.

A question out to the community is regarding the driver Kconfig parameters
for VxLAN and Geneve, it would be ideal to drop those if there is a way
to help resolve vxlan/geneve_get_rx_port symbols while the tunnel modules
are not loaded.

Performance numbers:
With the offloads enabled on X722 devices with remote checksum enabled
and no other tuning in terms of CPU governor etc. on my test machine:

With offload
Throughput: 5527Mbits/sec with a single thread
%cpu: ~43% per core with 4 threads

Without offload
Throughput: 2364Mbits/sec with a single thread
%cpu: ~99% per core with 4 threads

These numbers will improve for X722 as work on it continues. But they
do show the difference between the stack being notified with
csum_level 1 and CHECKSUM_UNNECESSARY (with the RX offload) and not
being notified (without the RX offload).

---
v2: Comment fix.
v3: Add Performance data.
v4: Comment fix and split a patch into two.
v5: Add cover letter.

Anjali Singhai Jain (5):
  geneve: Add geneve udp port offload for ethernet devices
  i40e: geneve tunnel offload support
  i40e: Kernel dependency update for i40e to support geneve offload
  geneve: Add geneve_get_rx_port support
  i40e: Call geneve_get_rx_port to get the existing Geneve ports

 drivers/net/ethernet/intel/Kconfig  |  10 ++
 drivers/net/ethernet/intel/i40e/i40e.h  |  16 +--
 drivers/net/ethernet/intel/i40e/i40e_main.c | 170 +++-
 drivers/net/ethernet/intel/i40e/i40e_txrx.c |   8 +-
 drivers/net/ethernet/intel/i40e/i40e_txrx.h |   2 +-
 drivers/net/geneve.c|  47 
 include/linux/netdevice.h   |  20 +++-
 include/net/geneve.h|   8 ++
 8 files changed, 237 insertions(+), 44 deletions(-)

-- 
1.8.1.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v5 3/5] i40e: Kernel dependency update for i40e to support geneve offload

2015-12-14 Thread Anjali Singhai Jain
Update the Kconfig file with dependency for supporting GENEVE tunnel
offloads.

Signed-off-by: Anjali Singhai Jain 
Signed-off-by: Kiran Patil 
---
 drivers/net/ethernet/intel/Kconfig | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/drivers/net/ethernet/intel/Kconfig 
b/drivers/net/ethernet/intel/Kconfig
index 4163b16..fa593dd 100644
--- a/drivers/net/ethernet/intel/Kconfig
+++ b/drivers/net/ethernet/intel/Kconfig
@@ -280,6 +280,16 @@ config I40E_VXLAN
  Say Y here if you want to use Virtual eXtensible Local Area Network
  (VXLAN) in the driver.
 
+config I40E_GENEVE
+   bool "Generic Network Virtualization Encapsulation (GENEVE) Support"
+   depends on I40E && GENEVE && !(I40E=y && GENEVE=m)
+   default n
+   ---help---
+ This allows one to create GENEVE virtual interfaces that provide
+ Layer 2 Networks over Layer 3 Networks. GENEVE is often used
+ to tunnel virtual network infrastructure in virtualized environments.
+ Say Y here if you want to use GENEVE in the driver.
+
 config I40E_DCB
bool "Data Center Bridging (DCB) Support"
default n
-- 
1.8.1.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v5 2/5] i40e: geneve tunnel offload support

2015-12-14 Thread Anjali Singhai Jain
This patch adds driver hooks to implement ndo_ops to add/del
udp port in the HW to identify GENEVE tunnels.

Signed-off-by: Anjali Singhai Jain 
Signed-off-by: Kiran Patil 
---
 drivers/net/ethernet/intel/i40e/i40e.h  |  16 +--
 drivers/net/ethernet/intel/i40e/i40e_main.c | 167 ++--
 drivers/net/ethernet/intel/i40e/i40e_txrx.c |   8 +-
 drivers/net/ethernet/intel/i40e/i40e_txrx.h |   2 +-
 4 files changed, 150 insertions(+), 43 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e.h 
b/drivers/net/ethernet/intel/i40e/i40e.h
index b7bc014..c202f9b 100644
--- a/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h
@@ -245,6 +245,11 @@ struct i40e_tc_configuration {
struct i40e_tc_info tc_info[I40E_MAX_TRAFFIC_CLASS];
 };
 
+struct i40e_udp_port_config {
+   __be16 index;
+   u8 type;
+};
+
 /* struct that defines the Ethernet device */
 struct i40e_pf {
struct pci_dev *pdev;
@@ -281,11 +286,9 @@ struct i40e_pf {
u32 fd_atr_cnt;
u32 fd_tcp_rule;
 
-#ifdef CONFIG_I40E_VXLAN
-   __be16  vxlan_ports[I40E_MAX_PF_UDP_OFFLOAD_PORTS];
-   u16 pending_vxlan_bitmap;
+   struct i40e_udp_port_config udp_ports[I40E_MAX_PF_UDP_OFFLOAD_PORTS];
+   u16 pending_udp_bitmap;
 
-#endif
enum i40e_interrupt_policy int_policy;
u16 rx_itr_default;
u16 tx_itr_default;
@@ -322,9 +325,7 @@ struct i40e_pf {
 #define I40E_FLAG_FD_ATR_ENABLED   BIT_ULL(22)
 #define I40E_FLAG_PTP  BIT_ULL(25)
 #define I40E_FLAG_MFP_ENABLED  BIT_ULL(26)
-#ifdef CONFIG_I40E_VXLAN
-#define I40E_FLAG_VXLAN_FILTER_SYNCBIT_ULL(27)
-#endif
+#define I40E_FLAG_UDP_FILTER_SYNC  BIT_ULL(27)
 #define I40E_FLAG_PORT_ID_VALIDBIT_ULL(28)
 #define I40E_FLAG_DCB_CAPABLE  BIT_ULL(29)
 #define I40E_FLAG_RSS_AQ_CAPABLE   BIT_ULL(31)
@@ -336,6 +337,7 @@ struct i40e_pf {
 #define I40E_FLAG_MULTIPLE_TCP_UDP_RSS_PCTYPE  BIT_ULL(38)
 #define I40E_FLAG_LINK_POLLING_ENABLED BIT_ULL(39)
 #define I40E_FLAG_VEB_MODE_ENABLED BIT_ULL(40)
+#define I40E_FLAG_GENEVE_OFFLOAD_CAPABLE   BIT_ULL(41)
 #define I40E_FLAG_NO_PCI_LINK_CHECKBIT_ULL(42)
 
/* tracks features that get auto disabled by errors */
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c 
b/drivers/net/ethernet/intel/i40e/i40e_main.c
index b118deb..81a6693 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -27,9 +27,12 @@
 /* Local includes */
 #include "i40e.h"
 #include "i40e_diag.h"
-#ifdef CONFIG_I40E_VXLAN
+#if IS_ENABLED(CONFIG_VXLAN)
 #include <net/vxlan.h>
 #endif
+#if IS_ENABLED(CONFIG_GENEVE)
+#include <net/geneve.h>
+#endif
 
 const char i40e_driver_name[] = "i40e";
 static const char i40e_driver_string[] =
@@ -7036,30 +7039,30 @@ static void i40e_handle_mdd_event(struct i40e_pf *pf)
i40e_flush(hw);
 }
 
-#ifdef CONFIG_I40E_VXLAN
 /**
- * i40e_sync_vxlan_filters_subtask - Sync the VSI filter list with HW
+ * i40e_sync_udp_filters_subtask - Sync the VSI filter list with HW
  * @pf: board private structure
  **/
-static void i40e_sync_vxlan_filters_subtask(struct i40e_pf *pf)
+static void i40e_sync_udp_filters_subtask(struct i40e_pf *pf)
 {
+#if IS_ENABLED(CONFIG_VXLAN) || IS_ENABLED(CONFIG_GENEVE)
struct i40e_hw *hw = &pf->hw;
i40e_status ret;
__be16 port;
int i;
 
-   if (!(pf->flags & I40E_FLAG_VXLAN_FILTER_SYNC))
+   if (!(pf->flags & I40E_FLAG_UDP_FILTER_SYNC))
return;
 
-   pf->flags &= ~I40E_FLAG_VXLAN_FILTER_SYNC;
+   pf->flags &= ~I40E_FLAG_UDP_FILTER_SYNC;
 
for (i = 0; i < I40E_MAX_PF_UDP_OFFLOAD_PORTS; i++) {
-   if (pf->pending_vxlan_bitmap & BIT_ULL(i)) {
-   pf->pending_vxlan_bitmap &= ~BIT_ULL(i);
-   port = pf->vxlan_ports[i];
+   if (pf->pending_udp_bitmap & BIT_ULL(i)) {
+   pf->pending_udp_bitmap &= ~BIT_ULL(i);
+   port = pf->udp_ports[i].index;
if (port)
ret = i40e_aq_add_udp_tunnel(hw, ntohs(port),
-I40E_AQC_TUNNEL_TYPE_VXLAN,
+pf->udp_ports[i].type,
 NULL, NULL);
else
ret = i40e_aq_del_udp_tunnel(hw, i, NULL);
@@ -7072,13 +7075,13 @@ static void i40e_sync_vxlan_filters_subtask(struct 
i40e_pf *pf)
 i40e_stat_str(&pf->hw, ret),
 i40e_aq_str(&pf->hw,
pf->hw.aq.asq_last_status));
-   pf->vxlan_ports[i] = 0;
+   

[PATCH v5 1/5] geneve: Add geneve udp port offload for ethernet devices

2015-12-14 Thread Anjali Singhai Jain
Add ndo_ops to add/del UDP ports to a device that supports geneve
offload.

v2: Comment fix.

Signed-off-by: Anjali Singhai Jain 
Signed-off-by: Kiran Patil 
---
 drivers/net/geneve.c  | 23 +++
 include/linux/netdevice.h | 20 +++-
 2 files changed, 42 insertions(+), 1 deletion(-)

diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index 0750d7a..89325e4 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -380,8 +380,11 @@ static struct socket *geneve_create_sock(struct net *net, 
bool ipv6,
 
 static void geneve_notify_add_rx_port(struct geneve_sock *gs)
 {
+   struct net_device *dev;
struct sock *sk = gs->sock->sk;
+   struct net *net = sock_net(sk);
sa_family_t sa_family = sk->sk_family;
+   __be16 port = inet_sk(sk)->inet_sport;
int err;
 
if (sa_family == AF_INET) {
@@ -390,6 +393,14 @@ static void geneve_notify_add_rx_port(struct geneve_sock 
*gs)
pr_warn("geneve: udp_add_offload failed with status 
%d\n",
err);
}
+
+   rcu_read_lock();
+   for_each_netdev_rcu(net, dev) {
+   if (dev->netdev_ops->ndo_add_geneve_port)
+   dev->netdev_ops->ndo_add_geneve_port(dev, sa_family,
+port);
+   }
+   rcu_read_unlock();
 }
 
 static int geneve_hlen(struct genevehdr *gh)
@@ -530,8 +541,20 @@ static struct geneve_sock *geneve_socket_create(struct net 
*net, __be16 port,
 
 static void geneve_notify_del_rx_port(struct geneve_sock *gs)
 {
+   struct net_device *dev;
struct sock *sk = gs->sock->sk;
+   struct net *net = sock_net(sk);
sa_family_t sa_family = sk->sk_family;
+   __be16 port = inet_sk(sk)->inet_sport;
+
+   rcu_read_lock();
+   for_each_netdev_rcu(net, dev) {
+   if (dev->netdev_ops->ndo_del_geneve_port)
+   dev->netdev_ops->ndo_del_geneve_port(dev, sa_family,
+port);
+   }
+
+   rcu_read_unlock();
 
if (sa_family == AF_INET)
udp_del_offload(&gs->udp_offloads);
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 1bb21ff..82065a7 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1013,6 +1013,19 @@ typedef u16 (*select_queue_fallback_t)(struct net_device 
*dev,
  * a new port starts listening. The operation is protected by the
  * vxlan_net->sock_lock.
  *
+ * void (*ndo_add_geneve_port)(struct net_device *dev,
+ *   sa_family_t sa_family, __be16 port);
+ * Called by geneve to notify a driver about the UDP port and socket
+ * address family that geneve is listening to. It is called only when
+ * a new port starts listening. The operation is protected by the
+ * geneve_net->sock_lock.
+ *
+ * void (*ndo_del_geneve_port)(struct net_device *dev,
+ *   sa_family_t sa_family, __be16 port);
+ * Called by geneve to notify the driver about a UDP port and socket
+ * address family that geneve is not listening to anymore. The operation
+ * is protected by the geneve_net->sock_lock.
+ *
  * void (*ndo_del_vxlan_port)(struct  net_device *dev,
  *   sa_family_t sa_family, __be16 port);
  * Called by vxlan to notify the driver about a UDP port and socket
@@ -1217,7 +1230,12 @@ struct net_device_ops {
void(*ndo_del_vxlan_port)(struct  net_device *dev,
  sa_family_t sa_family,
  __be16 port);
-
+   void(*ndo_add_geneve_port)(struct  net_device *dev,
+  sa_family_t sa_family,
+  __be16 port);
+   void(*ndo_del_geneve_port)(struct  net_device *dev,
+  sa_family_t sa_family,
+  __be16 port);
void*   (*ndo_dfwd_add_station)(struct net_device *pdev,
struct net_device *dev);
void(*ndo_dfwd_del_station)(struct net_device *pdev,
-- 
1.8.1.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v5 5/5] i40e: Call geneve_get_rx_port to get the existing Geneve ports

2015-12-14 Thread Anjali Singhai Jain
This patch adds a call to geneve_get_rx_port in i40e so that when it
comes up it can learn about the existing geneve tunnels.

Signed-off-by: Anjali Singhai Jain 
---
 drivers/net/ethernet/intel/i40e/i40e_main.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c 
b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 81a6693..11059be 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -5339,6 +5339,9 @@ int i40e_open(struct net_device *netdev)
 #ifdef CONFIG_I40E_VXLAN
vxlan_get_rx_port(netdev);
 #endif
+#ifdef CONFIG_I40E_GENEVE
+   geneve_get_rx_port(netdev);
+#endif
 
return 0;
 }
-- 
1.8.1.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


  1   2   >