[PATCH v5 net-next 4/4] ila: Add generic ILA translation facility
This patch implements an ILA translation table. This table can be configured with identifier to locator mappings, and can be queried to resolve a mapping. Queries can be parameterized based on interface, direction (incoming or outgoing), and matching locator. The table is implemented using rhashtable and is configured via netlink (through "ip ila .." in iproute). The table may be used as an alternative means to do ILA translations other than the lw tunnels. Signed-off-by: Tom Herbert --- include/net/ila.h | 18 ++ include/uapi/linux/ila.h | 22 ++ net/ipv6/ila/Makefile | 2 +- net/ipv6/ila/ila.h| 2 + net/ipv6/ila/ila_common.c | 8 + net/ipv6/ila/ila_xlat.c | 680 ++ 6 files changed, 731 insertions(+), 1 deletion(-) create mode 100644 include/net/ila.h create mode 100644 net/ipv6/ila/ila_xlat.c diff --git a/include/net/ila.h b/include/net/ila.h new file mode 100644 index 000..9f4f43e --- /dev/null +++ b/include/net/ila.h @@ -0,0 +1,18 @@ +/* + * ILA kernel interface + * + * Copyright (c) 2015 Tom Herbert + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. 
+ */ + +#ifndef _NET_ILA_H +#define _NET_ILA_H + +int ila_xlat_outgoing(struct sk_buff *skb); +int ila_xlat_incoming(struct sk_buff *skb); + +#endif /* _NET_ILA_H */ diff --git a/include/uapi/linux/ila.h b/include/uapi/linux/ila.h index 7ed9e67..abde7bb 100644 --- a/include/uapi/linux/ila.h +++ b/include/uapi/linux/ila.h @@ -3,13 +3,35 @@ #ifndef _UAPI_LINUX_ILA_H #define _UAPI_LINUX_ILA_H +/* NETLINK_GENERIC related info */ +#define ILA_GENL_NAME "ila" +#define ILA_GENL_VERSION 0x1 + enum { ILA_ATTR_UNSPEC, ILA_ATTR_LOCATOR, /* u64 */ + ILA_ATTR_IDENTIFIER,/* u64 */ + ILA_ATTR_LOCATOR_MATCH, /* u64 */ + ILA_ATTR_IFINDEX, /* s32 */ + ILA_ATTR_DIR, /* u32 */ __ILA_ATTR_MAX, }; #define ILA_ATTR_MAX (__ILA_ATTR_MAX - 1) +enum { + ILA_CMD_UNSPEC, + ILA_CMD_ADD, + ILA_CMD_DEL, + ILA_CMD_GET, + + __ILA_CMD_MAX, +}; + +#define ILA_CMD_MAX(__ILA_CMD_MAX - 1) + +#define ILA_DIR_IN (1 << 0) +#define ILA_DIR_OUT(1 << 1) + #endif /* _UAPI_LINUX_ILA_H */ diff --git a/net/ipv6/ila/Makefile b/net/ipv6/ila/Makefile index 31d136b..4b32e59 100644 --- a/net/ipv6/ila/Makefile +++ b/net/ipv6/ila/Makefile @@ -4,4 +4,4 @@ obj-$(CONFIG_IPV6_ILA) += ila.o -ila-objs := ila_common.o ila_lwt.o +ila-objs := ila_common.o ila_lwt.o ila_xlat.o diff --git a/net/ipv6/ila/ila.h b/net/ipv6/ila/ila.h index b94081f..28542cb 100644 --- a/net/ipv6/ila/ila.h +++ b/net/ipv6/ila/ila.h @@ -42,5 +42,7 @@ void update_ipv6_locator(struct sk_buff *skb, struct ila_params *p); int ila_lwt_init(void); void ila_lwt_fini(void); +int ila_xlat_init(void); +void ila_xlat_fini(void); #endif /* __ILA_H */ diff --git a/net/ipv6/ila/ila_common.c b/net/ipv6/ila/ila_common.c index 64e1904..32dc9aa 100644 --- a/net/ipv6/ila/ila_common.c +++ b/net/ipv6/ila/ila_common.c @@ -80,12 +80,20 @@ static int __init ila_init(void) if (ret) goto fail_lwt; + ret = ila_xlat_init(); + if (ret) + goto fail_xlat; + + return 0; +fail_xlat: + ila_lwt_fini(); fail_lwt: return ret; } static void __exit ila_fini(void) { + ila_xlat_fini(); 
ila_lwt_fini(); } diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c new file mode 100644 index 000..1e1eaf3 --- /dev/null +++ b/net/ipv6/ila/ila_xlat.c @@ -0,0 +1,680 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "ila.h" + +struct ila_xlat_params { + struct ila_params ip; + __be64 identifier; + int ifindex; + unsigned int dir; +}; + +struct ila_map { + struct ila_xlat_params p; + struct rhash_head node; + struct ila_map __rcu *next; + struct rcu_head rcu; +}; + +static unsigned int ila_net_id; + +struct ila_net { + struct rhashtable rhash_table; + spinlock_t *locks; /* Bucket locks for entry manipulation */ + unsigned int locks_mask; + bool hooks_registered; +}; + +#defineLOCKS_PER_CPU 10 + +static int alloc_ila_locks(struct ila_net *ilan, gfp_t gfp) +{ + unsigned int i, size; + unsigned int nr_pcpus = num_possible_cpus(); + + nr_pcpus = min_t(unsigned int, nr_pcpus, 32UL); + size = roundup_pow_of_two(nr_pcpus * LOCKS_PER_CPU); + + if (sizeof(spinlock_t) != 0) { +#ifdef CONFIG_NUMA + if (size * sizeof(spinlock_t) > PAGE_SIZE && +
[PATCH v5 net-next 1/4] ila: Create net/ipv6/ila directory
Create ila directory in preparation for supporting other hooks in the kernel than LWT for doing ILA. This includes: - Moving ila.c to ila/ila_lwt.c - Splitting out some common functions into ila_common.c Signed-off-by: Tom Herbert--- net/ipv6/Makefile | 2 +- net/ipv6/ila.c| 229 -- net/ipv6/ila/Makefile | 7 ++ net/ipv6/ila/ila.h| 46 ++ net/ipv6/ila/ila_common.c | 95 +++ net/ipv6/ila/ila_lwt.c| 152 ++ 6 files changed, 301 insertions(+), 230 deletions(-) delete mode 100644 net/ipv6/ila.c create mode 100644 net/ipv6/ila/Makefile create mode 100644 net/ipv6/ila/ila.h create mode 100644 net/ipv6/ila/ila_common.c create mode 100644 net/ipv6/ila/ila_lwt.c diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index 2c900c7..2fbd90b 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -34,7 +34,7 @@ obj-$(CONFIG_INET6_XFRM_MODE_TUNNEL) += xfrm6_mode_tunnel.o obj-$(CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION) += xfrm6_mode_ro.o obj-$(CONFIG_INET6_XFRM_MODE_BEET) += xfrm6_mode_beet.o obj-$(CONFIG_IPV6_MIP6) += mip6.o -obj-$(CONFIG_IPV6_ILA) += ila.o +obj-$(CONFIG_IPV6_ILA) += ila/ obj-$(CONFIG_NETFILTER)+= netfilter/ obj-$(CONFIG_IPV6_VTI) += ip6_vti.o diff --git a/net/ipv6/ila.c b/net/ipv6/ila.c deleted file mode 100644 index 1a6852e..000 --- a/net/ipv6/ila.c +++ /dev/null @@ -1,229 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -struct ila_params { - __be64 locator; - __be64 locator_match; - __wsum csum_diff; -}; - -static inline struct ila_params *ila_params_lwtunnel( - struct lwtunnel_state *lwstate) -{ - return (struct ila_params *)lwstate->data; -} - -static inline __wsum compute_csum_diff8(const __be32 *from, const __be32 *to) -{ - __be32 diff[] = { - ~from[0], ~from[1], to[0], to[1], - }; - - return csum_partial(diff, sizeof(diff), 0); -} - -static inline __wsum get_csum_diff(struct ipv6hdr *ip6h, struct ila_params *p) -{ - if (*(__be64 *)>daddr == p->locator_match) - return 
p->csum_diff; - else - return compute_csum_diff8((__be32 *)>daddr, - (__be32 *)>locator); -} - -static void update_ipv6_locator(struct sk_buff *skb, struct ila_params *p) -{ - __wsum diff; - struct ipv6hdr *ip6h = ipv6_hdr(skb); - size_t nhoff = sizeof(struct ipv6hdr); - - /* First update checksum */ - switch (ip6h->nexthdr) { - case NEXTHDR_TCP: - if (likely(pskb_may_pull(skb, nhoff + sizeof(struct tcphdr { - struct tcphdr *th = (struct tcphdr *) - (skb_network_header(skb) + nhoff); - - diff = get_csum_diff(ip6h, p); - inet_proto_csum_replace_by_diff(>check, skb, - diff, true); - } - break; - case NEXTHDR_UDP: - if (likely(pskb_may_pull(skb, nhoff + sizeof(struct udphdr { - struct udphdr *uh = (struct udphdr *) - (skb_network_header(skb) + nhoff); - - if (uh->check || skb->ip_summed == CHECKSUM_PARTIAL) { - diff = get_csum_diff(ip6h, p); - inet_proto_csum_replace_by_diff(>check, skb, - diff, true); - if (!uh->check) - uh->check = CSUM_MANGLED_0; - } - } - break; - case NEXTHDR_ICMP: - if (likely(pskb_may_pull(skb, -nhoff + sizeof(struct icmp6hdr { - struct icmp6hdr *ih = (struct icmp6hdr *) - (skb_network_header(skb) + nhoff); - - diff = get_csum_diff(ip6h, p); - inet_proto_csum_replace_by_diff(>icmp6_cksum, skb, - diff, true); - } - break; - } - - /* Now change destination address */ - *(__be64 *)>daddr = p->locator; -} - -static int ila_output(struct net *net, struct sock *sk, struct sk_buff *skb) -{ - struct dst_entry *dst = skb_dst(skb); - - if (skb->protocol != htons(ETH_P_IPV6)) - goto drop; - - update_ipv6_locator(skb, ila_params_lwtunnel(dst->lwtstate)); - - return dst->lwtstate->orig_output(net, sk, skb); - -drop: - kfree_skb(skb); - return -EINVAL; -} - -static int ila_input(struct sk_buff *skb) -{ -
[PATCH v5 net-next 2/4] rhashtable: add function to replace an element
Add the rhashtable_replace_fast function. This replaces one object in the table with another atomically. The hashes of the new and old objects must be equal. Signed-off-by: Tom Herbert--- include/linux/rhashtable.h | 82 ++ 1 file changed, 82 insertions(+) diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 843ceca..77deece 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -819,4 +819,86 @@ out: return err; } +/* Internal function, please use rhashtable_replace_fast() instead */ +static inline int __rhashtable_replace_fast( + struct rhashtable *ht, struct bucket_table *tbl, + struct rhash_head *obj_old, struct rhash_head *obj_new, + const struct rhashtable_params params) +{ + struct rhash_head __rcu **pprev; + struct rhash_head *he; + spinlock_t *lock; + unsigned int hash; + int err = -ENOENT; + + /* Minimally, the old and new objects must have same hash +* (which should mean identifiers are the same). +*/ + hash = rht_head_hashfn(ht, tbl, obj_old, params); + if (hash != rht_head_hashfn(ht, tbl, obj_new, params)) + return -EINVAL; + + lock = rht_bucket_lock(tbl, hash); + + spin_lock_bh(lock); + + pprev = >buckets[hash]; + rht_for_each(he, tbl, hash) { + if (he != obj_old) { + pprev = >next; + continue; + } + + rcu_assign_pointer(obj_new->next, obj_old->next); + rcu_assign_pointer(*pprev, obj_new); + err = 0; + break; + } + + spin_unlock_bh(lock); + + return err; +} + +/** + * rhashtable_replace_fast - replace an object in hash table + * @ht:hash table + * @obj_old: pointer to hash head inside object being replaced + * @obj_new: pointer to hash head inside object which is new + * @params:hash table parameters + * + * Replacing an object doesn't affect the number of elements in the hash table + * or bucket, so we don't need to worry about shrinking or expanding the + * table here. + * + * Returns zero on success, -ENOENT if the entry could not be found, + * -EINVAL if hash is not the same for the old and new objects. 
+ */ +static inline int rhashtable_replace_fast( + struct rhashtable *ht, struct rhash_head *obj_old, + struct rhash_head *obj_new, + const struct rhashtable_params params) +{ + struct bucket_table *tbl; + int err; + + rcu_read_lock(); + + tbl = rht_dereference_rcu(ht->tbl, ht); + + /* Because we have already taken (and released) the bucket +* lock in old_tbl, if we find that future_tbl is not yet +* visible then that guarantees the entry to still be in +* the old tbl if it exists. +*/ + while ((err = __rhashtable_replace_fast(ht, tbl, obj_old, + obj_new, params)) && + (tbl = rht_dereference_rcu(tbl->future_tbl, ht))) + ; + + rcu_read_unlock(); + + return err; +} + #endif /* _LINUX_RHASHTABLE_H */ -- 2.4.6 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v5 net-next 3/4] netlink: add a start callback for starting a netlink dump
The start callback allows the caller to set up a context for the dump callbacks. Presumably, the context can then be destroyed in the done callback. Signed-off-by: Tom Herbert--- include/linux/netlink.h | 2 ++ include/net/genetlink.h | 2 ++ net/netlink/af_netlink.c | 4 net/netlink/genetlink.c | 16 4 files changed, 24 insertions(+) diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 639e9b8..0b41959 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -131,6 +131,7 @@ netlink_skb_clone(struct sk_buff *skb, gfp_t gfp_mask) struct netlink_callback { struct sk_buff *skb; const struct nlmsghdr *nlh; + int (*start)(struct netlink_callback *); int (*dump)(struct sk_buff * skb, struct netlink_callback *cb); int (*done)(struct netlink_callback *cb); @@ -153,6 +154,7 @@ struct nlmsghdr * __nlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, int type, int len, int flags); struct netlink_dump_control { + int (*start)(struct netlink_callback *); int (*dump)(struct sk_buff *skb, struct netlink_callback *); int (*done)(struct netlink_callback *); void *data; diff --git a/include/net/genetlink.h b/include/net/genetlink.h index 1b6b6dc..43c0e77 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -114,6 +114,7 @@ static inline void genl_info_net_set(struct genl_info *info, struct net *net) * @flags: flags * @policy: attribute validation policy * @doit: standard command callback + * @start: start callback for dumps * @dumpit: callback for dumpers * @done: completion callback for dumps * @ops_list: operations list @@ -122,6 +123,7 @@ struct genl_ops { const struct nla_policy *policy; int(*doit)(struct sk_buff *skb, struct genl_info *info); + int(*start)(struct netlink_callback *cb); int(*dumpit)(struct sk_buff *skb, struct netlink_callback *cb); int(*done)(struct netlink_callback *cb); diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 59651af..81dc1bb 100644 --- a/net/netlink/af_netlink.c +++ 
b/net/netlink/af_netlink.c @@ -2915,6 +2915,7 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb, cb = >cb; memset(cb, 0, sizeof(*cb)); + cb->start = control->start; cb->dump = control->dump; cb->done = control->done; cb->nlh = nlh; @@ -2927,6 +2928,9 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb, mutex_unlock(nlk->cb_mutex); + if (cb->start) + cb->start(cb); + ret = netlink_dump(sk); sock_put(sk); diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index bc0e504..8e63662 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -513,6 +513,20 @@ void *genlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, } EXPORT_SYMBOL(genlmsg_put); +static int genl_lock_start(struct netlink_callback *cb) +{ + /* our ops are always const - netlink API doesn't propagate that */ + const struct genl_ops *ops = cb->data; + int rc = 0; + + if (ops->start) { + genl_lock(); + rc = ops->start(cb); + genl_unlock(); + } + return rc; +} + static int genl_lock_dumpit(struct sk_buff *skb, struct netlink_callback *cb) { /* our ops are always const - netlink API doesn't propagate that */ @@ -577,6 +591,7 @@ static int genl_family_rcv_msg(struct genl_family *family, .module = family->module, /* we have const, but the netlink API doesn't */ .data = (void *)ops, + .start = genl_lock_start, .dump = genl_lock_dumpit, .done = genl_lock_done, }; @@ -588,6 +603,7 @@ static int genl_family_rcv_msg(struct genl_family *family, } else { struct netlink_dump_control c = { .module = family->module, + .start = ops->start, .dump = ops->dumpit, .done = ops->done, }; -- 2.4.6 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v5 net-next 0/4] ila: Optimization to preserve value of early demux
In the current implementation of ILA, LWT is used to perform translation on both the input and output paths. This is functional; however, there is a big performance hit in the receive path. Early demux occurs before the routing lookup (a hit actually obviates the route lookup). Therefore the stack currently performs early demux before translation so that a local connection with ILA addresses is never matched. Note that this issue is not just with ILA, but pretty much any translated or encapsulated packet handled by LWT would miss the opportunity for early demux. Solving the general problem seems non-trivial since we would need to move the route lookup before early demux thereby mitigating the value. This patch set addresses the issue for ILA by adding a fast locator lookup that occurs before early demux. This is done by hooking into NF_INET_PRE_ROUTING. For the backend we implement an rhashtable that contains identifier to locator mappings. The table also allows more specific matches that include original locator and interface. This patch set: - Add an rhashtable function to atomically replace an element. This is useful to implement sub-trees from a table entry without needing to use a special anchor structure as the table entry. - Add a start callback for starting a netlink dump. - Create an ila directory under net/ipv6 and moves ila.c to it. ila.c is split into ila_common.c and ila_lwt.c. - Implement a table to do identifier->locator mapping. This is an rhashtable (in ila_xlat.c). - Configuration for the table with netlink. - Add a hook into NF_INET_PRE_ROUTING to perform ILA translation before early demux. 
Changes in v2: - Use iptables targets instead of a new xfrm function Changes in v3: - Add __rcu to next pointer in struct ila_map Changes in v4: - Use hook for NF_INET_PRE_ROUTING Changed in v5: - Register hooks per namespace using nf_register_net_hooks - Only register hooks when first mapping is actually added Testing: Running 200 netperf TCP_RR streams No ILA, baseline 79.26% CPU utilization 1678282 tps 104/189/390 50/90/99% latencies ILA before fix (LWT on both input and output) 81.91% CPU utilization 1464723 tps (-14.5% from baseline) 121/215/411 50/90/99% latencies ILA after fix 80.62% CPU utilization 1622985 (-3.4% from baseline) 110/191/347 50/90/99% latencies Tom Herbert (4): ila: Create net/ipv6/ila directory rhashtable: add function to replace an element netlink: add a start callback for starting a netlink dump ila: Add generic ILA translation facility include/linux/netlink.h| 2 + include/linux/rhashtable.h | 82 ++ include/net/genetlink.h| 2 + include/net/ila.h | 18 ++ include/uapi/linux/ila.h | 22 ++ net/ipv6/Makefile | 2 +- net/ipv6/ila.c | 229 --- net/ipv6/ila/Makefile | 7 + net/ipv6/ila/ila.h | 48 net/ipv6/ila/ila_common.c | 103 +++ net/ipv6/ila/ila_lwt.c | 152 ++ net/ipv6/ila/ila_xlat.c| 680 + net/netlink/af_netlink.c | 4 + net/netlink/genetlink.c| 16 ++ 14 files changed, 1137 insertions(+), 230 deletions(-) create mode 100644 include/net/ila.h delete mode 100644 net/ipv6/ila.c create mode 100644 net/ipv6/ila/Makefile create mode 100644 net/ipv6/ila/ila.h create mode 100644 net/ipv6/ila/ila_common.c create mode 100644 net/ipv6/ila/ila_lwt.c create mode 100644 net/ipv6/ila/ila_xlat.c -- 2.4.6 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] net: emac: emac gigabit ethernet controller driver
On 14/12/15 16:19, Gilad Avidov wrote: [snip] > + "sgmii_irq"; > + qcom,emac-gpio-mdc = < 123 0>; > + qcom,emac-gpio-mdio = < 124 0>; > + qcom,emac-tstamp-en; > + qcom,emac-ptp-frac-ns-adj = <12500 1>; > + phy-addr = <0>; Please use the standard Ethernet PHY and MDIO device tree bindings to describe your MAC to PHY connection here, that includes using a phy-connection-type property to describe the (x)MII lanes. [snip] > +/* EMAC_MAC_CTRL */ > +#define SINGLE_PAUSE_MODE 0x1000 > +#define DEBUG_MODE 0x800 > +#define BROAD_EN 0x400 > +#define MULTI_ALL0x200 > +#define RX_CHKSUM_EN 0x100 > +#define HUGE 0x80 > +#define SPEED_BMSK0x30 > +#define SPEED_SHFT 20 > +#define SIMR 0x8 > +#define TPAUSE 0x1 > +#define PROM_MODE 0x8000 > +#define VLAN_STRIP 0x4000 > +#define PRLEN_BMSK 0x3c00 > +#define PRLEN_SHFT 10 > +#define HUGEN0x200 > +#define FLCHK0x100 > +#define PCRCE 0x80 > +#define CRCE 0x40 > +#define FULLD 0x20 > +#define MAC_LP_EN 0x10 > +#define RXFC 0x8 > +#define TXFC 0x4 > +#define RXEN 0x2 > +#define TXEN 0x1 BIT(x)? which would avoid making this reverse christmas tree, I know this is the time of year though. [snip] > +/* DMA address */ > +#define DMA_ADDR_HI_MASK 0xULL > +#define DMA_ADDR_LO_MASK 0xULL > + > +#define EMAC_DMA_ADDR_HI(_addr) \ > + ((u32)(((u64)(_addr) & DMA_ADDR_HI_MASK) >> 32)) > +#define EMAC_DMA_ADDR_LO(_addr) \ > + ((u32)((u64)(_addr) & DMA_ADDR_LO_MASK)) The kernel provides helpers for that: upper_32bits and lower_32bits(). [snip] > +struct emac_skb_cb { > + u32 tpd_idx; > + unsigned long jiffies; > +}; > + > +struct emac_tx_ts_cb { > + u32 sec; > + u32 ns; > +}; > + > +#define EMAC_SKB_CB(skb) ((struct emac_skb_cb *)(skb)->cb) > +#define EMAC_TX_TS_CB(skb) ((struct emac_tx_ts_cb *)(skb)->cb) Should not these two have different offsets within skb->cb in case they both end-up being added to the same SKB? 
[snip] > +static void emac_mac_irq_enable(struct emac_adapter *adpt) > +{ > + int i; > + > + for (i = 0; i < EMAC_NUM_CORE_IRQ; i++) { > + struct emac_irq *irq = >irq[i]; > + const struct emac_irq_config*irq_cfg = _irq_cfg_tbl[i]; > + > + writel_relaxed(~DIS_INT, adpt->base + irq_cfg->status_reg); > + writel_relaxed(irq->mask, adpt->base + irq_cfg->mask_reg); > + } > + > + wmb(); /* ensure that irq and ptp setting are flushed to HW */ Would not using writel() make the appropriate thing here instead of using _relaxed which has no barrier? [snip] > + mta = readl_relaxed(adpt->base + EMAC_HASH_TAB_REG0 + (reg << 2)); > + mta |= (0x1 << bit); > + writel_relaxed(mta, adpt->base + EMAC_HASH_TAB_REG0 + (reg << 2)); > + wmb(); /* ensure that the mac address is flushed to HW */ This is getting too much here, just use the correct I/O accessor for your platform, period. [snip] > + > + /* enable RX/TX Flow Control */ > + switch (phy->cur_fc_mode) { > + case EMAC_FC_FULL: > + mac |= (TXFC | RXFC); > + break; > + case EMAC_FC_RX_PAUSE: > + mac |= RXFC; > + break; > + case EMAC_FC_TX_PAUSE: > + mac |= TXFC; > + break; > + default: > + break; > + } > + > + /* setup link speed */ > + mac &= ~SPEED_BMSK; > + switch (phy->link_speed) { > + case EMAC_LINK_SPEED_1GB_FULL: > + mac |= ((emac_mac_speed_1000 << SPEED_SHFT) & SPEED_BMSK); > + csr1 |= FREQ_MODE; > +
Re: [PATCH net] pptp: validate sockaddr_len before binding
On 14.12.2015 23:58, Cong Wang wrote: > On Mon, Dec 14, 2015 at 2:45 PM, Hannes Frederic Sowa >wrote: >> diff --git a/drivers/net/ppp/pptp.c b/drivers/net/ppp/pptp.c >> index fc69e41d09506e..f9ffdf070ad807 100644 >> --- a/drivers/net/ppp/pptp.c >> +++ b/drivers/net/ppp/pptp.c >> @@ -419,6 +419,9 @@ static int pptp_bind(struct socket *sock, struct >> sockaddr *uservaddr, >> struct pptp_opt *opt = >proto.pptp; >> int error = 0; >> >> + if (sockaddr_len < sizeof(*sp)) >> + return -EINVAL; >> + > > I sent a very similar patch: > https://patchwork.ozlabs.org/patch/556663/ Ah, thanks. Did not notice. The connect() part is also already in my queue, but I don't think it solves the use-after-free. The RCU implementation of callid_sock seems broken to me. David, discard my patch. Thanks, Hannes -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
WARN trace - skb_warn_bad_offload - vxlan - large udp packet - udp checksum disabled
Using a slightly modified version of udpspam (see diff below - hopefully not mangled by corporate email servers), where I set the SO_NO_CHECK socket option and can specify a large buffer size, I can reliably get the following WARN trace. I have reproduced this on both ixgbe and i40e drivers using "udpspam-no-check 6000". It looks to me like this is in the Tx path before we get to the actual NIC drivers, but I may be wrong. [ 1757.644324] [ cut here ] [ 1757.644333] WARNING: CPU: 22 PID: 5537 at net/core/dev.c:2423 skb_warn_bad_offload+0x104/0x111() [ 1757.644340] ixgbe: caps=(0x080660314bb3, 0x) len=6092 data_len=6000 gso_size=1528 gso_type=1026 ip_summed=0 [ 1757.644343] Modules linked in: nfnetlink_queue nfnetlink_log nfnetlink vxlan ip6_udp_tunnel udp_tunnel rfcomm xt_CHECKSUM bnep bluetooth rfkill tun fuse ip6t_rpfilter ip6t_REJECT nf_reject_ipv6 nf_conntrack_ipv6 nf_defrag_ipv6 nf_conntrack_ipv4 nf_defrag_ipv4 xt_conntrack nf_conntrack ebtable_nat ebtable_broute bridge stp llc ebtable_filter ebtables ip6table_mangle ip6table_security ip6table_raw ip6table_filter ip6_tables iptable_mangle iptable_security iptable_raw x86_pkg_temp_thermal coretemp kvm_intel kvm joydev iTCO_wdt ipmi_devintf iTCO_vendor_support irqbypass crct10dif_pclmul ixgbe igb crc32_pclmul ipmi_si ptp pps_core sb_edac lpc_ich crc32c_intel pcspkr edac_core i2c_i801 mfd_core mdio ipmi_msghandler mei_me shpchp tpm_tis ioatdma mei dca wmi tpm binfmt_misc uinput mgag200 i2c_algo_bit drm_kms_helper [ 1757.63] ttm drm isci libsas firewire_ohci firewire_core i2c_core scsi_transport_sas crc_itu_t [ 1757.644475] CPU: 22 PID: 5537 Comm: udpspam-no-chec Tainted: GW 4.4.0-rc3+ #1 [ 1757.644480] Hardware name: Intel Corporation S2600CO/S2600CO, BIOS SE5C600.86B.02.03.0003.041920141333 04/19/2014 [ 1757.644482] c92b03e4 88081907b410 8138f918 [ 1757.644488] 88081907b458 88081907b448 8109c036 8808196e9500 [ 1757.644494] 880816f6 0402 88080e8eb2ac [ 1757.644499] Call Trace: [ 1757.644509] [] 
dump_stack+0x44/0x5c [ 1757.644514] [] warn_slowpath_common+0x86/0xc0 [ 1757.644518] [] warn_slowpath_fmt+0x5c/0x80 [ 1757.644523] [] ? ___ratelimit+0x8c/0xf0 [ 1757.644539] [] skb_warn_bad_offload+0x104/0x111 [ 1757.644549] [] __skb_gso_segment+0x7f/0xd0 [ 1757.644563] [] validate_xmit_skb.isra.104.part.105+0x11f/0x2a0 [ 1757.644572] [] validate_xmit_skb_list+0x3b/0x60 [ 1757.644579] [] sch_direct_xmit+0xc1/0x1f0 [ 1757.644585] [] __dev_queue_xmit+0x21b/0x510 [ 1757.644589] [] dev_queue_xmit+0x10/0x20 [ 1757.644593] [] ip_finish_output2+0x23f/0x310 [ 1757.644598] [] ip_finish_output+0x139/0x1f0 [ 1757.644605] [] ? nf_hook_slow+0x76/0xd0 [ 1757.644610] [] ip_output+0x6e/0xe0 [ 1757.644615] [] ? __ip_local_out+0x42/0x100 [ 1757.644620] [] ? ip_fragment.constprop.49+0x80/0x80 [ 1757.644627] [] ip_local_out+0x35/0x40 [ 1757.644634] [] iptunnel_xmit+0x12d/0x150 [ 1757.644640] [] udp_tunnel_xmit_skb+0xea/0x100 [udp_tunnel] [ 1757.644648] [] vxlan_xmit_one+0xac6/0x1280 [vxlan] [ 1757.644659] [] ? vprintk_emit+0x2f2/0x4f0 [ 1757.644675] [] ? printk+0x5d/0x74 [ 1757.644681] [] ? warn_slowpath_common+0x95/0xc0 [ 1757.644688] [] vxlan_xmit+0x172/0xd44 [vxlan] [ 1757.644694] [] ? inet_gso_segment+0x163/0x360 [ 1757.644711] [] dev_hard_start_xmit+0x22e/0x3b0 [ 1757.644721] [] __dev_queue_xmit+0x414/0x510 [ 1757.644734] [] dev_queue_xmit+0x10/0x20 [ 1757.644747] [] ip_finish_output2+0x23f/0x310 [ 1757.644758] [] ip_finish_output+0x139/0x1f0 [ 1757.644763] [] ? nf_hook_slow+0x76/0xd0 [ 1757.644768] [] ip_output+0x6e/0xe0 [ 1757.644775] [] ? __ip_local_out+0x42/0x100 [ 1757.644780] [] ? ip_fragment.constprop.49+0x80/0x80 [ 1757.644785] [] ip_local_out+0x35/0x40 [ 1757.644793] [] ip_send_skb+0x19/0x40 [ 1757.644800] [] udp_send_skb+0x16d/0x270 [ 1757.644807] [] udp_sendmsg+0x2c8/0x9a0 [ 1757.644812] [] ? ip_reply_glue_bits+0x60/0x60 [ 1757.644825] [] inet_sendmsg+0x67/0xa0 [ 1757.644838] [] sock_sendmsg+0x38/0x50 [ 1757.644852] [] SYSC_sendto+0x102/0x190 [ 1757.644860] [] ? 
__audit_syscall_entry+0xaf/0x100 [ 1757.644867] [] ? do_audit_syscall_entry+0x66/0x70 [ 1757.644873] [] ? syscall_trace_enter_phase1+0x11f/0x140 [ 1757.644879] [] ? syscall_slow_exit_work+0x3f/0x9f [ 1757.644883] [] SyS_sendto+0xe/0x10 [ 1757.644890] [] entry_SYSCALL_64_fastpath+0x12/0x71 [ 1757.644895] ---[ end trace ec9dfd887c59f41f ]--- Here are the udpspam.c diffs from the original that I found at http://oss.sgi.com/archives/netdev/2001-10/txtmtEDzF63p0.txt --- udpspam.c 2015-12-14 16:56:18.287053786 -0800 +++ udpspam-no-check.c 2015-12-14 17:02:21.979047972 -0800 @@ -42,8 +42,8 @@ typedef unsigned long dword; /* Globals */ -#define PAYLOAD_SIZE 4 -static char payload[PAYLOAD_SIZE]; +static int payload_size = 4; +static char *payload; /* Socket functions
RE: [PATCH net-next] net, cgroup: cgroup_sk_updat_lock was missing initializer
> -Original Message- > From: David Miller [mailto:da...@davemloft.net] > Sent: Tuesday, December 15, 2015 3:21 > To: t...@kernel.org > Cc: Dexuan Cui; pa...@netfilter.org; ka...@trash.net; > kad...@blackhole.kfki.hu; dan...@iogearbox.net; daniel.wag...@bmw-carit.de; > nhor...@tuxdriver.com; lize...@huawei.com; han...@cmpxchg.org; > netdev@vger.kernel.org; netfilter-de...@vger.kernel.org; > coret...@netfilter.org; cgro...@vger.kernel.org; linux- > ker...@vger.kernel.org; kernel-t...@fb.com; nin...@fb.com > Subject: Re: [PATCH net-next] net, cgroup: cgroup_sk_updat_lock was missing > initializer > > From: Tejun Heo > Date: Mon, 14 Dec 2015 11:24:06 -0500 > > > bd1060a1d671 ("sock, cgroup: add sock->sk_cgroup") added global > > spinlock cgroup_sk_update_lock but erroneously skipped initializer > > leading to uninitialized spinlock warning. Fix it by using > > DEFINE_SPINLOCK(). > > > > Signed-off-by: Tejun Heo > > Reported-by: Dexuan Cui > > Fixes: bd1060a1d671 ("sock, cgroup: add sock->sk_cgroup") > > Applied, thanks. Thanks! I can confirm it fixed the issue. Thanks, -- Dexuan -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] gianfar: Don't enable RX Filer if not supported
After commit 15bf176db1fb ("gianfar: Don't enable the Filer w/o the Parser"), 'TSEC' model controllers (for example as seen on MPC8541E) always have 8 bytes stripped from the front of received frames. Only 'eTSEC' gianfar controllers have the RX Filer capability (amongst other enhancements). Previously this was treated as always enabled for both 'TSEC' and 'eTSEC' controllers. In commit 15bf176db1fb ("gianfar: Don't enable the Filer w/o the Parser") a subtle change was made to the setting of 'uses_rxfcb' to effectively always set it (since 'rx_filer_enable' was always true). This had the side-effect of always stripping 8 bytes from the front of received frames on 'TSEC' type controllers. We now only enable the RX Filer capability on controller types that support it, thereby avoiding the issue for 'TSEC' type controllers. Reviewed-by: Chris PackhamReviewed-by: Mark Tomlinson Signed-off-by: Hamish Martin --- drivers/net/ethernet/freescale/gianfar.c | 8 +--- drivers/net/ethernet/freescale/gianfar.h | 1 + 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c index 7cf8984..3e233d9 100644 --- a/drivers/net/ethernet/freescale/gianfar.c +++ b/drivers/net/ethernet/freescale/gianfar.c @@ -894,7 +894,8 @@ static int gfar_of_init(struct platform_device *ofdev, struct net_device **pdev) FSL_GIANFAR_DEV_HAS_VLAN | FSL_GIANFAR_DEV_HAS_MAGIC_PACKET | FSL_GIANFAR_DEV_HAS_EXTENDED_HASH | -FSL_GIANFAR_DEV_HAS_TIMER; +FSL_GIANFAR_DEV_HAS_TIMER | +FSL_GIANFAR_DEV_HAS_RX_FILER; err = of_property_read_string(np, "phy-connection-type", ); @@ -1396,8 +1397,9 @@ static int gfar_probe(struct platform_device *ofdev) priv->rx_queue[i]->rxic = DEFAULT_RXIC; } - /* always enable rx filer */ - priv->rx_filer_enable = 1; + /* Always enable rx filer if available */ + priv->rx_filer_enable = + (priv->device_flags & FSL_GIANFAR_DEV_HAS_RX_FILER) ? 
1 : 0; /* Enable most messages by default */ priv->msg_enable = (NETIF_MSG_IFUP << 1 ) - 1; /* use pritority h/w tx queue scheduling for single queue devices */ diff --git a/drivers/net/ethernet/freescale/gianfar.h b/drivers/net/ethernet/freescale/gianfar.h index f266b20..cb77667 100644 --- a/drivers/net/ethernet/freescale/gianfar.h +++ b/drivers/net/ethernet/freescale/gianfar.h @@ -923,6 +923,7 @@ struct gfar { #define FSL_GIANFAR_DEV_HAS_BUF_STASHING 0x0400 #define FSL_GIANFAR_DEV_HAS_TIMER 0x0800 #define FSL_GIANFAR_DEV_HAS_WAKE_ON_FILER 0x1000 +#define FSL_GIANFAR_DEV_HAS_RX_FILER 0x2000 #if (MAXGROUPS == 2) #define DEFAULT_MAPPING0xAA -- 2.6.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
linux-next: manual merge of the net-next tree with the net tree
Hi all, Today's linux-next merge of the net-next tree got a conflict in: drivers/net/geneve.c between commit: a322a1bcf329 ("geneve: Fix IPv6 xmit stats update.") from the net tree and commit: abe492b4f50c ("geneve: UDP checksum configuration via netlink") from the net-next tree. I fixed it up (see below) and can carry the fix as necessary (no action is required). -- Cheers, Stephen Rothwells...@canb.auug.org.au diff --cc drivers/net/geneve.c index c2b79f5d1c89,0750d7a93878.. --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@@ -966,7 -984,10 +984,8 @@@ static netdev_tx_t geneve6_xmit_skb(str } err = udp_tunnel6_xmit_skb(dst, gs6->sock->sk, skb, dev, , , prio, ttl, - sport, geneve->dst_port, !udp_csum); + sport, geneve->dst_port, + !!(flags & GENEVE_F_UDP_ZERO_CSUM6_TX)); - - iptunnel_xmit_stats(err, >stats, dev->tstats); return NETDEV_TX_OK; tx_error: -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
RE: [PATCH 1/1] net: sctp: dynamically enable or disable pf state
From: zyjzyj2...@gmail.com > Sent: 11 December 2015 09:06 ... > +pf_enable - INTEGER > + Enable or disable pf state. A value of pf_retrans > path_max_retrans > + also disables pf state. That is, one of both pf_enable and > + pf_retrans > path_max_retrans can disable pf state. Since pf_retrans > + and path_max_retrans can be changed by userspace application, sometimes > + user expects to disable pf state by the value of > + pf_retrans > path_max_retrans, but occasionally the value of pf_retrans > + or path_max_retrans is changed by the user application, this pf state is > + enabled. As such, it is necessary to add this to dynamically enable > + and disable pf state. > + > + 1: Enable pf. > + > + 0: Disable pf. > + > + Default: 1 You ought to say what 'pf' is short for. David -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
RE: use-after-free in sctp_do_sm
From: Vlad Yasevich > Sent: 11 December 2015 18:38 ... > > Found a similar place in abort primitive handling like in this last > > patch update, it's probably the issue you're still triggering. > > > > Also found another place that may lead to this use after free, in case > > we receive a packet with a chunk that has no data. > > > > Oh my.. :) > > Yes. This is what I was worried about... Anything that triggers > a DELETE_TCB command has to return a code that we can trap. > > The other way is to do what Dmitri suggested, but even there, we > need to be very careful. I'm always wary of anything that queues actions up for later processing. It is far too easy (as found here) to end up processing actions in invalid states, or to process actions in 'unusual' orders when specific events happen close together. I wonder how much fallout there'd be from getting the sctp code to immediately action things, instead of queuing the actions for later. It would certainly remove a lot of the unusual combinations of events. David -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] ath9k_htc: fix handling return value of ath9k_hw_calibrate
The function can return negative values in case of error. Its result should be then tested for such case. The problem has been detected using proposed semantic patch scripts/coccinelle/tests/assign_signed_to_unsigned.cocci [1]. [1]: http://permalink.gmane.org/gmane.linux.kernel/2046107 Signed-off-by: Andrzej Hajda--- drivers/net/wireless/ath/ath9k/htc_drv_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_main.c b/drivers/net/wireless/ath/ath9k/htc_drv_main.c index a680a97..fe1fd1a 100644 --- a/drivers/net/wireless/ath/ath9k/htc_drv_main.c +++ b/drivers/net/wireless/ath/ath9k/htc_drv_main.c @@ -834,7 +834,7 @@ void ath9k_htc_ani_work(struct work_struct *work) if (longcal || shortcal) common->ani.caldone = ath9k_hw_calibrate(ah, ah->curchan, - ah->rxchainmask, longcal); + ah->rxchainmask, longcal) > 0; ath9k_htc_ps_restore(priv); } -- 1.9.1 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] net/mlx4_core: fix handling return value of mlx4_slave_convert_port
The function can return negative values, so its result should be assigned to signed variable. The problem has been detected using proposed semantic patch scripts/coccinelle/tests/assign_signed_to_unsigned.cocci [1]. [1]: http://permalink.gmane.org/gmane.linux.kernel/2046107 Signed-off-by: Andrzej Hajda--- drivers/net/ethernet/mellanox/mlx4/resource_tracker.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index da7f578..b46dbe2 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -4331,9 +4331,10 @@ int mlx4_QP_FLOW_STEERING_ATTACH_wrapper(struct mlx4_dev *dev, int slave, return -EOPNOTSUPP; ctrl = (struct mlx4_net_trans_rule_hw_ctrl *)inbox->buf; - ctrl->port = mlx4_slave_convert_port(dev, slave, ctrl->port); - if (ctrl->port <= 0) + err = mlx4_slave_convert_port(dev, slave, ctrl->port); + if (err <= 0) return -EINVAL; + ctrl->port = err; qpn = be32_to_cpu(ctrl->qpn) & 0xff; err = get_res(dev, slave, qpn, RES_QP, ); if (err) { -- 1.9.1 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: sbc8641: drop bogus PHY IRQ entries from DTS file
On Tue, 2015-08-12 at 22:44:02 UTC, Paul Gortmaker wrote: > This file was originally cloned off of the MPC8641D-HPCN reference > platform, which actually had a PHY IRQ line connected. However > this board does not. The bogus entry was largely inert and went > undetected until commit 321beec5047af83db90c88114b7e664b156f49fe > ("net: phy: Use interrupts when available in NOLINK state") was > added to the tree. > > With the above commit, the board fails to NFS boot since it sits > waiting for a PHY IRQ event that of course never arrives. Removing > the bogus entries from the DTS file fixes the issue. > > Cc: Andrew Lunn> Signed-off-by: Paul Gortmaker Applied to powerpc fixes, thanks. https://git.kernel.org/powerpc/c/5b01310cfc8d2302dcca1d8d cheers -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Information leak in pptp_bind
Hello, The following program leaks various uninit garbage including kernel addresses and whatever is on kernel stack, in particular defeating ASLR. The issue is in pptp_bind which does not verify sockaddr_len. #include #include #include #include #include #include #include #include #include #include #include int main(void) { struct sockaddr sa; unsigned len, i, try; int fd; for (try = 0; try < 5; try++) { fd = socket(AF_PPPOX, SOCK_RAW, PX_PROTO_PPTP); if (fd == -1) return; memset(&sa, 0, sizeof(sa)); bind(fd, &sa, 0); len = sizeof(sa); getsockname(fd, &sa, &len); for (i = 0; i < len; i++) printf("%02x", ((unsigned char*)&sa)[i]); printf("\n"); } return 0; } # ./a.out 180002004700c012833d0088b002405eddc66d2b 180002004800408bf13a0088b002405eddc66d2b 18000200490080a5f13a0088b002405eddc66d2b 180002004a8ff13a0088b002405eddc66d2b 180002004b008096f13a0088b002405eddc66d2b -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 5/6] netfilter: nf_dup: add missing dependencies with NF_CONNTRACK
CONFIG_NF_CONNTRACK=m CONFIG_NF_DUP_IPV4=y results in: net/built-in.o: In function `nf_dup_ipv4': >> (.text+0xd434f): undefined reference to `nf_conntrack_untracked' Reported-by: kbuild test robotSigned-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/Kconfig | 1 + net/ipv6/netfilter/Kconfig | 1 + 2 files changed, 2 insertions(+) diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index a355841..c187c60 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -60,6 +60,7 @@ config NFT_REJECT_IPV4 config NFT_DUP_IPV4 tristate "IPv4 nf_tables packet duplication support" + depends on !NF_CONNTRACK || NF_CONNTRACK select NF_DUP_IPV4 help This module enables IPv4 packet duplication support for nf_tables. diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index f6a024e..e10a04c 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -49,6 +49,7 @@ config NFT_REJECT_IPV6 config NFT_DUP_IPV6 tristate "IPv6 nf_tables packet duplication support" + depends on !NF_CONNTRACK || NF_CONNTRACK select NF_DUP_IPV6 help This module enables IPv6 packet duplication support for nf_tables. -- 2.1.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 6/6] netfilter: nf_tables: use reverse traversal commit_list in nf_tables_abort
From: Xin LongWhen we use 'nft -f' to submit rules, it will build multiple rules into one netlink skb to send to kernel, kernel will process them one by one. meanwhile, it add the trans into commit_list to record every commit. if one of them's return value is -EAGAIN, status |= NFNL_BATCH_REPLAY will be marked. after all the process is done. it will roll back all the commits. now kernel use list_add_tail to add trans to commit, and use list_for_each_entry_safe to roll back. which means the order of adding and rollback is the same. that will cause some cases cannot work well, even trigger call trace, like: 1. add a set into table foo [return -EAGAIN]: commit_list = 'add set trans' 2. del foo: commit_list = 'add set trans' -> 'del set trans' -> 'del tab trans' then nf_tables_abort will be called to roll back: firstly process 'add set trans': case NFT_MSG_NEWSET: trans->ctx.table->use--; list_del_rcu(_trans_set(trans)->list); it will del the set from the table foo, but it has removed when del table foo [step 2], then the kernel will panic. the right order of rollback should be: 'del tab trans' -> 'del set trans' -> 'add set trans'. which is opposite with commit_list order. so fix it by rolling back commits with reverse order in nf_tables_abort. 
Signed-off-by: Xin Long Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index f1002dc..2cb429d 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4024,7 +4024,8 @@ static int nf_tables_abort(struct sk_buff *skb) struct nft_trans *trans, *next; struct nft_trans_elem *te; - list_for_each_entry_safe(trans, next, >nft.commit_list, list) { + list_for_each_entry_safe_reverse(trans, next, >nft.commit_list, +list) { switch (trans->msg_type) { case NFT_MSG_NEWTABLE: if (nft_trans_table_update(trans)) { -- 2.1.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v6 0/4] stmmac: create of compatible mdio bus for stmmac driver
From: Phil Reid Date: Mon, 14 Dec 2015 11:31:58 +0800 > Provide ability to specify a fixed phy in the device tree and > retain the mdio bus if no phy is found. This is needed where > a dsa is connected via a fixed phy and uses the mdio bus for config. > Fixed ptp ref clock calculations for the stmmac when ptp ref clock > is running at <= 50Mhz. Also add device tree setting to config > ptp clk source on socfpga platforms. Series applied to net-next, thanks. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[net-next 7/9] e1000e: Do not read ICR in Other interrupt
From: Benjamin PoirierRemoves the ICR read in the other interrupt handler, uses EIAC to autoclear the Other bit from ICR and IMS. This allows us to avoid interference with Rx and Tx interrupts in the Other interrupt handler. The information read from ICR is not needed. IMS is configured such that the only interrupt cause that can trigger the Other interrupt is Link Status Change. Signed-off-by: Benjamin Poirier Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/e1000e/netdev.c | 22 +++--- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index 26cf183..56bc422 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -1905,24 +1905,15 @@ static irqreturn_t e1000_msix_other(int __always_unused irq, void *data) struct net_device *netdev = data; struct e1000_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = >hw; - u32 icr = er32(ICR); - if (icr & adapter->eiac_mask) - ew32(ICS, (icr & adapter->eiac_mask)); + hw->mac.get_link_status = true; - if (icr & E1000_ICR_OTHER) { - if (!(icr & E1000_ICR_LSC)) - goto no_link_interrupt; - hw->mac.get_link_status = true; - /* guard against interrupt when we're going down */ - if (!test_bit(__E1000_DOWN, >state)) - mod_timer(>watchdog_timer, jiffies + 1); + /* guard against interrupt when we're going down */ + if (!test_bit(__E1000_DOWN, >state)) { + mod_timer(>watchdog_timer, jiffies + 1); + ew32(IMS, E1000_IMS_OTHER); } -no_link_interrupt: - if (!test_bit(__E1000_DOWN, >state)) - ew32(IMS, E1000_IMS_LSC | E1000_IMS_OTHER); - return IRQ_HANDLED; } @@ -2021,6 +2012,7 @@ static void e1000_configure_msix(struct e1000_adapter *adapter) hw->hw_addr + E1000_EITR_82574(vector)); else writel(1, hw->hw_addr + E1000_EITR_82574(vector)); + adapter->eiac_mask |= E1000_IMS_OTHER; /* Cause Tx interrupts on every write back */ ivar |= (1 << 31); @@ -2249,7 
+2241,7 @@ static void e1000_irq_enable(struct e1000_adapter *adapter) if (adapter->msix_entries) { ew32(EIAC_82574, adapter->eiac_mask & E1000_EIAC_MASK_82574); - ew32(IMS, adapter->eiac_mask | E1000_IMS_OTHER | E1000_IMS_LSC); + ew32(IMS, adapter->eiac_mask | E1000_IMS_LSC); } else if ((hw->mac.type == e1000_pch_lpt) || (hw->mac.type == e1000_pch_spt)) { ew32(IMS, IMS_ENABLE_MASK | E1000_IMS_ECCER); -- 2.5.0 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[net-next 0/9][pull request] 1GbE Intel Wired LAN Driver Updates 2015-12-14
This series contains updates to e1000e and igb. Alex Duyck changes e1000e_up() to void since it always returned 0, also by making it void, we can drop some code since we no longer have to worry about non-zero return values. Aaron Sierra removes GS40G specific defines and functions since the i210 internal PHY can be accessed with the access functions shared by 82580, i350 and i354 devices. Also removes the code to add the PHY address into the PCDL register address, since there is no real reason to do so. Joe updates the cable length function to report the true min, max and average cable length across all four pairs for i210. Also updated ethtool to use enum-based labels instead of hard coded values. Benjamin Poirier cleans up code that is never reachable since MSI-X interrupts are not shared in e1000e. Also removes the ICR read in the other interrupt handler, since the information is not needed and IMS is configured such that only a link status change can trigger the other interrupt handler. Fixed in MSI-X mode, there is no handler for the LSC interrupt so there is no point in writing that to ICS now that we always assume other interrupts are caused by LSC. The following are changes since commit cb4396edd84ed73081635fb933d19c1410fafaf4: drivers/net: fix eisa_driver probe section mismatch and are available in the git repository at: git://git.kernel.org/pub/scm/linux/kernel/git/jkirsher/next-queue 1GbE Aaron Sierra (2): igb: Remove GS40G specific defines/functions igb: Don't add PHY address to PCDL address Alexander Duyck (1): e1000e: Switch e1000e_up to void, drop code checking for error result Benjamin Poirier (4): e1000e: Remove unreachable code e1000e: Do not read ICR in Other interrupt e1000e: Do not write lsc to ics in msi-x mode e1000e: Fix msi-x interrupt automask Joe Schultz (2): igb: Improve cable length function for I210, etc. 
igb: Explicitly label self-test result indices drivers/net/ethernet/intel/e1000e/defines.h| 3 +- drivers/net/ethernet/intel/e1000e/e1000.h | 2 +- drivers/net/ethernet/intel/e1000e/netdev.c | 80 ++- drivers/net/ethernet/intel/igb/e1000_82575.c | 13 +-- drivers/net/ethernet/intel/igb/e1000_defines.h | 5 +- drivers/net/ethernet/intel/igb/e1000_hw.h | 1 + drivers/net/ethernet/intel/igb/e1000_i210.c| 5 +- drivers/net/ethernet/intel/igb/e1000_i210.h| 2 +- drivers/net/ethernet/intel/igb/e1000_phy.c | 135 + drivers/net/ethernet/intel/igb/e1000_phy.h | 15 +-- drivers/net/ethernet/intel/igb/igb_ethtool.c | 38 --- 11 files changed, 120 insertions(+), 179 deletions(-) -- 2.5.0 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH iproute2 -next] bpf: minor fix in api and bpf_dump_error() usage
Fix a whitespace in bpf_dump_error() usage, and also a missing closing bracket in ntohl() macro for eBPF programs. Signed-off-by: Daniel Borkmann--- include/bpf_api.h | 2 +- tc/tc_bpf.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/bpf_api.h b/include/bpf_api.h index 8503b9a..0666a31 100644 --- a/include/bpf_api.h +++ b/include/bpf_api.h @@ -53,7 +53,7 @@ #endif #ifndef ntohl -# define ntohl(X) __constant_ntohl((X) +# define ntohl(X) __constant_ntohl((X)) #endif /** Section helper macros. */ diff --git a/tc/tc_bpf.c b/tc/tc_bpf.c index beb74be..f9b2b00 100644 --- a/tc/tc_bpf.c +++ b/tc/tc_bpf.c @@ -1042,7 +1042,7 @@ static int bpf_prog_attach(const char *section, "license:\'%s\') %s%s (%d)!\n\n", section, prog->type, prog->size / sizeof(struct bpf_insn), - prog->license, fd < 0 ? "rejected :" : + prog->license, fd < 0 ? "rejected: " : "loaded", fd < 0 ? strerror(errno) : "", fd < 0 ? errno : fd); } -- 1.9.3 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v3 1/4] geneve: Add geneve udp port offload for ethernet devices
On 12/11/2015 7:11 PM, Tom Herbert wrote: On Tue, Dec 8, 2015 at 10:12 AM, Anjali Singhai Jainwrote: Add ndo_ops to add/del UDP ports to a device that supports geneve offload. v3: Add some more comments about the use of the new ndo ops. Signed-off-by: Anjali Singhai Jain Signed-off-by: Kiran Patil --- drivers/net/geneve.c | 23 +++ include/linux/netdevice.h | 21 - 2 files changed, 43 insertions(+), 1 deletion(-) diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index de5c30c..b43fd56 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -371,8 +371,11 @@ static struct socket *geneve_create_sock(struct net *net, bool ipv6, static void geneve_notify_add_rx_port(struct geneve_sock *gs) { + struct net_device *dev; struct sock *sk = gs->sock->sk; + struct net *net = sock_net(sk); sa_family_t sa_family = sk->sk_family; + __be16 port = inet_sk(sk)->inet_sport; int err; if (sa_family == AF_INET) { @@ -381,6 +384,14 @@ static void geneve_notify_add_rx_port(struct geneve_sock *gs) pr_warn("geneve: udp_add_offload failed with status %d\n", err); } + + rcu_read_lock(); + for_each_netdev_rcu(net, dev) { + if (dev->netdev_ops->ndo_add_geneve_port) + dev->netdev_ops->ndo_add_geneve_port(dev, sa_family, +port); + } + rcu_read_unlock(); What about IPv6 case? The driver still gets add port calls for IPv6 and can decide to offload L4 RX checksum if the HW is capable. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 0/6] netfilter fixes for net
From: Pablo Neira AyusoDate: Mon, 14 Dec 2015 12:25:40 +0100 > The following patchset contains Netfilter fixes for you net tree, > specifically for nf_tables and nfnetlink_queue, they are: Pulled, thanks a lot Pablo. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-next] net, cgroup: cgroup_sk_update_lock was missing initializer
bd1060a1d671 ("sock, cgroup: add sock->sk_cgroup") added global spinlock cgroup_sk_update_lock but erroneously skipped initializer leading to uninitialized spinlock warning. Fix it by using DEFINE_SPINLOCK(). Signed-off-by: Tejun HeoReported-by: Dexuan Cui Fixes: bd1060a1d671 ("sock, cgroup: add sock->sk_cgroup") --- Hello, Dexuan. Oops, sorry about that. Somehow thought it was a different problem which is already fixed. This should do it. Thanks. kernel/cgroup.c |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 4f8f792..4466273f 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -5790,7 +5790,7 @@ EXPORT_SYMBOL_GPL(cgroup_get_from_path); #if defined(CONFIG_CGROUP_NET_PRIO) || defined(CONFIG_CGROUP_NET_CLASSID) -spinlock_t cgroup_sk_update_lock; +DEFINE_SPINLOCK(cgroup_sk_update_lock); static bool cgroup_sk_alloc_disabled __read_mostly; void cgroup_sk_alloc_disable(void) -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 2/2] net: vxlan: enable local checksum offload on HW_CSUM devices
Signed-off-by: Edward Cree--- drivers/net/vxlan.c | 5 - 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 6369a57..c1660d6 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -1785,6 +1785,9 @@ static int vxlan_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *sk bool udp_sum = !!(vxflags & VXLAN_F_UDP_CSUM); int type = udp_sum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL; u16 hdrlen = sizeof(struct vxlanhdr); + /* Is device able to do the inner checksum? */ + bool inner_csum = skb_dst(skb) && skb_dst(skb)->dev && + (skb_dst(skb)->dev->features & NETIF_F_HW_CSUM); if ((vxflags & VXLAN_F_REMCSUM_TX) && skb->ip_summed == CHECKSUM_PARTIAL) { @@ -1814,7 +1817,7 @@ static int vxlan_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *sk if (WARN_ON(!skb)) return -ENOMEM; - skb = iptunnel_handle_offloads(skb, udp_sum, type); + skb = iptunnel_handle_offloads(skb, udp_sum && !inner_csum, type); if (IS_ERR(skb)) return PTR_ERR(skb); -- 2.4.3 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 1/2] net: udp: local checksum offload for encapsulation
The arithmetic properties of the ones-complement checksum mean that a correctly checksummed inner packet, including its checksum, has a ones complement sum depending only on whatever value was used to initialise the checksum field before checksumming (in the case of TCP and UDP, this is the ones complement sum of the pseudo header, complemented). Consequently, if we are going to offload the inner checksum with CHECKSUM_PARTIAL, we can compute the outer checksum based only on the packet data not covered by the inner checksum, and the initial value of the inner checksum field. Signed-off-by: Edward Cree--- net/ipv4/udp.c | 31 +++ 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 0c7b0e6..07d679e 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -767,12 +767,35 @@ void udp_set_csum(bool nocheck, struct sk_buff *skb, { struct udphdr *uh = udp_hdr(skb); - if (nocheck) + if (nocheck) { uh->check = 0; - else if (skb_is_gso(skb)) + } else if (skb_is_gso(skb)) { uh->check = ~udp_v4_check(len, saddr, daddr, 0); - else if (skb_dst(skb) && skb_dst(skb)->dev && -(skb_dst(skb)->dev->features & NETIF_F_V4_CSUM)) { + } else if (skb->ip_summed == CHECKSUM_PARTIAL && + skb_dst(skb) && skb_dst(skb)->dev && + (skb_dst(skb)->dev->features & NETIF_F_HW_CSUM)) { + /* Everything from csum_start onwards will be +* checksummed and will thus have a sum of whatever +* we previously put in the checksum field (eg. sum +* of pseudo-header) +*/ + __wsum csum; + + /* Fill in our pseudo-header checksum */ + uh->check = ~udp_v4_check(len, saddr, daddr, 0); + /* Start with complement of inner pseudo-header checksum */ + csum = ~skb_checksum(skb, skb_checksum_start_offset(skb) + skb->csum_offset, +2, 0); + /* Add in checksum of our headers (incl. 
pseudo-header +* checksum filled in above) +*/ + csum = skb_checksum(skb, 0, skb_checksum_start_offset(skb), csum); + /* The result is the outer checksum */ + uh->check = csum_fold(csum); + if (uh->check == 0) + uh->check = CSUM_MANGLED_0; + } else if (skb_dst(skb) && skb_dst(skb)->dev && + (skb_dst(skb)->dev->features & NETIF_F_V4_CSUM)) { BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL); -- 2.4.3 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH iproute2] ip, route: fix minor compile warning
On Mon, Dec 14, 2015 at 04:34:29PM +0100, Daniel Borkmann wrote: > Seems like gcc (4.8.3) doesn't catch this false positive, triggering > after 0f7543322c5f ("route: ignore RTAX_HOPLIMIT of value -1"): > > iproute.c: In function 'print_route': > iproute.c:301:12: warning: 'val' may be used uninitialized in this function > [-Wmaybe-uninitialized] > features &= ~RTAX_FEATURE_ECN; > ^ > iproute.c:575:10: note: 'val' was declared here > __u32 val; > ^ > So just shut it up by initializing to 0. Hmm. Interestingly, my patch shouldn't have changed anything relevant for gcc's decision. OTOH, I don't see a warning using gcc-4.9.3. Cheers, Phil -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH iproute2] ip, route: fix minor compile warning
On 12/14/2015 04:51 PM, Phil Sutter wrote: On Mon, Dec 14, 2015 at 04:34:29PM +0100, Daniel Borkmann wrote: Seems like gcc (4.8.3) doesn't catch this false positive, triggering after 0f7543322c5f ("route: ignore RTAX_HOPLIMIT of value -1"): iproute.c: In function 'print_route': iproute.c:301:12: warning: 'val' may be used uninitialized in this function [-Wmaybe-uninitialized] features &= ~RTAX_FEATURE_ECN; ^ iproute.c:575:10: note: 'val' was declared here __u32 val; ^ So just shut it up by initializing to 0. Hmm. Interestingly, my patch shouldn't have changed anything relevant for gcc's decision. OTOH, I don't see a warning using gcc-4.9.3. If I revert it, the warning is gone for me ;) perhaps some heuristic issue with that gcc version. Cheers, Daniel -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Another pppoe-related crash
On Fri, Dec 11, 2015 at 05:07:54PM +0200, Andrew wrote: > I've got another pppoe-related crash on one PPPoE BRAS. > > Kernel is 4.1.13 with patch "pppoe: fix memory corruption in padt work > structure" > Commit 1acea4f6ce1b ("ppp: fix pppoe_dev deletion condition in pppoe_release()") is missing from 4.1.13. Can you try with 4.1.14 (or at least manually apply this patch)? -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[RFC PATCH net-next 0/2] Local checksum offload for VXLAN
When the inner packet checksum is offloaded, the outer UDP checksum is easy to calculate as it doesn't depend on the payload (because the inner checksum cancels out everything from the inner packet except the pseudo header). Thus, transmit checksums for VXLAN (and in principle other encapsulations, but I haven't enabled it for / tested with those) can be offloaded on any device supporting NETIF_F_HW_CSUM. Only the innermost checksum has to be offloaded, the rest are filled in by the stack. Tested by hacking a driver to report NETIF_F_HW_CSUM, call skb_checksum_help before transmitting a packet, and not actually offload anything to the hw. I did it that way because I don't have any hw that can actually offload the inner checksum; but I should be able to get hold of some soon. Edward Cree (2): net: udp: local checksum offload for encapsulation net: vxlan: enable local checksum offload on HW_CSUM devices drivers/net/vxlan.c | 5 - net/ipv4/udp.c | 31 +++ 2 files changed, 31 insertions(+), 5 deletions(-) -- 2.4.3 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH iproute2] ip, route: fix minor compile warning
Seems like gcc (4.8.3) doesn't catch this false positive, triggering after 0f7543322c5f ("route: ignore RTAX_HOPLIMIT of value -1"): iproute.c: In function 'print_route': iproute.c:301:12: warning: 'val' may be used uninitialized in this function [-Wmaybe-uninitialized] features &= ~RTAX_FEATURE_ECN; ^ iproute.c:575:10: note: 'val' was declared here __u32 val; ^ So just shut it up by initializing to 0. Signed-off-by: Daniel Borkmann--- ip/iproute.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ip/iproute.c b/ip/iproute.c index c42ea0b..867c8da 100644 --- a/ip/iproute.c +++ b/ip/iproute.c @@ -572,7 +572,7 @@ int print_route(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) mxlock = *(unsigned*)RTA_DATA(mxrta[RTAX_LOCK]); for (i=2; i<= RTAX_MAX; i++) { - __u32 val; + __u32 val = 0; if (mxrta[i] == NULL) continue; -- 1.9.3 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [V2 PATCH 1/1] net: sctp: dynamically enable or disable pf state
On Mon, Dec 14, 2015 at 02:22:19PM +0800, zyjzyj2...@gmail.com wrote: > From: Zhu Yanjun> > As we all know, the value of pf_retrans >= max_retrans_path can > disable pf state. The variables of pf_retrans and max_retrans_path > can be changed by the user space application. > > Sometimes the user expects to disable pf state while the 2 > variables are changed to enable pf state. So it is necessary to > introduce a new variable to disable pf state. > > According to the suggestions from Vlad Yasevich, extra1 and extra2 > are removed. The initialization of pf_enable is added. > > Signed-off-by: Zhu Yanjun > --- > include/net/netns/sctp.h |7 +++ > net/sctp/protocol.c |3 +++ > net/sctp/sm_sideeffect.c |5 - > net/sctp/sysctl.c|7 +++ > 4 files changed, 21 insertions(+), 1 deletion(-) > > diff --git a/include/net/netns/sctp.h b/include/net/netns/sctp.h > index 8ba379f..c501d67 100644 > --- a/include/net/netns/sctp.h > +++ b/include/net/netns/sctp.h > @@ -89,6 +89,13 @@ struct netns_sctp { > int pf_retrans; > > /* > + * Disable Potentially-Failed feature, the feature is enabled by default > + * pf_enable- 0 : disable pf > + * - >0 : enable pf > + */ > + int pf_enable; > + > + /* >* Policy for preforming sctp/socket accounting >* 0 - do socket level accounting, all assocs share sk_sndbuf >* 1 - do sctp accounting, each asoc may use sk_sndbuf bytes Please add this documentation to Documentation/networking/ip-sysctl.txt too, mentioning the RFC/draft it's about. https://datatracker.ietf.org/doc/draft-ietf-tsvwg-sctp-failover/ And update the text on pf_retrans mentioning this new variable as well. Marcelo -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net] net: add validation for the socket syscall protocol argument
郭永刚 reported that one could simply crash the kernel as root by using a simple program: int socket_fd; struct sockaddr_in addr; addr.sin_port = 0; addr.sin_addr.s_addr = INADDR_ANY; addr.sin_family = 10; socket_fd = socket(10,3,0x4000); connect(socket_fd , ,16); AF_INET, AF_INET6 sockets actually only support 8-bit protocol identifiers. inet_sock's skc_protocol field thus is sized accordingly, thus larger protocol identifiers simply cut off the higher bits and store a zero in the protocol fields. This could lead to e.g. NULL function pointer because as a result of the cut off inet_num is zero and we call down to inet_autobind, which is NULL for raw sockets. kernel: Call Trace: kernel: [] ? inet_autobind+0x2e/0x70 kernel: [] inet_dgram_connect+0x54/0x80 kernel: [] SYSC_connect+0xd9/0x110 kernel: [] ? ptrace_notify+0x5b/0x80 kernel: [] ? syscall_trace_enter_phase2+0x108/0x200 kernel: [] SyS_connect+0xe/0x10 kernel: [] tracesys_phase2+0x84/0x89 I found no particular commit which introduced this problem. CVE: CVE-2015-8543 Reported-by: 郭永刚Signed-off-by: Hannes Frederic Sowa --- net/ipv4/af_inet.c | 3 +++ net/ipv6/af_inet6.c | 3 +++ net/socket.c| 3 +++ 3 files changed, 9 insertions(+) diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 11c4ca1..cfb4496 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -257,6 +257,9 @@ static int inet_create(struct net *net, struct socket *sock, int protocol, int try_loading_module = 0; int err; + if (protocol >= IPPROTO_MAX) + return -EINVAL; + sock->state = SS_UNCONNECTED; /* Look for the requested type/protocol pair. */ diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 8ec0df7..9fb093c 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -109,6 +109,9 @@ static int inet6_create(struct net *net, struct socket *sock, int protocol, int try_loading_module = 0; int err; + if (protocol >= IPPROTO_MAX) + return -EINVAL; + /* Look for the requested type/protocol pair. 
*/ lookup_protocol: err = -ESOCKTNOSUPPORT; diff --git a/net/socket.c b/net/socket.c index 456fadb..d2f3d49 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1098,6 +1098,9 @@ int __sock_create(struct net *net, int family, int type, int protocol, return -EAFNOSUPPORT; if (type < 0 || type >= SOCK_MAX) return -EINVAL; + /* upper bound should be tested by per-protocol .create callbacks */ + if (protocol < 0) + return -EINVAL; /* Compatibility. -- 2.5.0 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v5 net-next 4/4] ila: Add generic ILA translation facility
From: Tom Herbert Date: Mon, 14 Dec 2015 15:56:48 -0800 > +static int alloc_ila_locks(struct ila_net *ilan, gfp_t gfp) gfp is always GFP_KERNEL, thus this flexibility is never necessary. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v2] net/macb: add support for resetting PHY using GPIO
On December 14, 2015 2:56:34 PM PST, Rob Herringwrote: >On Fri, Dec 11, 2015 at 11:34:53AM +0100, Gregory CLEMENT wrote: >> With device tree it is no more possible to reset the PHY at board >> level. Furthermore, doing in the driver allow to power down the PHY >when >> the network interface is no more used. >> >> This reset can't be done at the PHY driver level. The PHY must be >able to >> answer the to the mii bus scan to let the kernel creating a PHY >device. >> >> The patch introduces a new optional property "phy-reset-gpios" >inspired >> from the one use for the FEC. >> >> Signed-off-by: Gregory CLEMENT >> --- >> >> Since the v1, I used the gpiod functions. It allows to simplify the >> code and to not introduce any #ifdef. >> >> I also rename the property in phy-reset-gpios, even if actually the >> gpiod will match both phy-reset-gpios and phy-reset-gpio. >> >> >> Documentation/devicetree/bindings/net/macb.txt | 3 +++ >> drivers/net/ethernet/cadence/macb.c| 8 >> drivers/net/ethernet/cadence/macb.h| 1 + >> 3 files changed, 12 insertions(+) >> >> diff --git a/Documentation/devicetree/bindings/net/macb.txt >b/Documentation/devicetree/bindings/net/macb.txt >> index b5d7976..4a7fb6c 100644 >> --- a/Documentation/devicetree/bindings/net/macb.txt >> +++ b/Documentation/devicetree/bindings/net/macb.txt >> @@ -19,6 +19,9 @@ Required properties: >> Optional elements: 'tx_clk' >> - clocks: Phandles to input clocks. >> >> +Optional properties: >> +- phy-reset-gpios : Should specify the gpio for phy reset >> + > >This alone is simple enough, but I worry that this doesn't really >scale. >What if you need to enable clocks or regulators for the same reason? >The >mmc folks did a pwrseq binding for similar reasons. I don't think I'd >recommend that here as I think it is kind of ugly. We really need a >pre-probe/scan hook for drivers. This is also needed for USB devices >mounted on boards. 
In this particular case, the way Ethernet MAC drivers register their MDIO buses and therefore PHYs, there is always a good way to deassert the PHY GPIO line without requiring major core device driver changes. Worst case, there is the MDIO bus reset callback which could be used for that matter. In the case of PCI, USB etc. I do agree having a way to twiddle things before scanning/probing would be awesome. I have some boards here which have a GPIO-controlled regulator and hacking the RC driver to deal with that is suboptimal... > >But I'm not going to hold up something simple to do all that, so: > >Acked-by: Rob Herring > >___ >linux-arm-kernel mailing list >linux-arm-ker...@lists.infradead.org >http://lists.infradead.org/mailman/listinfo/linux-arm-kernel -- Sent from my Android device with K-9 Mail. Please excuse my brevity. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [Patch net] pptp: verify sockaddr_len in pptp_bind() and pptp_connect()
From: Cong WangDate: Mon, 14 Dec 2015 13:48:36 -0800 > Reported-by: Dmitry Vyukov > Signed-off-by: Cong Wang Applied and queued up for -stable, thanks. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH net] skbuff: Fix offset error in skb_reorder_vlan_header
From: Vladislav Yasevich Date: Mon, 14 Dec 2015 17:44:10 -0500 > skb_reorder_vlan_header is called after the vlan header has > been pulled. As a result the offset of the beginning of > the mac header has been increased by 4 bytes (VLAN_HLEN). > When moving the mac addresses, include this increase in > the offset calculation so that the mac addresses are > copied correctly. > > Fixes: a6e18ff1117 (vlan: Fix untag operations of stacked vlans with > REORDER_HEADER off) > CC: Nicolas Dichtel > CC: Patrick McHardy > Signed-off-by: Vladislav Yasevich Applied and queued up for -stable, thanks Vlad. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] sh_eth: fix descriptor access endianness
From: Sergei ShtylyovDate: Sun, 13 Dec 2015 23:05:07 +0300 > The driver never calls cpu_to_edmac() when writing the descriptor address > and edmac_to_cpu() when reading it, although it should -- fix this. > > Note that the frame/buffer length descriptor field accesses also need fixing > but since they are both 16-bit we can't use {cpu|edmac}_to_{edmac|cpu}()... > > Signed-off-by: Sergei Shtylyov Applied. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] sh_eth: fix TX buffer byte-swapping
From: Sergei ShtylyovDate: Sun, 13 Dec 2015 21:27:04 +0300 > For the little-endian SH771x kernels the driver has to byte-swap the RX/TX > buffers, however yet unset physcial address from the TX descriptor is used > to call sh_eth_soft_swap(). Use 'skb->data' instead... > > Fixes: 31fcb99d9958 ("net: sh_eth: remove __flush_purge_region") > Signed-off-by: Sergei Shtylyov Applied. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH net] ravb: Add disable 10base
From: Yoshihiro KanekoDate: Mon, 14 Dec 2015 00:15:58 +0900 > From: Kazuya Mizuguchi > > Ethernet AVB does not support 10 Mbps transfer speed. > > Signed-off-by: Kazuya Mizuguchi > Signed-off-by: Yoshihiro Kaneko Applied. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH RFC 00/26] Phylink & SFP support
On Mon Dec 07 17:35, Russell King - ARM Linux wrote: > Hi, Hello. > SFP modules are hot-pluggable ethernet transceivers; they can be > detected at runtime and accordingly configured. There are a range of > modules offering many different features. > > Some SFP modules have PHYs conventional integrated into them, others > drive a laser diode from the Serdes bus. Some have monitoring, others > do not. > > Some SFP modules want to use SGMII over the Serdes link, others want > to use 1000base-X over the Serdes link. > > This makes it non-trivial to support with the existing code structure. > Not wanting to write something specific to the mvneta driver, I decided > to have a go at coming up with something more generic. > > My initial attempts were to provide a PHY driver, but I found that > phylib's state machine got in the way, and it was hard to support two > chained PHYs. Conversely, having a fixed DT specified setup (via > the fixed phy infrastructure) would allow some SFP modules to work, but > not others. The same is true of the "managed" in-band status (which > is SGMII.) > > The result is that I came up with phylink - an infrastructure layer > which sits between the network driver and any attached PHY, and a > SFP module layer detects the SFP module, and configures phylink > accordingly. > > Overall, this supports: > > * switching the serdes mode at the NIC driver > * controlling autonegotiation and autoneg results > * allowing PHYs to be hotplugged > * allowing SFP modules to be hotplugged with proper link indication > * fixed-mode links without involving phylib > * flow control > * EEE support > * reading SFP module EEPROMs > > Overall, phylink supports several link modes, with dynamic switching > possible between these: > * A true fixed link mode, where the parameters are set by DT. > * PHY mode, where we read the negotiation results from the PHY registers > and pass them to the NIC driver. 
> * SGMII mode, where the in-band status indicates the speed, duplex and > flow control settings of the link partner. > * 1000base-X mode, where the in-band status indicates only duplex and > flow control settings (different, incompatible bit layout from SGMII.) I've been working on some similar code to handle interactions with a wide range of SFF modules, 1G to 100G, on Linux network switches for some time. For practical reasons a lot of that was in userspace but I've been planning and recently working on an SFF kernel driver that does some of what's done in this series. I think the model you're proposing is right on, and since you're further along in implementation I'd like to help round out support for the other SFF modules if I can. Then make this work on the network ASICs I have access to. Any concrete plans for QSFP or the new 25G modules? > Ethtool support is included, as well as emulation of the MII registers > for situations where a PHY is not attached, giving compatible emulation > of existing user interfaces where required. > > The patches here include modification of mvneta (against 4.4-rc1, so > probably won't apply to current development tips.) It basically > hooks into the places where the phylib would hook into. > > DT wise, the changes needed to support SFP look like this (example > taken from Clearfog): > > ethernet@34000 { > + managed = "in-band-status"; > phy-mode = "sgmii"; > status = "okay"; > - > - fixed-link { > - speed = <1000>; > - full-duplex; > - }; > }; > ... > + sfp: sfp { > + compatible = "sff,sfp"; > + i2c-bus = <>; > + los-gpio = < 12 GPIO_ACTIVE_HIGH>; > + moddef0-gpio = < 15 GPIO_ACTIVE_LOW>; > + sfp,ethernet = <>; Using is unambiguous in the this case because there's only one serdes and one mac involved. To specify the mac/serdes/cage associations at the same level of detail as the gpios it might be nice (at least for some devices) to point to a serdes node (or 4 in the case of QSFP) instead of Any thoughts on that? 
Switch ASICs, and I imagine at least some NICs, are really flexible in terms of how serdes are wired to a cage. Both in the sense that the board designer gets to pick which wires route to the cage based on physical constraints and the user gets to pick which serdes or group of serdes compose the ethernet device. For example, using a breakout cable to get 4xSFP out of a QSFP or the other way around. Perhaps the simple case (sfp,ethernet -> ) can remain simple, but I'd be interested in any thoughts you have on introducing a serdes layer here. I think adding such a layer would make it easier to 1) make serdes to cage mappings part of the platform description (DT or ACPI) and 2) allow automatic reconfiguration of
[PATCH] 82xx: FCC: Fixing a bug causing to FCC port lock-up
The patch fixes an FCC port lock-up, which occurs as a result of a bug during underrun/collision handling. Within the tx_restart() function in mac-fcc.c, the address of the last BD is not calculated correctly. As a result of the wrong calculation of the last BD address, the next transmitted BD may be set to an area outside the transmit BD ring. This causes the port to lock up, and the lock-up is not recoverable. Signed-off-by: Martin Roth --- drivers/net/ethernet/freescale/fs_enet/mac-fcc.c |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/fs_enet/mac-fcc.c b/drivers/net/ethernet/freescale/fs_enet/mac-fcc.c index 08f5b91..52e0091 100644 --- a/drivers/net/ethernet/freescale/fs_enet/mac-fcc.c +++ b/drivers/net/ethernet/freescale/fs_enet/mac-fcc.c @@ -552,7 +552,7 @@ static void tx_restart(struct net_device *dev) cbd_t __iomem *prev_bd; cbd_t __iomem *last_tx_bd; - last_tx_bd = fep->tx_bd_base + (fpi->tx_ring * sizeof(cbd_t)); + last_tx_bd = fep->tx_bd_base + ((fpi->tx_ring - 1) * sizeof(cbd_t)); /* get the current bd held in TBPTR and scan back from this point */ recheck_bd = curr_tbptr = (cbd_t __iomem *) -- 1.7.9.5 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v5 net-next 4/4] ila: Add generic ILA translation facility
Tom Herbert wrote: > +static int ila_add_mapping(struct net *net, struct ila_xlat_params *p) > +{ > + struct ila_net *ilan = net_generic(net, ila_net_id); > + struct ila_map *ila, *head; > + spinlock_t *lock = ila_get_lock(ilan, p->identifier); > + int err = 0, order; > + > + if (!ilan->hooks_registered) { > + /* We defer registering net hooks in the namespace until the > + * first mapping is added. > + */ > + err = nf_register_net_hooks(net, ila_nf_hook_ops, > + ARRAY_SIZE(ila_nf_hook_ops)); > + if (err) > + return err; Looks as if this misses an ilan->hooks_registered = true > + } > + > + ila = kzalloc(sizeof(*ila), GFP_KERNEL); -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH net] net: fix IP early demux races
From: Eric DumazetDate: Mon, 14 Dec 2015 14:08:53 -0800 > From: Eric Dumazet > > David Wilder reported crashes caused by dst reuse. > > > I am seeing a crash on a distro V4.2.3 kernel caused by a double > release of a dst_entry. In ipv4_dst_destroy() the call to > list_empty() finds a poisoned next pointer, indicating the dst_entry > has already been removed from the list and freed. The crash occurs > 18 to 24 hours into a run of a network stress exerciser. > > > Thanks to his detailed report and analysis, we were able to understand > the core issue. > > IP early demux can associate a dst to skb, after a lookup in TCP/UDP > sockets. > > When socket cache is not properly set, we want to store into > sk->sk_dst_cache the dst for future IP early demux lookups, > by acquiring a stable refcount on the dst. > > Problem is this acquisition is simply using an atomic_inc(), > which works well, unless the dst was queued for destruction from > dst_release() noticing dst refcount went to zero, if DST_NOCACHE > was set on dst. > > We need to make sure current refcount is not zero before incrementing > it, or risk double free as David reported. > > This patch, being a stable candidate, adds two new helpers, and use > them only from IP early demux problematic paths. > > It might be possible to merge in net-next skb_dst_force() and > skb_dst_force_safe(), but I prefer having the smallest patch for stable > kernels : Maybe some skb_dst_force() callers do not expect skb->dst > can suddenly be cleared. > > Can probably be backported back to linux-3.6 kernels > > Reported-by: David J. Wilder > Tested-by: David J. Wilder > Signed-off-by: Eric Dumazet Applied and queued up for -stable, thanks Eric. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Fwd: Query regarding Coverity tool
Hi, May I know whether the community uses the Coverity tool and, if so, where I can find a repository of Coverity scans of kernels and the ignore list? There will obviously be false positives. Cheers, Pavi -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[net-next 6/9] e1000e: Remove unreachable code
From: Benjamin Poiriermsi-x interrupts are not shared so there's no need to check if the interrupt was really from this adapter. Signed-off-by: Benjamin Poirier Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/e1000e/netdev.c | 6 -- 1 file changed, 6 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index 955c8c7..26cf183 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -1907,12 +1907,6 @@ static irqreturn_t e1000_msix_other(int __always_unused irq, void *data) struct e1000_hw *hw = >hw; u32 icr = er32(ICR); - if (!(icr & E1000_ICR_INT_ASSERTED)) { - if (!test_bit(__E1000_DOWN, >state)) - ew32(IMS, E1000_IMS_OTHER); - return IRQ_NONE; - } - if (icr & adapter->eiac_mask) ew32(ICS, (icr & adapter->eiac_mask)); -- 2.5.0 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[net-next 9/9] e1000e: Fix msi-x interrupt automask
From: Benjamin PoirierSince the introduction of 82574 support in e1000e, the driver has worked on the assumption that msi-x interrupt generation is automatically disabled after each irq. As it turns out, this is not the case. Currently, rx interrupts can fire multiple times before and during napi processing. This can be a problem for users because frames that arrive in a certain window (after adapter->clean_rx() but before napi_complete_done() has cleared NAPI_STATE_SCHED) generate an interrupt which does not lead to napi_schedule(). These frames sit in the rx queue until another frame arrives (a tcp retransmit for example). While the EIAC and CTRL_EXT registers are properly configured for irq automask, the modification of IAM in e1000_configure_msix() is what prevents automask from working as intended. This patch removes that erroneous write and fixes interrupt rearming for tx interrupts. It also clears IAME from CTRL_EXT. This is not strictly necessary for operation of the driver but it is to avoid disruption from potential programs that access the registers directly, like `ethregs -c`. 
Reported-by: Frank Steiner Signed-off-by: Benjamin Poirier Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/e1000e/netdev.c | 11 +-- 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index d59c0bc..c71ba1b 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -1931,6 +1931,9 @@ static irqreturn_t e1000_intr_msix_tx(int __always_unused irq, void *data) /* Ring was not completely cleaned, so fire another interrupt */ ew32(ICS, tx_ring->ims_val); + if (!test_bit(__E1000_DOWN, >state)) + ew32(IMS, adapter->tx_ring->ims_val); + return IRQ_HANDLED; } @@ -2020,12 +2023,8 @@ static void e1000_configure_msix(struct e1000_adapter *adapter) ew32(IVAR, ivar); /* enable MSI-X PBA support */ - ctrl_ext = er32(CTRL_EXT); - ctrl_ext |= E1000_CTRL_EXT_PBA_CLR; - - /* Auto-Mask Other interrupts upon ICR read */ - ew32(IAM, ~E1000_EIAC_MASK_82574 | E1000_IMS_OTHER); - ctrl_ext |= E1000_CTRL_EXT_EIAME; + ctrl_ext = er32(CTRL_EXT) & ~E1000_CTRL_EXT_IAME; + ctrl_ext |= E1000_CTRL_EXT_PBA_CLR | E1000_CTRL_EXT_EIAME; ew32(CTRL_EXT, ctrl_ext); e1e_flush(); } -- 2.5.0 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[net-next 4/9] igb: Improve cable length function for I210, etc.
From: Joe SchultzPreviously, the PHY-specific code to get the cable length for the I210 internal and related PHYs was reporting the cable length of a single pair and reporting it as the min, max, and total cable length. Update it so that all four pairs are checked so the true min, max, and average cable lengths are reported. Signed-off-by: Joe Schultz Signed-off-by: Aaron Sierra Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/igb/e1000_defines.h | 5 ++- drivers/net/ethernet/intel/igb/e1000_hw.h | 1 + drivers/net/ethernet/intel/igb/e1000_phy.c | 54 ++ 3 files changed, 51 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/intel/igb/e1000_defines.h b/drivers/net/ethernet/intel/igb/e1000_defines.h index a61ee94..c3c598c 100644 --- a/drivers/net/ethernet/intel/igb/e1000_defines.h +++ b/drivers/net/ethernet/intel/igb/e1000_defines.h @@ -927,7 +927,10 @@ /* Intel i347-AT4 Registers */ -#define I347AT4_PCDL 0x10 /* PHY Cable Diagnostics Length */ +#define I347AT4_PCDL0 0x10 /* Pair 0 PHY Cable Diagnostics Length */ +#define I347AT4_PCDL1 0x11 /* Pair 1 PHY Cable Diagnostics Length */ +#define I347AT4_PCDL2 0x12 /* Pair 2 PHY Cable Diagnostics Length */ +#define I347AT4_PCDL3 0x13 /* Pair 3 PHY Cable Diagnostics Length */ #define I347AT4_PCDC 0x15 /* PHY Cable Diagnostics Control */ #define I347AT4_PAGE_SELECT0x16 diff --git a/drivers/net/ethernet/intel/igb/e1000_hw.h b/drivers/net/ethernet/intel/igb/e1000_hw.h index 2003b37..4034207 100644 --- a/drivers/net/ethernet/intel/igb/e1000_hw.h +++ b/drivers/net/ethernet/intel/igb/e1000_hw.h @@ -441,6 +441,7 @@ struct e1000_phy_info { u16 cable_length; u16 max_cable_length; u16 min_cable_length; + u16 pair_length[4]; u8 mdix; diff --git a/drivers/net/ethernet/intel/igb/e1000_phy.c b/drivers/net/ethernet/intel/igb/e1000_phy.c index 8015f3b..5b54254 100644 --- a/drivers/net/ethernet/intel/igb/e1000_phy.c +++ b/drivers/net/ethernet/intel/igb/e1000_phy.c @@ -1717,6 +1717,9 @@ s32 
igb_get_cable_length_m88_gen2(struct e1000_hw *hw) struct e1000_phy_info *phy = >phy; s32 ret_val; u16 phy_data, phy_data2, index, default_page, is_cm; + int len_tot = 0; + u16 len_min; + u16 len_max; switch (hw->phy.id) { case M88E1543_E_PHY_ID: @@ -1733,11 +1736,6 @@ s32 igb_get_cable_length_m88_gen2(struct e1000_hw *hw) if (ret_val) goto out; - /* Get cable length from PHY Cable Diagnostics Control Reg */ - ret_val = phy->ops.read_reg(hw, I347AT4_PCDL, _data); - if (ret_val) - goto out; - /* Check if the unit of cable length is meters or cm */ ret_val = phy->ops.read_reg(hw, I347AT4_PCDC, _data2); if (ret_val) @@ -1745,10 +1743,50 @@ s32 igb_get_cable_length_m88_gen2(struct e1000_hw *hw) is_cm = !(phy_data2 & I347AT4_PCDC_CABLE_LENGTH_UNIT); + /* Get cable length from Pair 0 length Regs */ + ret_val = phy->ops.read_reg(hw, I347AT4_PCDL0, _data); + if (ret_val) + goto out; + + phy->pair_length[0] = phy_data / (is_cm ? 100 : 1); + len_tot = phy->pair_length[0]; + len_min = phy->pair_length[0]; + len_max = phy->pair_length[0]; + + /* Get cable length from Pair 1 length Regs */ + ret_val = phy->ops.read_reg(hw, I347AT4_PCDL1, _data); + if (ret_val) + goto out; + + phy->pair_length[1] = phy_data / (is_cm ? 100 : 1); + len_tot += phy->pair_length[1]; + len_min = min(len_min, phy->pair_length[1]); + len_max = max(len_max, phy->pair_length[1]); + + /* Get cable length from Pair 2 length Regs */ + ret_val = phy->ops.read_reg(hw, I347AT4_PCDL2, _data); + if (ret_val) + goto out; + + phy->pair_length[2] = phy_data / (is_cm ? 100 : 1); + len_tot += phy->pair_length[2]; + len_min = min(len_min, phy->pair_length[2]); + len_max = max(len_max, phy->pair_length[2]); + + /* Get cable length from Pair 3 length Regs */ + ret_val = phy->ops.read_reg(hw, I347AT4_PCDL3, _data); + if (ret_val) + goto out; + + phy->pair_length[3] = phy_data / (is_cm ? 100 : 1); + len_tot += phy->pair_length[3]; + len_min = min(len_min, phy->pair_length[3]); +
[net-next 8/9] e1000e: Do not write lsc to ics in msi-x mode
From: Benjamin PoirierIn msi-x mode, there is no handler for the lsc interrupt so there is no point in writing that to ics now that we always assume Other interrupts are caused by lsc. Reviewed-by: Jasna Hodzic Signed-off-by: Benjamin Poirier Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/e1000e/defines.h | 3 ++- drivers/net/ethernet/intel/e1000e/netdev.c | 28 +--- 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000e/defines.h b/drivers/net/ethernet/intel/e1000e/defines.h index 133d407..f7c7804 100644 --- a/drivers/net/ethernet/intel/e1000e/defines.h +++ b/drivers/net/ethernet/intel/e1000e/defines.h @@ -441,12 +441,13 @@ #define E1000_IMS_RXQ1 E1000_ICR_RXQ1 /* Rx Queue 1 Interrupt */ #define E1000_IMS_TXQ0 E1000_ICR_TXQ0 /* Tx Queue 0 Interrupt */ #define E1000_IMS_TXQ1 E1000_ICR_TXQ1 /* Tx Queue 1 Interrupt */ -#define E1000_IMS_OTHER E1000_ICR_OTHER /* Other Interrupts */ +#define E1000_IMS_OTHER E1000_ICR_OTHER /* Other Interrupt */ /* Interrupt Cause Set */ #define E1000_ICS_LSC E1000_ICR_LSC /* Link Status Change */ #define E1000_ICS_RXSEQ E1000_ICR_RXSEQ /* Rx sequence error */ #define E1000_ICS_RXDMT0E1000_ICR_RXDMT0/* Rx desc min. threshold */ +#define E1000_ICS_OTHER E1000_ICR_OTHER /* Other Interrupt */ /* Transmit Descriptor Control */ #define E1000_TXDCTL_PTHRESH 0x003F /* TXDCTL Prefetch Threshold */ diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index 56bc422..d59c0bc 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -4132,10 +4132,24 @@ void e1000e_reset(struct e1000_adapter *adapter) } -void e1000e_up(struct e1000_adapter *adapter) +/** + * e1000e_trigger_lsc - trigger an LSC interrupt + * @adapter: + * + * Fire a link status change interrupt to start the watchdog. 
+ **/ +static void e1000e_trigger_lsc(struct e1000_adapter *adapter) { struct e1000_hw *hw = >hw; + if (adapter->msix_entries) + ew32(ICS, E1000_ICS_OTHER); + else + ew32(ICS, E1000_ICS_LSC); +} + +void e1000e_up(struct e1000_adapter *adapter) +{ /* hardware has been reset, we need to reload some things */ e1000_configure(adapter); @@ -4147,11 +4161,7 @@ void e1000e_up(struct e1000_adapter *adapter) netif_start_queue(adapter->netdev); - /* fire a link change interrupt to start the watchdog */ - if (adapter->msix_entries) - ew32(ICS, E1000_ICS_LSC | E1000_ICR_OTHER); - else - ew32(ICS, E1000_ICS_LSC); + e1000e_trigger_lsc(adapter); } static void e1000e_flush_descriptors(struct e1000_adapter *adapter) @@ -4576,11 +4586,7 @@ static int e1000_open(struct net_device *netdev) hw->mac.get_link_status = true; pm_runtime_put(>dev); - /* fire a link status change interrupt to start the watchdog */ - if (adapter->msix_entries) - ew32(ICS, E1000_ICS_LSC | E1000_ICR_OTHER); - else - ew32(ICS, E1000_ICS_LSC); + e1000e_trigger_lsc(adapter); return 0; -- 2.5.0 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[net-next 3/9] igb: Don't add PHY address to PCDL address
From: Aaron SierraThere is no reason to add the PHY address into the PCDL register address. Signed-off-by: Aaron Sierra Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/igb/e1000_phy.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/igb/e1000_phy.c b/drivers/net/ethernet/intel/igb/e1000_phy.c index c906826..8015f3b 100644 --- a/drivers/net/ethernet/intel/igb/e1000_phy.c +++ b/drivers/net/ethernet/intel/igb/e1000_phy.c @@ -1734,8 +1734,7 @@ s32 igb_get_cable_length_m88_gen2(struct e1000_hw *hw) goto out; /* Get cable length from PHY Cable Diagnostics Control Reg */ - ret_val = phy->ops.read_reg(hw, (I347AT4_PCDL + phy->addr), - _data); + ret_val = phy->ops.read_reg(hw, I347AT4_PCDL, _data); if (ret_val) goto out; -- 2.5.0 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[net-next 2/9] igb: Remove GS40G specific defines/functions
From: Aaron SierraThe I210 internal PHY can be accessed just as well with the access functions shared by 82580, I350, and I354 devices. A side effect of relying on the common functions, is that I210 cable length support is folded back into the common case which effectively reverts the following commit: commit 59f301046b276f87483b3afa3201a4273def06a9 Author: Carolyn Wyborny Date: Wed Oct 10 04:42:59 2012 + igb: Update get cable length function for i210/i211 Cc: Carolyn Wyborny Signed-off-by: Aaron Sierra Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/igb/e1000_82575.c | 13 ++--- drivers/net/ethernet/intel/igb/e1000_i210.c | 5 +- drivers/net/ethernet/intel/igb/e1000_i210.h | 2 +- drivers/net/ethernet/intel/igb/e1000_phy.c | 82 +--- drivers/net/ethernet/intel/igb/e1000_phy.h | 15 + 5 files changed, 11 insertions(+), 106 deletions(-) diff --git a/drivers/net/ethernet/intel/igb/e1000_82575.c b/drivers/net/ethernet/intel/igb/e1000_82575.c index 362911d..adb33e2 100644 --- a/drivers/net/ethernet/intel/igb/e1000_82575.c +++ b/drivers/net/ethernet/intel/igb/e1000_82575.c @@ -45,8 +45,6 @@ static s32 igb_get_cfg_done_82575(struct e1000_hw *); static s32 igb_init_hw_82575(struct e1000_hw *); static s32 igb_phy_hw_reset_sgmii_82575(struct e1000_hw *); static s32 igb_read_phy_reg_sgmii_82575(struct e1000_hw *, u32, u16 *); -static s32 igb_read_phy_reg_82580(struct e1000_hw *, u32, u16 *); -static s32 igb_write_phy_reg_82580(struct e1000_hw *, u32, u16); static s32 igb_reset_hw_82575(struct e1000_hw *); static s32 igb_reset_hw_82580(struct e1000_hw *); static s32 igb_set_d0_lplu_state_82575(struct e1000_hw *, bool); @@ -205,13 +203,10 @@ static s32 igb_init_phy_params_82575(struct e1000_hw *hw) case e1000_82580: case e1000_i350: case e1000_i354: - phy->ops.read_reg = igb_read_phy_reg_82580; - phy->ops.write_reg = igb_write_phy_reg_82580; - break; case e1000_i210: case e1000_i211: - phy->ops.read_reg = igb_read_phy_reg_gs40g; - 
phy->ops.write_reg = igb_write_phy_reg_gs40g; + phy->ops.read_reg = igb_read_phy_reg_82580; + phy->ops.write_reg = igb_write_phy_reg_82580; break; default: phy->ops.read_reg = igb_read_phy_reg_igp; @@ -2153,7 +2148,7 @@ void igb_vmdq_set_replication_pf(struct e1000_hw *hw, bool enable) * Reads the MDI control register in the PHY at offset and stores the * information read to data. **/ -static s32 igb_read_phy_reg_82580(struct e1000_hw *hw, u32 offset, u16 *data) +s32 igb_read_phy_reg_82580(struct e1000_hw *hw, u32 offset, u16 *data) { s32 ret_val; @@ -2177,7 +2172,7 @@ out: * * Writes data to MDI control register in the PHY at offset. **/ -static s32 igb_write_phy_reg_82580(struct e1000_hw *hw, u32 offset, u16 data) +s32 igb_write_phy_reg_82580(struct e1000_hw *hw, u32 offset, u16 data) { s32 ret_val; diff --git a/drivers/net/ethernet/intel/igb/e1000_i210.c b/drivers/net/ethernet/intel/igb/e1000_i210.c index 29f59c7..8aa7987 100644 --- a/drivers/net/ethernet/intel/igb/e1000_i210.c +++ b/drivers/net/ethernet/intel/igb/e1000_i210.c @@ -861,10 +861,10 @@ s32 igb_pll_workaround_i210(struct e1000_hw *hw) if (ret_val) nvm_word = E1000_INVM_DEFAULT_AL; tmp_nvm = nvm_word | E1000_INVM_PLL_WO_VAL; + igb_write_phy_reg_82580(hw, I347AT4_PAGE_SELECT, E1000_PHY_PLL_FREQ_PAGE); for (i = 0; i < E1000_MAX_PLL_TRIES; i++) { /* check current state directly from internal PHY */ - igb_read_phy_reg_gs40g(hw, (E1000_PHY_PLL_FREQ_PAGE | -E1000_PHY_PLL_FREQ_REG), _word); + igb_read_phy_reg_82580(hw, E1000_PHY_PLL_FREQ_REG, _word); if ((phy_word & E1000_PHY_PLL_UNCONF) != E1000_PHY_PLL_UNCONF) { ret_val = 0; @@ -896,6 +896,7 @@ s32 igb_pll_workaround_i210(struct e1000_hw *hw) /* restore WUC register */ wr32(E1000_WUC, wuc); } + igb_write_phy_reg_82580(hw, I347AT4_PAGE_SELECT, 0); /* restore MDICNFG setting */ wr32(E1000_MDICNFG, mdicnfg); return ret_val; diff --git a/drivers/net/ethernet/intel/igb/e1000_i210.h b/drivers/net/ethernet/intel/igb/e1000_i210.h index eaa68a5..b2964a2 100644 --- 
a/drivers/net/ethernet/intel/igb/e1000_i210.h +++ b/drivers/net/ethernet/intel/igb/e1000_i210.h @@ -85,7 +85,7 @@ enum E1000_INVM_STRUCTURE_TYPE { #define E1000_PCI_PMCSR_D3
[net-next 5/9] igb: Explicitly label self-test result indices
From: Joe SchultzPreviously, the ethtool self-test gstrings/data arrays were accessed via hardcoded indices, which made the code difficult to follow. This patch replaces the hardcoded values with enum-based labels. Signed-off-by: Joe Schultz Signed-off-by: Aaron Sierra Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/igb/igb_ethtool.c | 38 ++-- 1 file changed, 24 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c index 2529bc6..1d329f1 100644 --- a/drivers/net/ethernet/intel/igb/igb_ethtool.c +++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c @@ -127,10 +127,20 @@ static const struct igb_stats igb_gstrings_net_stats[] = { #define IGB_STATS_LEN \ (IGB_GLOBAL_STATS_LEN + IGB_NETDEV_STATS_LEN + IGB_QUEUE_STATS_LEN) +enum igb_diagnostics_results { + TEST_REG = 0, + TEST_EEP, + TEST_IRQ, + TEST_LOOP, + TEST_LINK +}; + static const char igb_gstrings_test[][ETH_GSTRING_LEN] = { - "Register test (offline)", "Eeprom test(offline)", - "Interrupt test (offline)", "Loopback test (offline)", - "Link test (on/offline)" + [TEST_REG] = "Register test (offline)", + [TEST_EEP] = "Eeprom test(offline)", + [TEST_IRQ] = "Interrupt test (offline)", + [TEST_LOOP] = "Loopback test (offline)", + [TEST_LINK] = "Link test (on/offline)" }; #define IGB_TEST_LEN (sizeof(igb_gstrings_test) / ETH_GSTRING_LEN) @@ -2002,7 +2012,7 @@ static void igb_diag_test(struct net_device *netdev, /* Link test performed before hardware reset so autoneg doesn't * interfere with test result */ - if (igb_link_test(adapter, [4])) + if (igb_link_test(adapter, [TEST_LINK])) eth_test->flags |= ETH_TEST_FL_FAILED; if (if_running) @@ -2011,21 +2021,21 @@ static void igb_diag_test(struct net_device *netdev, else igb_reset(adapter); - if (igb_reg_test(adapter, [0])) + if (igb_reg_test(adapter, [TEST_REG])) eth_test->flags |= ETH_TEST_FL_FAILED; igb_reset(adapter); - if (igb_eeprom_test(adapter, [1])) + if 
(igb_eeprom_test(adapter, [TEST_EEP])) eth_test->flags |= ETH_TEST_FL_FAILED; igb_reset(adapter); - if (igb_intr_test(adapter, [2])) + if (igb_intr_test(adapter, [TEST_IRQ])) eth_test->flags |= ETH_TEST_FL_FAILED; igb_reset(adapter); /* power up link for loopback test */ igb_power_up_link(adapter); - if (igb_loopback_test(adapter, [3])) + if (igb_loopback_test(adapter, [TEST_LOOP])) eth_test->flags |= ETH_TEST_FL_FAILED; /* restore speed, duplex, autoneg settings */ @@ -2045,16 +2055,16 @@ static void igb_diag_test(struct net_device *netdev, dev_info(>pdev->dev, "online testing starting\n"); /* PHY is powered down when interface is down */ - if (if_running && igb_link_test(adapter, [4])) + if (if_running && igb_link_test(adapter, [TEST_LINK])) eth_test->flags |= ETH_TEST_FL_FAILED; else - data[4] = 0; + data[TEST_LINK] = 0; /* Online tests aren't run; pass by default */ - data[0] = 0; - data[1] = 0; - data[2] = 0; - data[3] = 0; + data[TEST_REG] = 0; + data[TEST_EEP] = 0; + data[TEST_IRQ] = 0; + data[TEST_LOOP] = 0; clear_bit(__IGB_TESTING, >state); } -- 2.5.0 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[net-next 1/9] e1000e: Switch e1000e_up to void, drop code checking for error result
From: Alexander DuyckThe function e1000e_up always returns 0. As such we can convert it to a void and just ignore the results. This allows us to drop some code in a couple spots as we no longer need to worry about non-zero return values. Signed-off-by: Alexander Duyck Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/e1000e/e1000.h | 2 +- drivers/net/ethernet/intel/e1000e/netdev.c | 15 --- 2 files changed, 5 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000e/e1000.h b/drivers/net/ethernet/intel/e1000e/e1000.h index 0b748d1..1dc293b 100644 --- a/drivers/net/ethernet/intel/e1000e/e1000.h +++ b/drivers/net/ethernet/intel/e1000e/e1000.h @@ -480,7 +480,7 @@ extern const char e1000e_driver_version[]; void e1000e_check_options(struct e1000_adapter *adapter); void e1000e_set_ethtool_ops(struct net_device *netdev); -int e1000e_up(struct e1000_adapter *adapter); +void e1000e_up(struct e1000_adapter *adapter); void e1000e_down(struct e1000_adapter *adapter, bool reset); void e1000e_reinit_locked(struct e1000_adapter *adapter); void e1000e_reset(struct e1000_adapter *adapter); diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index 775e389..955c8c7 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -4146,7 +4146,7 @@ void e1000e_reset(struct e1000_adapter *adapter) } -int e1000e_up(struct e1000_adapter *adapter) +void e1000e_up(struct e1000_adapter *adapter) { struct e1000_hw *hw = >hw; @@ -4166,8 +4166,6 @@ int e1000e_up(struct e1000_adapter *adapter) ew32(ICS, E1000_ICS_LSC | E1000_ICR_OTHER); else ew32(ICS, E1000_ICS_LSC); - - return 0; } static void e1000e_flush_descriptors(struct e1000_adapter *adapter) @@ -6633,7 +6631,7 @@ static int e1000e_pm_runtime_resume(struct device *dev) return rc; if (netdev->flags & IFF_UP) - rc = e1000e_up(adapter); + e1000e_up(adapter); return rc; } @@ -6824,13 +6822,8 @@ static 
void e1000_io_resume(struct pci_dev *pdev) e1000_init_manageability_pt(adapter); - if (netif_running(netdev)) { - if (e1000e_up(adapter)) { - dev_err(>dev, - "can't bring device back up after reset\n"); - return; - } - } + if (netif_running(netdev)) + e1000e_up(adapter); netif_device_attach(netdev); -- 2.5.0 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
RE: [PATCHSET v4] netfilter, cgroup: implement cgroup2 path match in xt_cgroup
iptables extension as a reply. diffstat follows. Thanks. > > fs/kernfs/dir.c | 46 +++ > include/linux/cgroup-defs.h | 126 > +++ > include/linux/cgroup.h | 66 +++- > include/linux/kernfs.h | 12 ++ > include/net/cls_cgroup.h | 11 +- > include/net/netprio_cgroup.h | 16 +++ > include/net/sock.h | 13 --- > include/uapi/linux/netfilter/xt_cgroup.h | 15 +++ > kernel/cgroup.c | 126 > --- > net/Kconfig |6 + > net/core/dev.c |3 > net/core/netclassid_cgroup.c | 11 +- > net/core/netprio_cgroup.c| 19 > net/core/scm.c |4 > net/core/sock.c | 17 > net/netfilter/nft_meta.c |2 > net/netfilter/xt_cgroup.c| 108 ++ > 17 files changed, 513 insertions(+), 88 deletions(-) > > -- > tejun Hi Tejun, With today's linux-next (next-20151214), I still got the same back trace, which was previously reported at http://lists.openwall.net/netdev/2015/11/23/80: [ 15.129701] BUG: spinlock bad magic on CPU#6, (systemd)/1012 [ 15.129701] lock: cgroup_sk_update_lock+0x0/0x40, .magic: , .owner: /-1, .owner_cpu: 0 [ 15.129701] CPU: 6 PID: 1012 Comm: (systemd) Not tainted 4.4.0-rc4-next-20151214+ #3 [ 15.129701] Hardware name: Microsoft Corporation Virtual Machine/Virtual Machine, BIOS 090006 05/23/2012 [ 15.129701] ae6cddc0 8800e158bab0 ad317212 [ 15.129701] 8800e158bad0 ad0a1b8c ae6cddc0 ad800ee6 [ 15.129701] 8800e158baf0 ad0a1c06 ae6cddc0 8800ead9f080 [ 15.129701] Call Trace: [ 15.129701] [] dump_stack+0x44/0x62 [ 15.129701] [] spin_dump+0x7c/0xd0 [ 15.129701] [] spin_bug+0x26/0x30 [ 15.129701] [] do_raw_spin_lock+0xe5/0x120 [ 15.129701] [] _raw_spin_lock+0x39/0x40 [ 15.129701] [] ? update_classid_sock+0x33/0x80 [ 15.129701] [] update_classid_sock+0x33/0x80 [ 15.129701] [] ? write_classid+0x30/0x30 [ 15.129701] [] iterate_fd+0x5a/0x90 [ 15.129701] [] update_classid+0x47/0x80 [ 15.129701] [] cgrp_attach+0x25/0x30 [ 15.129701] [] cgroup_taskset_migrate+0x14b/0x280 [ 15.129701] [] cgroup_migrate+0xbf/0x100 [ 15.129701] [] ? 
cgroup_migrate+0x5/0x100 [ 15.129701] [] cgroup_attach_task+0xb5/0x100 [ 15.129701] [] ? cgroup_attach_task+0x5/0x100 [ 15.129701] [] __cgroup_procs_write+0x1da/0x310 [ 15.129701] [] ? __cgroup_procs_write+0x5e/0x310 [ 15.129701] [] cgroup_procs_write+0x14/0x20 [ 15.129701] [] cgroup_file_write+0x40/0x130 [ 15.129701] [] kernfs_fop_write+0x130/0x180 [ 15.129701] [] __vfs_write+0x28/0xe0 [ 15.129701] [] ? percpu_down_read+0x3c/0x90 [ 15.129701] [] ? __sb_start_write+0xdc/0xf0 [ 15.129701] [] ? __sb_start_write+0xdc/0xf0 [ 15.129701] [] vfs_write+0xa9/0x190 [ 15.129701] [] SyS_write+0x49/0xa0 [ 15.129701] [] entry_SYSCALL_64_fastpath+0x16/0x7a My kernel config is attached FYI. Thanks, -- Dexuan kernnel.config Description: kernnel.config
[RFCv4 bluetooth-next 2/2] ipv6: add 6co as icmpv6 userspace option
This patch adds the 6LoWPAN Context Option (6CO) as a userspace option, so that such options inside RA messages can be processed in userspace. Cc: David S. Miller Cc: Alexey Kuznetsov Cc: James Morris Cc: Hideaki YOSHIFUJI Cc: Patrick McHardy Signed-off-by: Alexander Aring --- include/net/ndisc.h | 1 + net/ipv6/ndisc.c| 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/include/net/ndisc.h b/include/net/ndisc.h index 2d8edaa..944258d 100644 --- a/include/net/ndisc.h +++ b/include/net/ndisc.h @@ -35,6 +35,7 @@ enum { ND_OPT_ROUTE_INFO = 24, /* RFC4191 */ ND_OPT_RDNSS = 25, /* RFC5006 */ ND_OPT_DNSSL = 31, /* RFC6106 */ + ND_OPT_6CO = 34,/* RFC6775 */ __ND_OPT_MAX }; diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index d6161e1..bed154e 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -188,7 +188,8 @@ static struct nd_opt_hdr *ndisc_next_option(struct nd_opt_hdr *cur, static inline int ndisc_is_useropt(struct nd_opt_hdr *opt) { return opt->nd_opt_type == ND_OPT_RDNSS || - opt->nd_opt_type == ND_OPT_DNSSL; + opt->nd_opt_type == ND_OPT_DNSSL || + opt->nd_opt_type == ND_OPT_6CO; } static struct nd_opt_hdr *ndisc_next_useropt(struct nd_opt_hdr *cur, -- 2.6.1 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[RFCv4 bluetooth-next 0/2] 6lowpan: 6co and stateful compression support
Hi, this patch series adds stateful compression support and adds the 6CO option as a new userspace option for processing RA messages inside userspace. I am not sure if "6CO" handling inside userspace is the best option here. I will also send "radvd" patches which introduce very "basic" support for processing (non-6LBR) / managing (6LBR) 6CO option fields. These patches don't support lifetime handling of contexts. There is also the question of whether we should do the lifetime handling inside userspace or kernelspace. I am currently following this approach: if we don't need it inside kernelspace, then we should handle it in userspace. It's difficult to figure out whether we really can handle it inside userspace only. RFC6775 describes some different roles inside the network: - 6LN (6LoWPAN Node) - 6LR (Router inside 6LoWPAN network) - 6LBR ($IP_NETWORK <-> 6LoWPAN network) Processing ICMPv6 (RA/RS, NA/NS) messages may be different for each role. I currently do not have a full overview of RFC6775, and sometimes (for example for the ABRO field, another option field for 6LoWPAN) it says: 8.1.3. Routers Processing Router Advertisements Note: (I suppose this is for 6LR only!) If a received RA does not contain an ABRO, then the RA MUST be silently ignored. --- To my knowledge such handling needs to be inside kernelspace. This is filter functionality only; processing can be handled inside userspace (which first needs ABRO as a userspace option as well), but then the kernel needs to know which "role (6LN, 6LR, 6LBR)" the interface has. - Alex changes since v4: - remove patches for adding debugfs which are already upstream. - add "ipv6: add 6co as icmpv6 userspace option" - fix transmit check on (cid) instead (sci || dci) for adding CID inline data. If CID is zero it will be compressed. - remove "dci_table, sci_table, mcast_table" we have "ctx_table" only. - Change enabled with "u32 flags" since we need more information than "enabled" only. We handle also "compression flag" now. 
Alexander Aring (2): 6lowpan: iphc: add support for stateful compression ipv6: add 6co as icmpv6 userspace option include/net/6lowpan.h | 31 include/net/ndisc.h | 1 + net/6lowpan/core.c| 6 +- net/6lowpan/debugfs.c | 97 net/6lowpan/iphc.c| 420 +++--- net/ipv6/ndisc.c | 3 +- 6 files changed, 499 insertions(+), 59 deletions(-) -- 2.6.1 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: use-after-free in sctp_do_sm
On 12/14/2015 04:50 AM, David Laight wrote: > From: Vlad Yasevich >> Sent: 11 December 2015 18:38 > ... >>> Found a similar place in abort primitive handling like in this last >>> patch update, it's probably the issue you're still triggering. >>> >>> Also found another place that may lead to this use after free, in case >>> we receive a packet with a chunk that has no data. >>> >>> Oh my.. :) >> >> Yes. This is what I was worried about... Anything that triggers >> a DELETE_TCB command has to return a code that we can trap. >> >> The other way is to do what Dmitri suggested, but even there, we >> need to be very careful. > > I'm always wary of anything that queues actions up for later processing. > It is far too easy (as found here) to end up processing actions > in invalid states, or to process actions in 'unusual' orders when > specific events happen close together. > > I wonder how much fallout there'd be from getting the sctp code > to immediately action things, instead of queuing the actions for later. > It would certainly remove a lot of the unusual combinations of events. > We've bandied this idea around for a while, but no one has had the time to tackle this. This would be a rather time-consuming task, but in the end might be a good idea. -vlad > David > > -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[RFCv4 bluetooth-next 1/2] 6lowpan: iphc: add support for stateful compression
This patch introduces support for IPHC stateful address compression. It offers the context table via one debugfs entry. Example of setting up a context id: A "cat /sys/kernel/debug/6lowpan/lowpan0/ctx_table" will display all contexts which are available. Example: ID ipv6-address/prefix-length flags 0 :::::::/0 0 1 :::::::/0 0 2 :::::::/0 0 3 :::::::/0 0 4 :::::::/0 0 5 :::::::/0 0 6 :::::::/0 0 7 :::::::/0 0 8 :::::::/0 0 9 :::::::/0 0 10 :::::::/0 0 11 :::::::/0 0 12 :::::::/0 0 13 :::::::/0 0 14 :::::::/0 0 15 :::::::/0 0 For setting a context, e.g. context id 0, context 2001::, prefix-length 64. Hint: Simply copy one line and then manipulate it. echo "0 2001:::::::/64 3" > /sys/kernel/debug/6lowpan/lowpan0/ctx_table The flags are currently two: - BIT(0) - active: entry is added or deleted to the ctx_table. - BIT(1) - c: compression flag according to rfc6775. On transmit side: The IPHC code will automatically search for a context which matches the address. Then it will use the context with the best compression method. This means the longest matching prefix will be used. Example: 2001::/126 vs 2001::/127 - the 2001::/127 can be fully compressed if the last bit of the address which has the prefix 2001::/127 is the same as the IID from the Encapsulating Header. A context ID can also be a 2001::1/128, which is then a full ipv6 address. On receive side: If there is a context defined (when CID is not available then it's the default context 0) then it will be used; if the header doesn't set the SAC or DAC bit, then it will be dropped. 
Signed-off-by: Alexander Aring--- include/net/6lowpan.h | 31 net/6lowpan/core.c| 6 +- net/6lowpan/debugfs.c | 97 net/6lowpan/iphc.c| 420 +++--- 4 files changed, 496 insertions(+), 58 deletions(-) diff --git a/include/net/6lowpan.h b/include/net/6lowpan.h index 2f6a3f2..db636c8 100644 --- a/include/net/6lowpan.h +++ b/include/net/6lowpan.h @@ -75,6 +75,8 @@ #define LOWPAN_IPHC_MAX_HC_BUF_LEN (sizeof(struct ipv6hdr) + \ LOWPAN_IPHC_MAX_HEADER_LEN + \ LOWPAN_NHC_MAX_HDR_LEN) +/* SCI/DCI is 4 bit width, so we have maximum 16 entries */ +#define LOWPAN_IPHC_CI_TABLE_SIZE (1 << 4) #define LOWPAN_DISPATCH_IPV6 0x41 /* 0101 = 65 */ #define LOWPAN_DISPATCH_IPHC 0x60 /* 011x = ... */ @@ -98,9 +100,38 @@ enum lowpan_lltypes { LOWPAN_LLTYPE_IEEE802154, }; +enum lowpan_iphc_ctx_flags { + LOWPAN_IPHC_CTX_FLAG_ACTIVE = BIT(0), + LOWPAN_IPHC_CTX_FLAG_C = BIT(1), +}; + +struct lowpan_iphc_ctx { + u8 id; + struct in6_addr pfx; + u8 plen; + u32 flags; +}; + +struct lowpan_iphc_ctx_table { + spinlock_t lock; + const struct lowpan_iphc_ctx_ops *ops; + struct lowpan_iphc_ctx table[LOWPAN_IPHC_CI_TABLE_SIZE]; +}; + +static inline bool lowpan_iphc_ctx_is_active(const struct lowpan_iphc_ctx *ctx) +{ + return ctx->flags & LOWPAN_IPHC_CTX_FLAG_ACTIVE; +} + +static inline bool lowpan_iphc_ctx_is_c(const struct lowpan_iphc_ctx *ctx) +{ + return ctx->flags & LOWPAN_IPHC_CTX_FLAG_C; +} + struct lowpan_priv { enum lowpan_lltypes lltype; struct dentry *iface_debugfs; + struct lowpan_iphc_ctx_table ctx; /* must be last */ u8 priv[0] __aligned(sizeof(void *)); diff --git a/net/6lowpan/core.c b/net/6lowpan/core.c index c7f06f5..772f51c 100644 --- a/net/6lowpan/core.c +++ b/net/6lowpan/core.c @@ -20,7 +20,7 @@ int lowpan_register_netdevice(struct net_device *dev, enum lowpan_lltypes lltype) { - int ret; + int i, ret; dev->addr_len = EUI64_ADDR_LEN; dev->type = ARPHRD_6LOWPAN; @@ -29,6 +29,10 @@ int lowpan_register_netdevice(struct net_device *dev, lowpan_priv(dev)->lltype = lltype; + 
spin_lock_init(_priv(dev)->ctx.lock); + for (i = 0; i < LOWPAN_IPHC_CI_TABLE_SIZE; i++) + lowpan_priv(dev)->ctx.table[i].id = i; + ret = lowpan_dev_debugfs_init(dev); if (ret < 0) return ret; diff --git a/net/6lowpan/debugfs.c b/net/6lowpan/debugfs.c index 88eef84..5270fa1 100644 --- a/net/6lowpan/debugfs.c +++ b/net/6lowpan/debugfs.c @@
Re: [V2 PATCH 1/1] net: sctp: dynamically enable or disable pf state
On 12/14/2015 01:22 AM, zyjzyj2...@gmail.com wrote: > From: Zhu Yanjun> > As we all know, the value of pf_retrans >= max_retrans_path can > disable pf state. The variables of pf_retrans and max_retrans_path > can be changed by the user space application. > > Sometimes the user expects to disable pf state while the 2 > variables are changed to enable pf state. So it is necessary to > introduce a new variable to disable pf state. > > According to the suggestions from Vlad Yasevich, extra1 and extra2 > are removed. The initialization of pf_enable is added. > > Signed-off-by: Zhu Yanjun Acked-by: Vlad Yasevich -vlad > --- > include/net/netns/sctp.h |7 +++ > net/sctp/protocol.c |3 +++ > net/sctp/sm_sideeffect.c |5 - > net/sctp/sysctl.c|7 +++ > 4 files changed, 21 insertions(+), 1 deletion(-) > > diff --git a/include/net/netns/sctp.h b/include/net/netns/sctp.h > index 8ba379f..c501d67 100644 > --- a/include/net/netns/sctp.h > +++ b/include/net/netns/sctp.h > @@ -89,6 +89,13 @@ struct netns_sctp { > int pf_retrans; > > /* > + * Disable Potentially-Failed feature, the feature is enabled by default > + * pf_enable- 0 : disable pf > + * - >0 : enable pf > + */ > + int pf_enable; > + > + /* >* Policy for preforming sctp/socket accounting >* 0 - do socket level accounting, all assocs share sk_sndbuf >* 1 - do sctp accounting, each asoc may use sk_sndbuf bytes > diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c > index 4d9912f..571a631 100644 > --- a/net/sctp/protocol.c > +++ b/net/sctp/protocol.c > @@ -1223,6 +1223,9 @@ static int __net_init sctp_defaults_init(struct net > *net) > /* Max.Burst- 4 */ > net->sctp.max_burst = SCTP_DEFAULT_MAX_BURST; > > + /* Enable pf state by default */ > + net->sctp.pf_enable = 1; > + > /* Association.Max.Retrans - 10 attempts >* Path.Max.Retrans - 5 attempts (per destination address) >* Max.Init.Retransmits - 8 attempts > diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c > index 6098d4c..05cd164 100644 > --- 
a/net/sctp/sm_sideeffect.c > +++ b/net/sctp/sm_sideeffect.c > @@ -477,6 +477,8 @@ static void sctp_do_8_2_transport_strike(sctp_cmd_seq_t > *commands, >struct sctp_transport *transport, >int is_hb) > { > + struct net *net = sock_net(asoc->base.sk); > + > /* The check for association's overall error counter exceeding the >* threshold is done in the state function. >*/ > @@ -503,7 +505,8 @@ static void sctp_do_8_2_transport_strike(sctp_cmd_seq_t > *commands, >* is SCTP_ACTIVE, then mark this transport as Partially Failed, >* see SCTP Quick Failover Draft, section 5.1 >*/ > - if ((transport->state == SCTP_ACTIVE) && > + if (net->sctp.pf_enable && > +(transport->state == SCTP_ACTIVE) && > (asoc->pf_retrans < transport->pathmaxrxt) && > (transport->error_count > asoc->pf_retrans)) { > > diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c > index 26d50c5..ccbfc93 100644 > --- a/net/sctp/sysctl.c > +++ b/net/sctp/sysctl.c > @@ -308,6 +308,13 @@ static struct ctl_table sctp_net_table[] = { > .extra1 = _autoclose_min, > .extra2 = _autoclose_max, > }, > + { > + .procname = "pf_enable", > + .data = _net.sctp.pf_enable, > + .maxlen = sizeof(int), > + .mode = 0644, > + .proc_handler = proc_dointvec, > + }, > > { /* sentinel */ } > }; > -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] net: phy: mdio-mux: Check return value of mdiobus_alloc()
mdiobus_alloc() might return NULL, but its return value is not checked in mdio_mux_init(). This could potentially lead to a NULL pointer dereference. Fix it by checking the return value. Fixes: 0ca2997d1452 ("netdev/of/phy: Add MDIO bus multiplexer support.") Signed-off-by: Tobias Klauser --- drivers/net/phy/mdio-mux.c | 7 ++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/mdio-mux.c b/drivers/net/phy/mdio-mux.c index 908e8d486342..7f8e7662e28c 100644 --- a/drivers/net/phy/mdio-mux.c +++ b/drivers/net/phy/mdio-mux.c @@ -149,9 +149,14 @@ int mdio_mux_init(struct device *dev, } cb->bus_number = v; cb->parent = pb; + cb->mii_bus = mdiobus_alloc(); + if (!cb->mii_bus) { + ret_val = -ENOMEM; + of_node_put(child_bus_node); + break; + } cb->mii_bus->priv = cb; - cb->mii_bus->irq = cb->phy_irq; cb->mii_bus->name = "mdio_mux"; snprintf(cb->mii_bus->id, MII_BUS_ID_SIZE, "%x.%x", -- 2.6.3.368.gf34be46.dirty -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net] openvswitch: fix trivial comment typo
The commit 33db4125ec74 ("openvswitch: Rename LABEL->LABELS") left over an old OVS_CT_ATTR_LABEL instance, fix it. Fixes: 33db4125ec74 ("openvswitch: Rename LABEL->LABELS") Signed-off-by: Paolo Abeni--- include/uapi/linux/openvswitch.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 28ccedd..a27222d 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -628,7 +628,7 @@ struct ovs_action_hash { * @OVS_CT_ATTR_MARK: u32 value followed by u32 mask. For each bit set in the * mask, the corresponding bit in the value is copied to the connection * tracking mark field in the connection. - * @OVS_CT_ATTR_LABEL: %OVS_CT_LABELS_LEN value followed by %OVS_CT_LABELS_LEN + * @OVS_CT_ATTR_LABELS: %OVS_CT_LABELS_LEN value followed by %OVS_CT_LABELS_LEN * mask. For each bit set in the mask, the corresponding bit in the value is * copied to the connection tracking label field in the connection. * @OVS_CT_ATTR_HELPER: variable length string defining conntrack ALG. -- 1.8.3.1 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[RFC radvd 2/2] radvd: rework 6CO handling
Current issues with the 6CO handling: - Doesn't work on little endian at my side because forgotten byteordering handling at bitfields. - There can be multiple 6CO options. Up to 16 6CO options at maximum. - It doesn't work as it should. Maybe for some use-case somebody need that, but 6CO contains information for header parsing and this need functionality to tell it the kernel. Currently we have a debugfs entry for that. As an example, RFC6775 describes the 6LBR should be configurated and managed the context entries of RFC6282. interface lowpan0 { Adv6LBR on; AdvSendAdvert on; UnicastOnly on; AdvCurHopLimit 255; prefix 2001::/64 { AdvOnLink on; AdvAutonomous on; AdvRouterAddr on; }; lowpanco { ctx 0 { AdvContextCompressionFlag on; AdvContextLength 64; AdvContextPrefix 2001::; AdvLifeTime 1000; }; }; }; If we set "Adv6LBR" to on, then the "lowpanco" contexts will be setup during startup of radvd, otherwise all contexts are empty (non active). I changed the parsing of contexts: - lowpanco contains up-to 16 contexts with _unique_ id's. - The id is after "ctx" specified. What doesn't work: - Lifetime handling. - AdvContextCompressionFlag should be 0 at first to propagate "safety" the context inside the context. RFC6775 says here: New context information SHOULD be introduced into the LoWPAN with C=0, to ensure that it is known by all nodes that may have to perform header decompression based on this context information. Only when it is reasonable to assume that this information was successfully disseminated SHOULD an option with C=1 be sent, enabling the actual use of the context information for compression I know what this means, but then don't know "when" we can do "C=1", maybe this is out-of-scope in RFC6775. Note: I ignore the ABRO for now. The ABRO need to be included and the version fields indicates if new context or old context information. This is just to begin with something to handle 6CO. 
Signed-off-by: Alexander Aring--- defaults.h | 3 +++ device-bsd44.c | 6 ++ device-linux.c | 35 +++ gram.y | 55 ++- pathnames.h | 1 + privsep-linux.c | 55 ++- process.c | 48 radvd.c | 6 ++ radvd.h | 21 +++-- scanner.l | 4 +++- send.c | 33 - 11 files changed, 245 insertions(+), 22 deletions(-) diff --git a/defaults.h b/defaults.h index fedd546..a328793 100644 --- a/defaults.h +++ b/defaults.h @@ -125,6 +125,9 @@ #define MAX_PrefixLen 128 +/* RFC6282 Constraints */ +#define MAX_CIDLen 16 + /* SLAAC (RFC4862) Constants and Derived Values */ #define MIN_AdvValidLifetime 7200/* 2 hours in secs */ diff --git a/device-bsd44.c b/device-bsd44.c index f1aacca..6d4d838 100644 --- a/device-bsd44.c +++ b/device-bsd44.c @@ -143,6 +143,12 @@ int set_interface_retranstimer(const char *iface, uint32_t rettimer) return -1; } +int set_interface_6ctx(const struct Interface *iface, struct AdvLowpanCtx ctx) +{ + dlog(LOG_DEBUG, 4, "update 6LoWPAN context not supported"); + return 0; +} + int check_ip6_forwarding(void) { dlog(LOG_DEBUG, 4, "checking ipv6 forwarding not supported"); diff --git a/device-linux.c b/device-linux.c index 7301927..c9b516f 100644 --- a/device-linux.c +++ b/device-linux.c @@ -86,6 +86,30 @@ int update_device_info(int sock, struct Interface *iface) case ARPHRD_6LOWPAN: iface->sllao.if_hwaddr_len = 64; iface->sllao.if_prefix_len = 64; + + if (iface->state_info.configured) + break; + + /* if nothing specified use a empy AdvLowpanCoList as default */ + if (!iface->AdvLowpanCoList) { + iface->AdvLowpanCoList = malloc(sizeof(struct AdvLowpanCo)); + if (iface->AdvLowpanCoList == NULL) { + flog(LOG_ERR, "AdvLowpanCo allocation failed"); + return -2; + } + + memset(iface->AdvLowpanCoList, 0, sizeof(struct AdvLowpanCo)); + } else { + /* If the LoWPAN uses header compression [RFC6282] with context, then +* the 6LBR must be configured with context information and related +* CIDs. Zero all if non 6LBR. +*/ + if (!iface->Adv6LBR) +
[RFC radvd 0/2] radvd: 6lowpan 6CO testing patches
Hi, this patch is for testing 6CO fields in RA messages with the help of radvd. I tested it with the following configuration and two or more nodes which can directly reach each other. On one node (6LBR, if more 6LBR they need to have the same context information!): interface lowpan0 { Adv6LBR on; AdvSendAdvert on; UnicastOnly on; AdvCurHopLimit 255; prefix 2001::/64 { AdvOnLink on; AdvAutonomous on; AdvRouterAddr on; }; lowpanco { ctx 0 { AdvContextCompressionFlag on; AdvContextLength 64; AdvContextPrefix 2001::; AdvLifeTime 1000; }; }; }; On other nodes: interface lowpan0 { Adv6LBR off; AdvSendAdvert off; }; Then you can see that the assigned global link 2001::/64 address will be compressed. Tested with: ping6 and wireshark on $WPAN interface, when using 802.15.4 6LoWPAN. - Alex Alexander Aring (2): device-linux: replace ARPHRD_IEEE802154 to ARPHRD_6LOWPAN radvd: rework 6CO handling defaults.h | 3 +++ device-bsd44.c | 6 ++ device-linux.c | 46 +++--- gram.y | 55 ++- pathnames.h | 1 + privsep-linux.c | 55 ++- process.c | 48 radvd.c | 6 ++ radvd.h | 21 +++-- scanner.l | 4 +++- send.c | 33 - 11 files changed, 253 insertions(+), 25 deletions(-) -- 2.6.1 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[RFC radvd 1/2] device-linux: replace ARPHRD_IEEE802154 to ARPHRD_6LOWPAN
This patch changes the ARPHRD_IEEE802154 to ARPHRD_6LOWPAN. The IEEE 802.15.4 6lowpan module changed the ARPHRD_IEEE802154 type to ARPHRD_6LOWPAN. Nowadays it uses ARPHRD_6LOWPAN, which is also used by BTLE 6LoWPAN. Both interfaces use an EUI64 address and the handling to get the link-layer address should be the same. There is no backward compatibility for 802.15.4 6LoWPAN from before we changed the ARPHRD. Anyway, if somebody wants that, it should be patched manually. When the ARPHRD was ARPHRD_IEEE802154, the 802.15.4 6lowpan was in a somewhat unusable state anyway. Cc: linux-blueto...@vger.kernel.org Cc: linux-w...@vger.kernel.org Cc: Oleg Hahm Signed-off-by: Alexander Aring --- device-linux.c | 11 --- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/device-linux.c b/device-linux.c index de83f2e..7301927 100644 --- a/device-linux.c +++ b/device-linux.c @@ -22,6 +22,10 @@ #define IPV6_ADDR_LINKLOCAL 0x0020U #endif +#ifndef ARPHRD_6LOWPAN +#define ARPHRD_6LOWPAN 825 /* IPv6 over LoWPAN */ +#endif + static char const *hwstr(unsigned short sa_family); /* @@ -79,12 +83,10 @@ int update_device_info(int sock, struct Interface *iface) iface->sllao.if_maxmtu = -1; break; #endif /* ARPHDR_ARCNET */ -#ifdef ARPHRD_IEEE802154 - case ARPHRD_IEEE802154: + case ARPHRD_6LOWPAN: iface->sllao.if_hwaddr_len = 64; iface->sllao.if_prefix_len = 64; break; -#endif default: iface->sllao.if_hwaddr_len = -1; iface->sllao.if_prefix_len = -1; @@ -382,6 +384,9 @@ static char const *hwstr(unsigned short sa_family) rc = "ARPHRD_IEEE802154_PHY"; break; #endif + case ARPHRD_6LOWPAN: + rc = "ARPHRD_6LOWPAN"; + break; case ARPHRD_VOID: rc = "ARPHRD_VOID"; break; -- 2.6.1 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 1/1] net: bonding: remove redundant brackets
Le 12/12/2015 02:03, David Miller a écrit : From:From: yzhu1 Signed-off-by: yzhu1 Does not apply to the net-next tree. Also three different emails ... -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 1/2] vlan: Fix untag operations of stacked vlans with REORDER_HEADER off
Le 16/11/2015 21:43, Vladislav Yasevich a écrit : When we have multiple stacked vlan devices all of which have turned off REORDER_HEADER flag, the untag operation does not locate the ethernet addresses correctly for nested vlans. The reason is that in case of REORDER_HEADER flag being off, the outer vlan headers are put back and the mac_len is adjusted to account for the presence of the header. Then, the subsequent untag operation, for the next level vlan, always uses VLAN_ETH_HLEN to locate the beginning of the ethernet header and that ends up being a multiple of 4 bytes short of the actual beginning of the mac header (the multiple depending on how many vlan encapsulations there are). As a result, if there are multiple levels of vlan devices with REORDER_HEADER being off, the received packets end up being dropped. To solve this, we use skb->mac_len as the offset. The value is always set on receive path and starts out as a ETH_HLEN. The value is also updated when the vlan header manipulations occur so we know it will be correct. Signed-off-by: Vladislav Yasevich --- net/core/skbuff.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index fab4599..160193f 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4268,7 +4268,8 @@ static struct sk_buff *skb_reorder_vlan_header(struct sk_buff *skb) return NULL; } - memmove(skb->data - ETH_HLEN, skb->data - VLAN_ETH_HLEN, 2 * ETH_ALEN); + memmove(skb->data - ETH_HLEN, skb->data - skb->mac_len, + 2 * ETH_ALEN); skb->mac_header += VLAN_HLEN; return skb; } This patch breaks the following test case: a vlan packet is received by an e1000 interface. 
Here is the configuration of the interface: $ ethtool -k ntfp2 | grep "vlan\|offload" tcp-segmentation-offload: off udp-fragmentation-offload: off [fixed] generic-segmentation-offload: on generic-receive-offload: on large-receive-offload: off [fixed] rx-vlan-offload: off tx-vlan-offload: off [fixed] rx-vlan-filter: on [fixed] vlan-challenged: off [fixed] tx-vlan-stag-hw-insert: off [fixed] rx-vlan-stag-hw-parse: off [fixed] rx-vlan-stag-filter: off [fixed] l2-fwd-offload: off [fixed] The vlan header is not removed by the driver. It calls dev_gro_receive() which sets the network header to +14, thus mac_len is also set to 14 and skb_reorder_vlan_header() does a wrong memmove() (the packet is dropped). Not sure who is responsible for updating mac_len before skb_vlan_untag() is called. Any suggestions? -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Double free of dst_entry in ipv4_dst_destroy()
On Mon, 2015-12-14 at 11:28 -0500, dwil...@us.ibm.com wrote: > Eric - > With this patch applied the test ran clean for 2 days. > > Thanks for your help. Excellent ! Thanks a lot David, I will submit it formally with your 'Reported-by' and 'Tested-by' I have no idea why it took so long to discover this race. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net] netlink: fix boolean evaluation on bound
portid may be 0, so assigning it to bound sets the flag to false for in-kernel created netlink sockets. Fixes: da314c9923fed55 ("netlink: Replace rhash_portid with bound") Cc: Herbert Xu Signed-off-by: Hannes Frederic Sowa --- This patch should not affect anything and is just meant to close this loophole in the future. I based it on net, but you can also apply it to net-next. net/netlink/af_netlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 59651af8cc2705..278e94c3c7f6d1 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1137,7 +1137,7 @@ static int netlink_insert(struct sock *sk, u32 portid) /* We need to ensure that the socket is hashed and visible. */ smp_wmb(); - nlk_sk(sk)->bound = portid; + nlk_sk(sk)->bound = true; err: release_sock(sk); -- 2.5.0 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 1/2] net: udp: local checksum offload for encapsulation
On Mon, Dec 14, 2015 at 7:13 AM, Edward Creewrote: > The arithmetic properties of the ones-complement checksum mean that a > correctly checksummed inner packet, including its checksum, has a ones > complement sum depending only on whatever value was used to initialise > the checksum field before checksumming (in the case of TCP and UDP, > this is the ones complement sum of the pseudo header, complemented). > Consequently, if we are going to offload the inner checksum with > CHECKSUM_PARTIAL, we can compute the outer checksum based only on the > packed data not covered by the inner checksum, and the initial value of > the inner checksum field. > > Signed-off-by: Edward Cree > --- > net/ipv4/udp.c | 31 +++ > 1 file changed, 27 insertions(+), 4 deletions(-) > > diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c > index 0c7b0e6..07d679e 100644 > --- a/net/ipv4/udp.c > +++ b/net/ipv4/udp.c > @@ -767,12 +767,35 @@ void udp_set_csum(bool nocheck, struct sk_buff *skb, > { > struct udphdr *uh = udp_hdr(skb); > > - if (nocheck) > + if (nocheck) { > uh->check = 0; > - else if (skb_is_gso(skb)) > + } else if (skb_is_gso(skb)) { > uh->check = ~udp_v4_check(len, saddr, daddr, 0); > - else if (skb_dst(skb) && skb_dst(skb)->dev && > -(skb_dst(skb)->dev->features & NETIF_F_V4_CSUM)) { > + } else if (skb->ip_summed == CHECKSUM_PARTIAL && > + skb_dst(skb) && skb_dst(skb)->dev && > + (skb_dst(skb)->dev->features & NETIF_F_HW_CSUM)) { > + /* Everything from csum_start onwards will be > +* checksummed and will thus have a sum of whatever > +* we previously put in the checksum field (eg. sum > +* of pseudo-header) > +*/ > + __wsum csum; > + > + /* Fill in our pseudo-header checksum */ > + uh->check = ~udp_v4_check(len, saddr, daddr, 0); > + /* Start with complement of inner pseudo-header checksum */ > + csum = ~skb_checksum(skb, skb_checksum_start_offset(skb) + > skb->csum_offset, > +2, 0); > + /* Add in checksum of our headers (incl. 
pseudo-header > +* checksum filled in above) > +*/ > + csum = skb_checksum(skb, 0, skb_checksum_start_offset(skb), > csum); > + /* The result is the outer checksum */ > + uh->check = csum_fold(csum); > + if (uh->check == 0) > + uh->check = CSUM_MANGLED_0; > + } else if (skb_dst(skb) && skb_dst(skb)->dev && > + (skb_dst(skb)->dev->features & NETIF_F_V4_CSUM)) { > It's clever, but I'm not sure this saves much. The outer checksum could still be offloaded to the device without the extra work. Where this technique would be nice is if the device doesn't support checksum offload at all, then we would definitely avoid doing multiple checksums. That's going to be harder since we won't see CHECKSUM_PARTIAL in that case for the inner checksum, but it would get us to the principle that we only ever calculate the packet checksum once or zero times. . > BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL); > > -- > 2.4.3 > > > The information contained in this message is confidential and is intended for > the addressee(s) only. If you have received this message in error, please > notify the sender immediately and delete the message. Unless you are an > addressee (or authorized to receive for an addressee), you may not use, copy > or disclose to anyone this message or any information contained in this > message. The unauthorized use, disclosure, copying or alteration of this > message is strictly prohibited. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH net] openvswitch: fix trivial comment typo
On 14 December 2015 at 05:29, Paolo Abeni wrote: > The commit 33db4125ec74 ("openvswitch: Rename LABEL->LABELS") left > over an old OVS_CT_ATTR_LABEL instance, fix it. > > Fixes: 33db4125ec74 ("openvswitch: Rename LABEL->LABELS") > Signed-off-by: Paolo Abeni Thanks for the fix. Acked-by: Joe Stringer -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v3 net] ravb: clear RIC1 in init instead of stop
From: Kazuya MizuguchiAVB-DMAC Receive FIFO Warning interrupt is not enabled, so it is not necessary to disable the interrupt in ravb_close(). On the other hand, this patch disables the interrupt in ravb_dmac_init() to prevent the possibility that the interrupt is issued by the state that a boot loader left. Signed-off-by: Kazuya Mizuguchi Signed-off-by: Yoshihiro Kaneko --- This patch is based on the master branch of David Miller's networking tree. v3 [Yoshihiro Kaneko] * compile tested only * As suggested by Sergei Shtylyov - update the subject to reflect what this patch is doing. - clear RIC1 in ravb_dmac_init() instead of ravb_open(). v2 [Yoshihiro Kaneko] * compile tested only * As suggested by Sergei Shtylyov - clear RIC1 in ndo_open() to disable the interrupt regardless of a left state from a bootloader. drivers/net/ethernet/renesas/ravb_main.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index b69e0c2..b76303e 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -405,9 +405,11 @@ static int ravb_dmac_init(struct net_device *ndev) /* Timestamp enable */ ravb_write(ndev, TCCR_TFEN, TCCR); - /* Interrupt enable: */ + /* Interrupt init: */ /* Frame receive */ ravb_write(ndev, RIC0_FRE0 | RIC0_FRE1, RIC0); + /* Disable FIFO full warning */ + ravb_write(ndev, 0, RIC1); /* Receive FIFO full error, descriptor empty */ ravb_write(ndev, RIC2_QFE0 | RIC2_QFE1 | RIC2_RFFE, RIC2); /* Frame transmitted, timestamp FIFO updated */ @@ -1471,7 +1473,6 @@ static int ravb_close(struct net_device *ndev) /* Disable interrupts by clearing the interrupt masks. 
*/ ravb_write(ndev, 0, RIC0); - ravb_write(ndev, 0, RIC1); ravb_write(ndev, 0, RIC2); ravb_write(ndev, 0, TIC); -- 1.9.1 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH net] netlink: fix boolean evaluation on bound
On Mon, Dec 14, 2015 at 05:55:25PM +0100, Hannes Frederic Sowa wrote: > portid may be 0, thus bound will set the flag to false for in-kernel > created netlink sockets. > > Fixes: da314c9923fed55 ("netlink: Replace rhash_portid with bound") > Cc: Herbert Xu> Signed-off-by: Hannes Frederic Sowa > --- > This patch should not affect anything and is just meant to close this > loophole in future. I based it on net, but you can also apply it to > net-next. Nack. The bound field only needs to be true for user-space sockets. So please explain why you need it to be true for kernel sockets. Thanks, -- Email: Herbert Xu Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Double free of dst_entry in ipv4_dst_destroy()
Eric - With this patch applied the test ran clean for 2 days. Thanks for your help. Quoting Eric Dumazet: On Fri, 2015-12-11 at 07:48 -0800, Eric Dumazet wrote: On Fri, 2015-12-11 at 06:23 -0800, Eric Dumazet wrote: > On Sun, 2015-12-06 at 17:58 -0800, Eric Dumazet wrote: > > On Sun, 2015-12-06 at 13:03 -0800, Eric Dumazet wrote: > > > > > But then when later we promote a skb->dst to a refcounted one > > > (skb_dst_force()), we might make sure we abort the operation if __refcnt > > > == 0 ( and DST_NOCACHE is in dst->flags) > > > > > > > Minimum patch would be : > > > > Here is a more complete patch, it should fix the issue I think : Hmm, I'll send a v3, I forgot to test DST_NOCACHE properly. David, please test the following patch, thanks ! include/net/dst.h | 33 + include/net/sock.h |2 +- net/ipv4/tcp_ipv4.c |5 ++--- net/ipv6/tcp_ipv6.c |3 +-- 4 files changed, 37 insertions(+), 6 deletions(-) diff --git a/include/net/dst.h b/include/net/dst.h index 1279f9b09791..c7329dcd90cc 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -322,6 +322,39 @@ static inline void skb_dst_force(struct sk_buff *skb) } } +/** + * dst_hold_safe - Take a reference on a dst if possible + * @dst: pointer to dst entry + * + * This helper returns false if it could not safely + * take a reference on a dst. + */ +static inline bool dst_hold_safe(struct dst_entry *dst) +{ + if (dst->flags & DST_NOCACHE) + return atomic_inc_not_zero(&dst->__refcnt); + dst_hold(dst); + return true; +} + +/** + * skb_dst_force_safe - makes sure skb dst is refcounted + * @skb: buffer + * + * If dst is not yet refcounted and not destroyed, grab a ref on it. 
+ */ +static inline void skb_dst_force_safe(struct sk_buff *skb) +{ + if (skb_dst_is_noref(skb)) { + struct dst_entry *dst = skb_dst(skb); + + if (!dst_hold_safe(dst)) + dst = NULL; + + skb->_skb_refdst = (unsigned long)dst; + } +} + /** * __skb_tunnel_rx - prepare skb for rx reinsert diff --git a/include/net/sock.h b/include/net/sock.h index eaef41433d7a..18322bded064 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -816,7 +816,7 @@ void sk_stream_write_space(struct sock *sk); static inline void __sk_add_backlog(struct sock *sk, struct sk_buff *skb) { /* dont let skb dst not refcounted, we are going to leave rcu lock */ - skb_dst_force(skb); + skb_dst_force_safe(skb); if (!sk->sk_backlog.tail) sk->sk_backlog.head = skb; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index db003438aaf5..d8841a2f1569 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1493,7 +1493,7 @@ bool tcp_prequeue(struct sock *sk, struct sk_buff *skb) if (likely(sk->sk_rx_dst)) skb_dst_drop(skb); else - skb_dst_force(skb); + skb_dst_force_safe(skb); __skb_queue_tail(&tp->ucopy.prequeue, skb); tp->ucopy.memory += skb->truesize; @@ -1721,8 +1721,7 @@ void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); - if (dst) { - dst_hold(dst); + if (dst && dst_hold_safe(dst)) { sk->sk_rx_dst = dst; inet_sk(sk)->rx_dst_ifindex = skb->skb_iif; } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index e7aab561b7b4..6b8a8a9091fa 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -93,10 +93,9 @@ static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); - if (dst) { + if (dst && dst_hold_safe(dst)) { const struct rt6_info *rt = (const struct rt6_info *)dst; - dst_hold(dst); sk->sk_rx_dst = dst; inet_sk(sk)->rx_dst_ifindex = skb->skb_iif; inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt); -- To unsubscribe from this list: send the line "unsubscribe 
netdev" in the 
body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v2 net] ravb: Remove clear unhandled interrupt
Hi, 2015-12-14 1:25 GMT+09:00 Sergei Shtylyov: > Hello. > > On 12/13/2015 06:12 PM, Yoshihiro Kaneko wrote: > >The subject doesn't seem to reflect what the patch is doing. > > >> From: Kazuya Mizuguchi >> >> AVB-DMAC Receive FIFO Warning interrupt is not enabled, so it is not >> necessary to disable the interrupt in ndo_close(). >> On the other hand, this patch disables the interrupt in ndo_open() to >> prevent the possibility that the interrupt is issued by the state that >> a boot loader left. >> >> Signed-off-by: Kazuya Mizuguchi >> Signed-off-by: Yoshihiro Kaneko >> --- >> >> This patch is based on the master branch of David Miller's networking >> tree. >> >> v2 [Yoshihiro Kaneko] >> * compile tested only >> * As suggested by Sergei Shtylyov >>- clear RIC1 in ndo_open() to disable the interrupt regardless of >> a left state from a bootloader. >> >> >> drivers/net/ethernet/renesas/ravb_main.c | 4 +++- >> 1 file changed, 3 insertions(+), 1 deletion(-) >> >> diff --git a/drivers/net/ethernet/renesas/ravb_main.c >> b/drivers/net/ethernet/renesas/ravb_main.c >> index b69e0c2..5e34a88 100644 >> --- a/drivers/net/ethernet/renesas/ravb_main.c >> +++ b/drivers/net/ethernet/renesas/ravb_main.c >> @@ -1228,6 +1228,9 @@ static int ravb_open(struct net_device *ndev) >> goto out_free_irq2; >> ravb_emac_init(ndev); >> >> + /* Disable unhandled interrupt */ >> + ravb_write(ndev, 0, RIC1); >> + > > >Oh, I thought the interrupt masks are programmed here but they are set in > ravb_dmac_init() that this function calls. Please do it there instead (where > it was originally done). done. > > [...] > > MBR, Sergei > Thanks, kaneko -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v3 net] ravb: clear RIC1 in init instead of stop
On 12/14/2015 07:24 PM, Yoshihiro Kaneko wrote: From: Kazuya MizuguchiAVB-DMAC Receive FIFO Warning interrupt is not enabled, so it is not necessary to disable the interrupt in ravb_close(). On the other hand, this patch disables the interrupt in ravb_dmac_init() to prevent the possibility that the interrupt is issued by the state that a boot loader left. Signed-off-by: Kazuya Mizuguchi Signed-off-by: Yoshihiro Kaneko Acked-by: Sergei Shtylyov --- This patch is based on the master branch of David Miller's networking tree. I think this should rather go into net-next.git though... MBR, Sergei -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCHv2 net-next] ipv6: allow routes to be configured with expire values
Hi, On 14.12.2015 12:48, Xin Long wrote: >> >> This is the wrong way to do this. >> >> Currently we only ever dump rta_cacheinfo values to the user. >> >> If we use it to set things, we have to completely consider every >> member of that structure as potentially having meaning either >> intended by the user or chosen by us in the future. >> >> Therefore it is a poor choice to start using for specifying the >> expires value, and some other mechanism such as a new RTNETLINK >> attribute, should be used for this. > > we did it like this to avoid adding the new RTNETLINK attribute. > now I got your meaning, and rta_cacheinfo seems to be designed > for dumping info, not for setting info. i guess you hope we do it like: > > + if (tb[RTA_EXPIRES]) { > + unsigned long timeout = > addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ); > + > + if (addrconf_finite_timeout(timeout)) { > + cfg->fc_expires = jiffies_to_clock_t(timeout * HZ); > + cfg->fc_flags |= RTF_EXPIRES; > + } > + } > > hi Hannes, we seem to go back here again, what do you think ? Albeit I had the same idea and wanted to introduce a new netlink attribute, I decided to recommend this patch. It aligns with the code we already have for adding and listing ipv4 and ipv6 addresses (ifa_cacheinfo) and reporting routing changes for ipv4 and ipv6 (rta_cacheinfo). We can easily switch to new attributes. Should we introduce a new interface for this? Thanks, Hannes -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH net] netlink: fix boolean evaluation on bound
On 14.12.2015 18:06, Herbert Xu wrote: > On Mon, Dec 14, 2015 at 05:55:25PM +0100, Hannes Frederic Sowa wrote: >> portid may be 0, thus bound will set the flag to false for in-kernel >> created netlink sockets. >> >> Fixes: da314c9923fed55 ("netlink: Replace rhash_portid with bound") >> Cc: Herbert Xu>> Signed-off-by: Hannes Frederic Sowa >> --- >> This patch should not affect anything and is just meant to close this >> loophole in future. I based it on net, but you can also apply it to >> net-next. > > Nack. The bound field only needs to be true for user-space sockets. > So please explain why you need it to be true for kernel sockets. I reviewed this very carefully and think this is currently a matter of taste as it does not change current logic. Otherwise I would recommend adding a "!!" to express that we actually want bound set based on the portid value? Bye, Hannes -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v5 2/4] net: diag: split inet_diag_dump_one_icsk into two
Currently, inet_diag_dump_one_icsk finds a socket and then dumps its information to userspace. Split it into a part that finds the socket and a part that dumps the information. Signed-off-by: Lorenzo Colitti--- include/linux/inet_diag.h | 5 + net/ipv4/inet_diag.c | 42 +++--- 2 files changed, 32 insertions(+), 15 deletions(-) diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h index 0e707f0..e7032f04 100644 --- a/include/linux/inet_diag.h +++ b/include/linux/inet_diag.h @@ -3,6 +3,7 @@ #include +struct net; struct sock; struct inet_hashinfo; struct nlattr; @@ -41,6 +42,10 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *in_skb, const struct nlmsghdr *nlh, const struct inet_diag_req_v2 *req); +struct sock *inet_diag_find_one_icsk(struct net *net, +struct inet_hashinfo *hashinfo, +const struct inet_diag_req_v2 *req); + int inet_diag_bc_sk(const struct nlattr *_bc, struct sock *sk); extern int inet_diag_register(const struct inet_diag_handler *handler); diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index ab9f8a6..cfabb8f 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -350,17 +350,12 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, nlmsg_flags, unlh); } -int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, - struct sk_buff *in_skb, - const struct nlmsghdr *nlh, - const struct inet_diag_req_v2 *req) +struct sock *inet_diag_find_one_icsk(struct net *net, +struct inet_hashinfo *hashinfo, +const struct inet_diag_req_v2 *req) { - struct net *net = sock_net(in_skb->sk); - struct sk_buff *rep; struct sock *sk; - int err; - err = -EINVAL; if (req->sdiag_family == AF_INET) sk = inet_lookup(net, hashinfo, req->id.idiag_dst[0], req->id.idiag_dport, req->id.idiag_src[0], @@ -375,15 +370,33 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, req->id.idiag_if); #endif else - goto out_nosk; + return ERR_PTR(-EINVAL); - err = -ENOENT; if (!sk) - goto out_nosk; + return 
ERR_PTR(-ENOENT); - err = sock_diag_check_cookie(sk, req->id.idiag_cookie); - if (err) - goto out; + if (sock_diag_check_cookie(sk, req->id.idiag_cookie)) { + sock_gen_put(sk); + return ERR_PTR(-ENOENT); + } + + return sk; +} +EXPORT_SYMBOL_GPL(inet_diag_find_one_icsk); + +int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, + struct sk_buff *in_skb, + const struct nlmsghdr *nlh, + const struct inet_diag_req_v2 *req) +{ + struct net *net = sock_net(in_skb->sk); + struct sk_buff *rep; + struct sock *sk; + int err; + + sk = inet_diag_find_one_icsk(net, hashinfo, req); + if (IS_ERR(sk)) + return PTR_ERR(sk); rep = nlmsg_new(inet_sk_attr_size(), GFP_KERNEL); if (!rep) { @@ -409,7 +422,6 @@ out: if (sk) sock_gen_put(sk); -out_nosk: return err; } EXPORT_SYMBOL_GPL(inet_diag_dump_one_icsk); -- 2.6.0.rc2.230.g3dd15c0 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] sh_eth: fix descriptor access endianness
Hello. On 12/13/2015 11:05 PM, Sergei Shtylyov wrote: The driver never calls cpu_to_edmac() when writing the descriptor address and edmac_to_cpu() when reading it, although it should -- fix this. Note that the frame/buffer length descriptor field accesses also need fixing but since they are both 16-bit we can't use {cpu|edmac}_to_{edmac|cpu}()... Changed my mind about this one: I'll add a new pair of functions to deal with 16-bit conversions as well. Signed-off-by: Sergei Shtylyov[...] MBR, Sergei -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v5 1/4] net: diag: Add the ability to destroy a socket.
This adds a diag_destroy pointer to struct proto that allows a socket to be administratively closed without any action from the process owning the socket or the socket protocol. This allows a privileged userspace process, such as a connection manager or system administration tool, to close sockets belonging to other apps when the network they were established on has disconnected. It is needed on laptops and mobile hosts to ensure that network switches / disconnects do not result in applications being blocked for long periods of time (minutes) in read or connect calls on TCP sockets that will never succeed because the IP address they are bound to is no longer on the system. Closing the sockets causes these calls to fail fast and allows the apps to reconnect on another network. For many years Android kernels have supported this via an out-of-tree SIOCKILLADDR ioctl that is called on every RTM_DELADDR event, but this solution is cleaner, more robust and more flexible: the connection manager can iterate over all connections on the deleted IP address and close all of them. It can also be used to close all sockets opened by a given app process, for example if the user has restricted that app from using the network. It also allows in-kernel callers to perform the same sort of operation by invoking sk->sk_prot->diag_destroy(sk) directly. This patch adds a SOCK_DESTROY operation, a destroy function pointer to sock_diag_handler, and a diag_destroy function pointer. It does not include any implementation code. 
Signed-off-by: Lorenzo Colitti--- include/linux/sock_diag.h | 2 ++ include/net/sock.h | 1 + include/uapi/linux/sock_diag.h | 1 + net/core/sock_diag.c | 23 --- 4 files changed, 24 insertions(+), 3 deletions(-) diff --git a/include/linux/sock_diag.h b/include/linux/sock_diag.h index fddebc6..15072fc 100644 --- a/include/linux/sock_diag.h +++ b/include/linux/sock_diag.h @@ -15,6 +15,7 @@ struct sock_diag_handler { __u8 family; int (*dump)(struct sk_buff *skb, struct nlmsghdr *nlh); int (*get_info)(struct sk_buff *skb, struct sock *sk); + int (*destroy)(struct sk_buff *skb, struct nlmsghdr *nlh); }; int sock_diag_register(const struct sock_diag_handler *h); @@ -68,4 +69,5 @@ bool sock_diag_has_destroy_listeners(const struct sock *sk) } void sock_diag_broadcast_destroy(struct sock *sk); +int sock_diag_destroy(struct sock *sk); #endif diff --git a/include/net/sock.h b/include/net/sock.h index 0ca22b0..a1b30d7f 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1060,6 +1060,7 @@ struct proto { void(*destroy_cgroup)(struct mem_cgroup *memcg); struct cg_proto *(*proto_cgroup)(struct mem_cgroup *memcg); #endif + int (*diag_destroy)(struct sock *sk); }; int proto_register(struct proto *prot, int alloc_slab); diff --git a/include/uapi/linux/sock_diag.h b/include/uapi/linux/sock_diag.h index 49230d3..bae2d80 100644 --- a/include/uapi/linux/sock_diag.h +++ b/include/uapi/linux/sock_diag.h @@ -4,6 +4,7 @@ #include #define SOCK_DIAG_BY_FAMILY 20 +#define SOCK_DESTROY 21 struct sock_diag_req { __u8sdiag_family; diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c index 0c1d58d..967d89f 100644 --- a/net/core/sock_diag.c +++ b/net/core/sock_diag.c @@ -214,7 +214,7 @@ void sock_diag_unregister(const struct sock_diag_handler *hnld) } EXPORT_SYMBOL_GPL(sock_diag_unregister); -static int __sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) +static int __sock_diag_cmd(struct sk_buff *skb, struct nlmsghdr *nlh) { int err; struct sock_diag_req *req = nlmsg_data(nlh); 
@@ -234,8 +234,12 @@ static int __sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) hndl = sock_diag_handlers[req->sdiag_family]; if (hndl == NULL) err = -ENOENT; - else + else if (nlh->nlmsg_type == SOCK_DIAG_BY_FAMILY) err = hndl->dump(skb, nlh); + else if (nlh->nlmsg_type == SOCK_DESTROY && hndl->destroy) + err = hndl->destroy(skb, nlh); + else + err = -EOPNOTSUPP; mutex_unlock(_diag_table_mutex); return err; @@ -261,7 +265,8 @@ static int sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) return ret; case SOCK_DIAG_BY_FAMILY: - return __sock_diag_rcv_msg(skb, nlh); + case SOCK_DESTROY: + return __sock_diag_cmd(skb, nlh); default: return -EINVAL; } @@ -295,6 +300,18 @@ static int sock_diag_bind(struct net *net, int group) return 0; } +int sock_diag_destroy(struct sock *sk) +{ + if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) + return -EPERM; + + if (!sk->sk_prot->diag_destroy) + return -EOPNOTSUPP; + + return
Re: Add a SOCK_DESTROY operation to close sockets from userspace
Here is an updated version. The external behaviour of this patchset is the same as v4; for more details, see that cover letter at http://www.spinics.net/lists/netdev/msg354303.html . This version fixes two bugs spotted by Eric, and implements Tom's suggestion of making the socket destroy code a per-protocol function pointer so that in-kernel callers can use it. The resulting code is a bit longer but a bit more generic, and exposes fewer TCP implementation details. The operation is still called SOCK_DESTROY, but given that its main implementation is the TCP ABORT operation, and that the word "destroy" is used in the inet_csk code to refer to freeing a socket, and in the inet_diag code to refer to broadcasts about sockets being freed, perhaps it could be renamed to SOCK_ABORT. Tested using net_test. Tests check that TCP resets are sent in the right states, that accept(), read() and connect() are interrupted, that closing sockets makes the socket unusable, and that destroying non-TCP sockets returns EOPNOTSUPP. Tests at https://android-review.googlesource.com/#/c/187491/ . -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH net] ravb: Add disable 10base
Hello. On 12/13/2015 06:15 PM, Yoshihiro Kaneko wrote: From: Kazuya Mizuguchi Ethernet AVB does not support 10 Mbps transfer speed. Signed-off-by: Kazuya Mizuguchi Signed-off-by: Yoshihiro Kaneko --- This patch is based on the master branch of David Miller's networking tree. drivers/net/ethernet/renesas/ravb_main.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index b69e0c2..467d416 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -905,6 +905,9 @@ static int ravb_phy_init(struct net_device *ndev) netdev_info(ndev, "limited PHY to 100Mbit/s\n"); } + /* 10BASE is not supported */ + phydev->supported &= ~PHY_10BT_FEATURES; I wonder if we should also modify 'phydev->advertising' like phy_set_max_speed() does... MBR, Sergei -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v5 4/4] net: diag: Support destroying TCP sockets.
This implements SOCK_DESTROY for TCP sockets. It causes all blocking calls on the socket to fail fast with ECONNABORTED and causes a protocol close of the socket. It informs the other end of the connection by sending a RST, i.e., initiating a TCP ABORT as per RFC 793. ECONNABORTED was chosen for consistency with FreeBSD. Signed-off-by: Lorenzo Colitti--- include/net/tcp.h | 4 net/ipv4/Kconfig| 13 + net/ipv4/tcp.c | 34 ++ net/ipv4/tcp_diag.c | 19 +++ net/ipv4/tcp_ipv4.c | 3 +++ net/ipv6/tcp_ipv6.c | 3 +++ 6 files changed, 76 insertions(+) diff --git a/include/net/tcp.h b/include/net/tcp.h index f80e74c..505cef5 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1170,6 +1170,10 @@ void tcp_set_state(struct sock *sk, int state); void tcp_done(struct sock *sk); +#if CONFIG_INET_DIAG_DESTROY +int tcp_abort(struct sock *sk); +#endif + static inline void tcp_sack_reset(struct tcp_options_received *rx_opt) { rx_opt->dsack = 0; diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 416dfa0..31c4496 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -436,6 +436,19 @@ config INET_UDP_DIAG Support for UDP socket monitoring interface used by the ss tool. If unsure, say Y. +config INET_DIAG_DESTROY + bool "INET: allow privileged process to administratively close sockets" + depends on INET_DIAG && (IPV6 || IPV6=n) + default n + ---help--- + Provides a SOCK_DESTROY operation that allows privileged processes + (e.g., a connection manager or a network administration tool such as + ss) to close sockets opened by other processes. Closing a socket in + this way interrupts any blocking read/writes/connect operations on + the socket and causes future socket calls to behave as if the socket + had been disconnected. + If unsure, say N. 
+ menuconfig TCP_CONG_ADVANCED bool "TCP: advanced congestion control" ---help--- diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index c82cca1..fc5068d 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3080,6 +3080,40 @@ void tcp_done(struct sock *sk) } EXPORT_SYMBOL_GPL(tcp_done); +#ifdef CONFIG_INET_DIAG_DESTROY +int tcp_abort(struct sock *sk) +{ + if (!sk_fullsock(sk)) { + sock_gen_put(sk); + return -EOPNOTSUPP; + } + + /* Don't race with userspace socket closes such as tcp_close. */ + lock_sock(sk); + + /* Don't race with BH socket closes such as inet_csk_listen_stop. */ + local_bh_disable(); + bh_lock_sock(sk); + + if (!sock_flag(sk, SOCK_DEAD)) { + sk->sk_err = ECONNABORTED; + /* This barrier is coupled with smp_rmb() in tcp_poll() */ + smp_wmb(); + sk->sk_error_report(sk); + if (tcp_need_reset(sk->sk_state)) + tcp_send_active_reset(sk, GFP_ATOMIC); + tcp_done(sk); + } + + bh_unlock_sock(sk); + local_bh_enable(); + release_sock(sk); + sock_put(sk); + return 0; +} +EXPORT_SYMBOL_GPL(tcp_abort); +#endif + extern struct tcp_congestion_ops tcp_reno; static __initdata unsigned long thash_entries; diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index b316040..8d435f17 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -10,6 +10,8 @@ */ #include +#include +#include #include #include @@ -46,12 +48,29 @@ static int tcp_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *nlh, return inet_diag_dump_one_icsk(_hashinfo, in_skb, nlh, req); } +#ifdef CONFIG_INET_DIAG_DESTROY +static int tcp_diag_destroy(struct sk_buff *in_skb, + const struct inet_diag_req_v2 *req) +{ + struct net *net = sock_net(in_skb->sk); + struct sock *sk = inet_diag_find_one_icsk(net, _hashinfo, req); + + if (IS_ERR(sk)) + return PTR_ERR(sk); + + return sock_diag_destroy(sk); +} +#endif + static const struct inet_diag_handler tcp_diag_handler = { .dump= tcp_diag_dump, .dump_one= tcp_diag_dump_one, .idiag_get_info = tcp_diag_get_info, .idiag_type = IPPROTO_TCP, 
.idiag_info_size = sizeof(struct tcp_info), +#ifdef CONFIG_INET_DIAG_DESTROY + .destroy = tcp_diag_destroy, +#endif }; static int __init tcp_diag_init(void) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index db00343..5e28bf1 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2342,6 +2342,9 @@ struct proto tcp_prot = { .destroy_cgroup = tcp_destroy_cgroup, .proto_cgroup = tcp_proto_cgroup, #endif +#ifdef CONFIG_INET_DIAG_DESTROY + .diag_destroy = tcp_abort, +#endif }; EXPORT_SYMBOL(tcp_prot); diff --git a/net/ipv6/tcp_ipv6.c
[PATCH v5 3/4] net: diag: Support SOCK_DESTROY for inet sockets.
This passes the SOCK_DESTROY operation to the underlying protocol diag handler, or returns -EOPNOTSUPP if that handler does not define a destroy operation. Most of this patch is just renaming functions. This is not strictly necessary, but it would be fairly counterintuitive to have the code to destroy inet sockets be in a function whose name starts with inet_diag_get. Signed-off-by: Lorenzo Colitti --- include/linux/inet_diag.h | 4 net/ipv4/inet_diag.c | 23 +++ 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h index e7032f04..7c27fa1 100644 --- a/include/linux/inet_diag.h +++ b/include/linux/inet_diag.h @@ -24,6 +24,10 @@ struct inet_diag_handler { void(*idiag_get_info)(struct sock *sk, struct inet_diag_msg *r, void *info); + + int (*destroy)(struct sk_buff *in_skb, + const struct inet_diag_req_v2 *req); + __u16 idiag_type; __u16 idiag_info_size; }; diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index cfabb8f..8bb8e7a 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -426,7 +426,7 @@ out: } EXPORT_SYMBOL_GPL(inet_diag_dump_one_icsk); -static int inet_diag_get_exact(struct sk_buff *in_skb, +static int inet_diag_cmd_exact(int cmd, struct sk_buff *in_skb, const struct nlmsghdr *nlh, const struct inet_diag_req_v2 *req) { @@ -436,8 +436,12 @@ static int inet_diag_get_exact(struct sk_buff *in_skb, handler = inet_diag_lock_handler(req->sdiag_protocol); if (IS_ERR(handler)) err = PTR_ERR(handler); - else + else if (cmd == SOCK_DIAG_BY_FAMILY) err = handler->dump_one(in_skb, nlh, req); + else if (cmd == SOCK_DESTROY && handler->destroy) + err = handler->destroy(in_skb, req); + else + err = -EOPNOTSUPP; inet_diag_unlock_handler(handler); return err; @@ -950,7 +954,7 @@ static int inet_diag_get_exact_compat(struct sk_buff *in_skb, req.idiag_states = rc->idiag_states; req.id = rc->id; - return inet_diag_get_exact(in_skb, nlh, &req); + return inet_diag_cmd_exact(SOCK_DIAG_BY_FAMILY, in_skb, nlh, &req
); } static int inet_diag_rcv_msg_compat(struct sk_buff *skb, struct nlmsghdr *nlh) @@ -984,7 +988,7 @@ static int inet_diag_rcv_msg_compat(struct sk_buff *skb, struct nlmsghdr *nlh) return inet_diag_get_exact_compat(skb, nlh); } -static int inet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h) +static int inet_diag_handler_cmd(struct sk_buff *skb, struct nlmsghdr *h) { int hdrlen = sizeof(struct inet_diag_req_v2); struct net *net = sock_net(skb->sk); @@ -992,7 +996,8 @@ static int inet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h) if (nlmsg_len(h) < hdrlen) return -EINVAL; - if (h->nlmsg_flags & NLM_F_DUMP) { + if (h->nlmsg_type == SOCK_DIAG_BY_FAMILY && + h->nlmsg_flags & NLM_F_DUMP) { if (nlmsg_attrlen(h, hdrlen)) { struct nlattr *attr; @@ -1011,7 +1016,7 @@ static int inet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h) } } - return inet_diag_get_exact(skb, h, nlmsg_data(h)); + return inet_diag_cmd_exact(h->nlmsg_type, skb, h, nlmsg_data(h)); } static @@ -1062,14 +1067,16 @@ int inet_diag_handler_get_info(struct sk_buff *skb, struct sock *sk) static const struct sock_diag_handler inet_diag_handler = { .family = AF_INET, - .dump = inet_diag_handler_dump, + .dump = inet_diag_handler_cmd, .get_info = inet_diag_handler_get_info, + .destroy = inet_diag_handler_cmd, }; static const struct sock_diag_handler inet6_diag_handler = { .family = AF_INET6, - .dump = inet_diag_handler_dump, + .dump = inet_diag_handler_cmd, .get_info = inet_diag_handler_get_info, + .destroy = inet_diag_handler_cmd, }; int inet_diag_register(const struct inet_diag_handler *h) -- 2.6.0.rc2.230.g3dd15c0 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] sh_eth: uninline sh_eth_{write|read}()
From: Sergei Shtylyov Date: Sun, 13 Dec 2015 01:44:50 +0300 > Commit 3365711df024 ("sh_eth: WARN on access to a register not implemented in > a particular chip") added WARN_ON() to sh_eth_{read|write}(), thus making > it unacceptable for these functions to be *inline* anymore. Remove *inline* > and move the functions from the header to the driver itself. Below is our > code economy with ARM gcc 4.7.3: > > $ size drivers/net/ethernet/renesas/sh_eth.o{~,} > text data bss dec hex filename > 32489 1140 0 33629 835d > drivers/net/ethernet/renesas/sh_eth.o~ > 25413 1140 0 26553 67b9 > drivers/net/ethernet/renesas/sh_eth.o > > Suggested-by: Ben Hutchings > Signed-off-by: Sergei Shtylyov Applied. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC] ipv6: use a random ifid for headerless devices
Hello, On 08.12.2015 19:57, Bjørn Mork wrote: > Hannes Frederic Sowawrites: >> On 05.12.2015 20:02, Bjørn Mork wrote: >>> Hannes Frederic Sowa writes: On Thu, Dec 3, 2015, at 20:29, Bjørn Mork wrote: > After looking more at addrconf, I started wondering if we couldn't abuse > ipv6_generate_stable_address() for this purpose? We could add a new > addr_gen_mode which would trigger automatic generation of a secret if > stable_secret is uninitialized. This would be good enough to ensure > stability until the interface is destroyed. And it would still allow > the adminstrator to select IN6_ADDR_GEN_MODE_STABLE_PRIVACY by entering > a new secret. I am fine with your proposal but I would really like to see it only happen on the per-interface stable_secret instance. >>> >>> Do you think something like the patch below will be OK? >> >> I wouldn't call it IN6_ADDR_GEN_MODE_AUTO, this doesn't say anything. >> But the idea is already good. > > No, I didn't like that name either. I just couldn't come up with > anything descriptive, short and non-redundant. "random", "generated", > "stable" are even worse. And that's about where my imagination ended. > We need a child here :) Sorry for answering so late... What do you think about simply using IN6_ADDR_GEN_MODE_RANDOM? >>> Or would it be better to drop the additional mode and just generate a >>> random secret if the mode is IN6_ADDR_GEN_MODE_STABLE_PRIVACY and the >>> secrets are missing? Or would that be changing the userspace ABI? This >>> is not clear to me... >> >> I would not like to do that somehow. The problem is that the stable >> secrets get written by user space probably during boot-up, but we don't >> know when. That's why I would also not set the ->initialized flag, so >> user can overwrite it to the final secret later on. We block it otherwise. > > I am not sure I follow... There is nothing preventing userspace from > initializing the secret before or after generation of the random secret. I actually missed that. 
Shortly before sending the patch I decided to allow to reinitialize the stable_secret. Before I had a check in there to not being able to rewrite the stable_secret after it became initialized. So we are good here. Sorry for the confusion. > Writing to /proc/sys/net/ipv6/conf//stable_secret will update the > secret and set the mode to IN6_ADDR_GEN_MODE_STABLE_PRIVACY as before, > even if we have generated a random secret first. I have verified that > this part works as expected. Thanks! > I guess we should check >ipv6.devconf_dflt->stable_secret too > before choosing the default mode. IN6_ADDR_GEN_MODE_STABLE_PRIVACY is a > more approproate default if a default secret is set. IMHO, this should > really be the case without the proposed change too, but it isn't. The > current behaviour confuses me: Setting 'default' changes all existing > interfaces, but does not change the default for new interfaces. Is that > right? Nope, that is a good point. I think we should do that unconditionally. If we have a stable secret set, which we can use, we always should use this address generation mode. Can you send the addition of this as a separate patch so we can propose it for stable? Otherwise I can do that, too. >> My proposal would be to use the stable privacy generator in case the >> device does not have a device address for EUI-48 generation with a >> secret which we simply generate on the stack. Let's factor out the part >> of the generator which depends on the inet6_dev and cnf bits for that. > > Not sure I get this part either. The point was to have stable addresses > for the lifetime of the netdev. We can generate the secret on the > stack, but we will still need to stash it somewhere. That could of > course be to a new field. But I don't see the point since there is no > way you can combine this mode with IN6_ADDR_GEN_MODE_STABLE_PRIVACY. > Only one mode can be active at, and that mode can then own the secret. Ok, your argument makes sense. 
> As long as we can manage to introduce this without changing any existing > behaviour, of course. Besides the naming I think your patch looks fine. Thank you, Hannes -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC] ipv6: use a random ifid for headerless devices
Hannes Frederic Sowa writes: > Sorry for answering so late... No problem. There is no rush here AFAICS. Thanks for taking the time to look at this. > What do you think about simply using IN6_ADDR_GEN_MODE_RANDOM? Yes, that's fine with me (actually what I first used :) >> I guess we should check &net->ipv6.devconf_dflt->stable_secret too >> before choosing the default mode. IN6_ADDR_GEN_MODE_STABLE_PRIVACY is a >> more appropriate default if a default secret is set. IMHO, this should >> really be the case without the proposed change too, but it isn't. The >> current behaviour confuses me: Setting 'default' changes all existing >> interfaces, but does not change the default for new interfaces. Is that >> right? > > Nope, that is a good point. I think we should do that unconditionally. > If we have a stable secret set, which we can use, we always should use > this address generation mode. Can you send the addition of this as a > separate patch so we can propose it for stable? Otherwise I can do that, > too. I can do that if it can wait for whenever I get around to actually submitting this. No guarantee that will be in time for v4.5. >>> My proposal would be to use the stable privacy generator in case the >>> device does not have a device address for EUI-48 generation with a >>> secret which we simply generate on the stack. Let's factor out the part >>> of the generator which depends on the inet6_dev and cnf bits for that. >> >> Not sure I get this part either. The point was to have stable addresses >> for the lifetime of the netdev. We can generate the secret on the >> stack, but we will still need to stash it somewhere. That could of >> course be to a new field. But I don't see the point since there is no >> way you can combine this mode with IN6_ADDR_GEN_MODE_STABLE_PRIVACY. >> Only one mode can be active at a time, and that mode can then own the secret. > > Ok, your argument makes sense. 
> >> As long as we can manage to introduce this without changing any existing >> behaviour, of course. > > Besides the naming I think your patch looks fine. Thanks! Will fixup that and formally submit when I find some time. Bjørn -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[Patch net] pptp: verify sockaddr_len in pptp_bind() and pptp_connect()
Reported-by: Dmitry Vyukov Signed-off-by: Cong Wang --- drivers/net/ppp/pptp.c | 6 ++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ppp/pptp.c b/drivers/net/ppp/pptp.c index fc69e41..597c53e 100644 --- a/drivers/net/ppp/pptp.c +++ b/drivers/net/ppp/pptp.c @@ -419,6 +419,9 @@ static int pptp_bind(struct socket *sock, struct sockaddr *uservaddr, struct pptp_opt *opt = &po->proto.pptp; int error = 0; + if (sockaddr_len < sizeof(struct sockaddr_pppox)) + return -EINVAL; + lock_sock(sk); opt->src_addr = sp->sa_addr.pptp; @@ -440,6 +443,9 @@ static int pptp_connect(struct socket *sock, struct sockaddr *uservaddr, struct flowi4 fl4; int error = 0; + if (sockaddr_len < sizeof(struct sockaddr_pppox)) + return -EINVAL; + if (sp->sa_protocol != PX_PROTO_PPTP) return -EINVAL; -- 1.8.3.1 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v4 1/5] geneve: Add geneve udp port offload for ethernet devices
Resending the series with a cover-letter that got missed. Thanks Anjali On 12/14/2015 11:57 AM, Anjali Singhai Jain wrote: Add ndo_ops to add/del UDP ports to a device that supports geneve offload. v2: Comment fix. Signed-off-by: Anjali Singhai Jain Signed-off-by: Kiran Patil --- drivers/net/geneve.c | 23 +++ include/linux/netdevice.h | 20 +++- 2 files changed, 42 insertions(+), 1 deletion(-) -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-next] ipv6: addrconf: drop ieee802154 specific things
This patch removes ARPHRD_IEEE802154 from addrconf handling. In the earlier days of 802.15.4 6LoWPAN, the interface type was ARPHRD_IEEE802154 which introduced several issues, because 802.15.4 interfaces used the same type. Since commit 965e613d299c ("ieee802154: 6lowpan: fix ARPHRD to ARPHRD_6LOWPAN") we use ARPHRD_6LOWPAN for 6LoWPAN interfaces. This patch will remove ARPHRD_IEEE802154 which is currently dead code, because ARPHRD_IEEE802154 doesn't reach the minimum 1280 MTU of IPv6. Also we use 6LoWPAN EUI64 specific defines instead of using link-layer constants from 802.15.4 link-layer header. Cc: David S. Miller Cc: Alexey Kuznetsov Cc: James Morris Cc: Hideaki YOSHIFUJI Cc: Patrick McHardy Signed-off-by: Alexander Aring --- net/ipv6/addrconf.c | 8 +++- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 5e9111d..7082fb7 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -70,7 +70,7 @@ #include #include -#include +#include #include #include #include @@ -1947,9 +1947,9 @@ static void addrconf_leave_anycast(struct inet6_ifaddr *ifp) static int addrconf_ifid_eui64(u8 *eui, struct net_device *dev) { - if (dev->addr_len != IEEE802154_ADDR_LEN) + if (dev->addr_len != EUI64_ADDR_LEN) return -1; - memcpy(eui, dev->dev_addr, 8); + memcpy(eui, dev->dev_addr, EUI64_ADDR_LEN); eui[0] ^= 2; return 0; } @@ -2041,7 +2041,6 @@ static int ipv6_generate_eui64(u8 *eui, struct net_device *dev) case ARPHRD_IPGRE: return addrconf_ifid_gre(eui, dev); case ARPHRD_6LOWPAN: - case ARPHRD_IEEE802154: return addrconf_ifid_eui64(eui, dev); case ARPHRD_IEEE1394: return addrconf_ifid_ieee1394(eui, dev); @@ -3066,7 +3065,6 @@ static void addrconf_dev_config(struct net_device *dev) (dev->type != ARPHRD_FDDI) && (dev->type != ARPHRD_ARCNET) && (dev->type != ARPHRD_INFINIBAND) && - (dev->type != ARPHRD_IEEE802154) && (dev->type != ARPHRD_IEEE1394) && (dev->type != ARPHRD_TUNNEL6) && (dev->type != ARPHRD_6LOWPAN)) { -- 2.6.1 -- To 
unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
re: [PATCH] chelsio: add support for other 10G boards
Hello Stephen Hemminger, The patch f1d3d38af757: "[PATCH] chelsio: add support for other 10G boards" from Dec 1, 2006, leads to the following static checker warning: drivers/net/ethernet/chelsio/cxgb/subr.c:630 t1_link_start() warn: was shift intended here '(mac->adapter->params.nports < 2)' drivers/net/ethernet/chelsio/cxgb/subr.c 623 int t1_link_start(struct cphy *phy, struct cmac *mac, struct link_config *lc) 624 { 625 unsigned int fc = lc->requested_fc & (PAUSE_RX | PAUSE_TX); 626 627 if (lc->supported & SUPPORTED_Autoneg) { 628 lc->advertising &= ~(ADVERTISED_ASYM_PAUSE | ADVERTISED_PAUSE); 629 if (fc) { 630 if (fc == ((PAUSE_RX | PAUSE_TX) & 631 (mac->adapter->params.nports < 2))) This condition is very weird. PAUSE_RX is 1. PAUSE_TX is 2. The nports < 2 condition is either 0 or 1. We know fc is in 1-3 range. We could re-write it as: if (fc == 1 && mac->adapter->params.nports < 2) The static checker is suggesting that we could do nports << 2 but then the condition would never be true so that can't be right. 632 lc->advertising |= ADVERTISED_PAUSE; 633 else { 634 lc->advertising |= ADVERTISED_ASYM_PAUSE; 635 if (fc == PAUSE_RX) 636 lc->advertising |= ADVERTISED_PAUSE; 637 } 638 } 639 phy->ops->advertise(phy, lc->advertising); regards, dan carpenter -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v5 4/5] geneve: Add geneve_get_rx_port support
This patch adds an op that the drivers can call into to get existing geneve ports. Signed-off-by: Anjali Singhai Jain --- drivers/net/geneve.c | 24 include/net/geneve.h | 8 2 files changed, 32 insertions(+) diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 89325e4..31b19fd 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -1109,6 +1109,30 @@ static struct device_type geneve_type = { .name = "geneve", }; +/* Calls the ndo_add_geneve_port of the caller in order to + * supply the listening GENEVE udp ports. Callers are expected + * to implement the ndo_add_geneve_port. + */ +void geneve_get_rx_port(struct net_device *dev) +{ + struct net *net = dev_net(dev); + struct geneve_net *gn = net_generic(net, geneve_net_id); + struct geneve_sock *gs; + sa_family_t sa_family; + struct sock *sk; + __be16 port; + + rcu_read_lock(); + list_for_each_entry_rcu(gs, &gn->sock_list, list) { + sk = gs->sock->sk; + sa_family = sk->sk_family; + port = inet_sk(sk)->inet_sport; + dev->netdev_ops->ndo_add_geneve_port(dev, sa_family, port); + } + rcu_read_unlock(); +} +EXPORT_SYMBOL_GPL(geneve_get_rx_port); + /* Initialize the device structure. */ static void geneve_setup(struct net_device *dev) { diff --git a/include/net/geneve.h b/include/net/geneve.h index 3106ed6..e6c23dc 100644 --- a/include/net/geneve.h +++ b/include/net/geneve.h @@ -62,6 +62,14 @@ struct genevehdr { struct geneve_opt options[]; }; +#if IS_ENABLED(CONFIG_GENEVE) +void geneve_get_rx_port(struct net_device *netdev); +#else +static inline void geneve_get_rx_port(struct net_device *netdev) +{ +} +#endif + #ifdef CONFIG_INET struct net_device *geneve_dev_create_fb(struct net *net, const char *name, u8 name_assign_type, u16 dst_port); -- 1.8.1.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[ANNOUNCE] Another round of NetDev 1.1 updates
Hi! We would like to share with you another round of incremental updates on accepted sessions in netdev 1.1, the community-driven Linux networking conference held back-to-back with netconf in Sevilla, Spain, February 10-12, 2016. = Keynote = * "Hardware Checksumming: Less is More" (David S. Miller) = BoF = * "Unlocking SR-IOV in Linux" (John Fastabend) = Talks = * "Measuring wifi performance across all Google Fiber customers" (Avery Pennarun) * "Load balancing with nftables" (Laura Garcia) = Tutorials = * "Running Cellular Network Infrastructure on Linux" (Harald Welte) Remember session proposal submission is open until Dec 20th. Registration is open at: https://www.netdevconf.org/1.1/registration.html _ / If you miss netdev 1.1, \ \ you'll regret! / - \ ^__^ \ (oo)\___ (__)\ )\/\ ||w | || || For more info, visit: * netdev 1.1: https://www.netdevconf.org/1.1/ * netconf 2016: http://vger.kernel.org/netconf2016.html We would also like to thank those that already confirmed sponsorship: Cumulus Networks, Facebook, VmWare, Google, Mellanox, Mojatatu Networks, OISF/Suricata and Zen Load Balancer. Thank you. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v5 0/5] Add support for Geneve udp port offload
This patch series adds new ndo ops for Geneve add/del port, so as to help offload Geneve tunnel functionalities such as RX checksum, RSS, filters etc. i40e driver has been tested with the changes to make sure the offloads happen. We do understand that this is not the ideal solution and most likely will be redone with a more generic offload framework. But this certainly will enable us to start seeing benefits of the accelerations for Geneve tunnels. As a side note, we did find an existing issue in i40e driver where a service task can modify tunnel data structures with no locks held to help linearize access. A separate patch will be taking care of that issue. A question out to the community is regarding the driver Kconfig parameters for VxLAN and Geneve, it would be ideal to drop those if there is a way to help resolve vxlan/geneve_get_rx_port symbols while the tunnel modules are not loaded. Performance numbers: With the offloads enabled on X722 devices with remote checksum enabled and no other tuning in terms of CPU governor etc on my test machine: With offload Throughput: 5527Mbits/sec with a single thread %cpu: ~43% per core with 4 threads Without offload Throughput: 2364Mbits/sec with a single thread %cpu: ~99% per core with 4 threads These numbers will get better for X722 as it is being worked on. But this does bring out the delta in terms of when the stack is notified with csum_level 1 and CHECKSUM_UNNECESSARY vs not without the RX offload. --- v2: Comment fix. v3: Add Performance data. v4: Comment fix and split a patch into two. v5: Add cover letter. 
Anjali Singhai Jain (5): geneve: Add geneve udp port offload for ethernet devices i40e: geneve tunnel offload support i40e: Kernel dependency update for i40e to support geneve offload geneve: Add geneve_get_rx_port support i40e: Call geneve_get_rx_port to get the existing Geneve ports drivers/net/ethernet/intel/Kconfig | 10 ++ drivers/net/ethernet/intel/i40e/i40e.h | 16 +-- drivers/net/ethernet/intel/i40e/i40e_main.c | 170 +++- drivers/net/ethernet/intel/i40e/i40e_txrx.c | 8 +- drivers/net/ethernet/intel/i40e/i40e_txrx.h | 2 +- drivers/net/geneve.c| 47 include/linux/netdevice.h | 20 +++- include/net/geneve.h| 8 ++ 8 files changed, 237 insertions(+), 44 deletions(-) -- 1.8.1.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v5 3/5] i40e: Kernel dependency update for i40e to support geneve offload
Update the Kconfig file with dependency for supporting GENEVE tunnel offloads. Signed-off-by: Anjali Singhai Jain Signed-off-by: Kiran Patil --- drivers/net/ethernet/intel/Kconfig | 10 ++ 1 file changed, 10 insertions(+) diff --git a/drivers/net/ethernet/intel/Kconfig b/drivers/net/ethernet/intel/Kconfig index 4163b16..fa593dd 100644 --- a/drivers/net/ethernet/intel/Kconfig +++ b/drivers/net/ethernet/intel/Kconfig @@ -280,6 +280,16 @@ config I40E_VXLAN Say Y here if you want to use Virtual eXtensible Local Area Network (VXLAN) in the driver. +config I40E_GENEVE + bool "Generic Network Virtualization Encapsulation (GENEVE) Support" + depends on I40E && GENEVE && !(I40E=y && GENEVE=m) + default n + ---help--- + This allows one to create GENEVE virtual interfaces that provide + Layer 2 Networks over Layer 3 Networks. GENEVE is often used + to tunnel virtual network infrastructure in virtualized environments. + Say Y here if you want to use GENEVE in the driver. + config I40E_DCB bool "Data Center Bridging (DCB) Support" default n -- 1.8.1.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v5 2/5] i40e: geneve tunnel offload support
This patch adds driver hooks to implement ndo_ops to add/del udp port in the HW to identify GENEVE tunnels. Signed-off-by: Anjali Singhai Jain Signed-off-by: Kiran Patil --- drivers/net/ethernet/intel/i40e/i40e.h | 16 +-- drivers/net/ethernet/intel/i40e/i40e_main.c | 167 ++-- drivers/net/ethernet/intel/i40e/i40e_txrx.c | 8 +- drivers/net/ethernet/intel/i40e/i40e_txrx.h | 2 +- 4 files changed, 150 insertions(+), 43 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index b7bc014..c202f9b 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -245,6 +245,11 @@ struct i40e_tc_configuration { struct i40e_tc_info tc_info[I40E_MAX_TRAFFIC_CLASS]; }; +struct i40e_udp_port_config { + __be16 index; + u8 type; +}; + /* struct that defines the Ethernet device */ struct i40e_pf { struct pci_dev *pdev; @@ -281,11 +286,9 @@ struct i40e_pf { u32 fd_atr_cnt; u32 fd_tcp_rule; -#ifdef CONFIG_I40E_VXLAN - __be16 vxlan_ports[I40E_MAX_PF_UDP_OFFLOAD_PORTS]; - u16 pending_vxlan_bitmap; + struct i40e_udp_port_config udp_ports[I40E_MAX_PF_UDP_OFFLOAD_PORTS]; + u16 pending_udp_bitmap; -#endif enum i40e_interrupt_policy int_policy; u16 rx_itr_default; u16 tx_itr_default; @@ -322,9 +325,7 @@ struct i40e_pf { #define I40E_FLAG_FD_ATR_ENABLED BIT_ULL(22) #define I40E_FLAG_PTP BIT_ULL(25) #define I40E_FLAG_MFP_ENABLED BIT_ULL(26) -#ifdef CONFIG_I40E_VXLAN -#define I40E_FLAG_VXLAN_FILTER_SYNC BIT_ULL(27) -#endif +#define I40E_FLAG_UDP_FILTER_SYNC BIT_ULL(27) #define I40E_FLAG_PORT_ID_VALID BIT_ULL(28) #define I40E_FLAG_DCB_CAPABLE BIT_ULL(29) #define I40E_FLAG_RSS_AQ_CAPABLE BIT_ULL(31) @@ -336,6 +337,7 @@ struct i40e_pf { #define I40E_FLAG_MULTIPLE_TCP_UDP_RSS_PCTYPE BIT_ULL(38) #define I40E_FLAG_LINK_POLLING_ENABLED BIT_ULL(39) #define I40E_FLAG_VEB_MODE_ENABLED BIT_ULL(40) +#define I40E_FLAG_GENEVE_OFFLOAD_CAPABLE BIT_ULL(41) #define I40E_FLAG_NO_PCI_LINK_CHECK BIT_ULL(42) /* tracks features that
get auto disabled by errors */ diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index b118deb..81a6693 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -27,9 +27,12 @@ /* Local includes */ #include "i40e.h" #include "i40e_diag.h" -#ifdef CONFIG_I40E_VXLAN +#if IS_ENABLED(CONFIG_VXLAN) #include <net/vxlan.h> #endif +#if IS_ENABLED(CONFIG_GENEVE) +#include <net/geneve.h> +#endif const char i40e_driver_name[] = "i40e"; static const char i40e_driver_string[] = @@ -7036,30 +7039,30 @@ static void i40e_handle_mdd_event(struct i40e_pf *pf) i40e_flush(hw); } -#ifdef CONFIG_I40E_VXLAN /** - * i40e_sync_vxlan_filters_subtask - Sync the VSI filter list with HW + * i40e_sync_udp_filters_subtask - Sync the VSI filter list with HW * @pf: board private structure **/ -static void i40e_sync_vxlan_filters_subtask(struct i40e_pf *pf) +static void i40e_sync_udp_filters_subtask(struct i40e_pf *pf) { +#if IS_ENABLED(CONFIG_VXLAN) || IS_ENABLED(CONFIG_GENEVE) struct i40e_hw *hw = &pf->hw; i40e_status ret; __be16 port; int i; - if (!(pf->flags & I40E_FLAG_VXLAN_FILTER_SYNC)) + if (!(pf->flags & I40E_FLAG_UDP_FILTER_SYNC)) return; - pf->flags &= ~I40E_FLAG_VXLAN_FILTER_SYNC; + pf->flags &= ~I40E_FLAG_UDP_FILTER_SYNC; for (i = 0; i < I40E_MAX_PF_UDP_OFFLOAD_PORTS; i++) { - if (pf->pending_vxlan_bitmap & BIT_ULL(i)) { - pf->pending_vxlan_bitmap &= ~BIT_ULL(i); - port = pf->vxlan_ports[i]; + if (pf->pending_udp_bitmap & BIT_ULL(i)) { + pf->pending_udp_bitmap &= ~BIT_ULL(i); + port = pf->udp_ports[i].index; if (port) ret = i40e_aq_add_udp_tunnel(hw, ntohs(port), -I40E_AQC_TUNNEL_TYPE_VXLAN, +pf->udp_ports[i].type, NULL, NULL); else ret = i40e_aq_del_udp_tunnel(hw, i, NULL); @@ -7072,13 +7075,13 @@ static void i40e_sync_vxlan_filters_subtask(struct i40e_pf *pf) i40e_stat_str(&pf->hw, ret), i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); - pf->vxlan_ports[i] = 0; +
[PATCH v5 1/5] geneve: Add geneve udp port offload for ethernet devices
Add ndo_ops to add/del UDP ports to a device that supports geneve offload. v2: Comment fix. Signed-off-by: Anjali Singhai Jain Signed-off-by: Kiran Patil --- drivers/net/geneve.c | 23 +++ include/linux/netdevice.h | 20 +++- 2 files changed, 42 insertions(+), 1 deletion(-) diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 0750d7a..89325e4 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -380,8 +380,11 @@ static struct socket *geneve_create_sock(struct net *net, bool ipv6, static void geneve_notify_add_rx_port(struct geneve_sock *gs) { + struct net_device *dev; struct sock *sk = gs->sock->sk; + struct net *net = sock_net(sk); sa_family_t sa_family = sk->sk_family; + __be16 port = inet_sk(sk)->inet_sport; int err; if (sa_family == AF_INET) { @@ -390,6 +393,14 @@ static void geneve_notify_add_rx_port(struct geneve_sock *gs) pr_warn("geneve: udp_add_offload failed with status %d\n", err); } + + rcu_read_lock(); + for_each_netdev_rcu(net, dev) { + if (dev->netdev_ops->ndo_add_geneve_port) + dev->netdev_ops->ndo_add_geneve_port(dev, sa_family, +port); + } + rcu_read_unlock(); } static int geneve_hlen(struct genevehdr *gh) @@ -530,8 +541,20 @@ static struct geneve_sock *geneve_socket_create(struct net *net, __be16 port, static void geneve_notify_del_rx_port(struct geneve_sock *gs) { + struct net_device *dev; struct sock *sk = gs->sock->sk; + struct net *net = sock_net(sk); sa_family_t sa_family = sk->sk_family; + __be16 port = inet_sk(sk)->inet_sport; + + rcu_read_lock(); + for_each_netdev_rcu(net, dev) { + if (dev->netdev_ops->ndo_del_geneve_port) + dev->netdev_ops->ndo_del_geneve_port(dev, sa_family, +port); + } + + rcu_read_unlock(); if (sa_family == AF_INET) udp_del_offload(&gs->udp_offloads); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 1bb21ff..82065a7 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1013,6 +1013,19 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev, * a new
port starts listening. The operation is protected by the * vxlan_net->sock_lock. * + * void (*ndo_add_geneve_port)(struct net_device *dev, + * sa_family_t sa_family, __be16 port); + * Called by geneve to notify a driver about the UDP port and socket + * address family that geneve is listening to. It is called only when + * a new port starts listening. The operation is protected by the + * geneve_net->sock_lock. + * + * void (*ndo_del_geneve_port)(struct net_device *dev, + * sa_family_t sa_family, __be16 port); + * Called by geneve to notify the driver about a UDP port and socket + * address family that geneve is not listening to anymore. The operation + * is protected by the geneve_net->sock_lock. + * * void (*ndo_del_vxlan_port)(struct net_device *dev, * sa_family_t sa_family, __be16 port); * Called by vxlan to notify the driver about a UDP port and socket @@ -1217,7 +1230,12 @@ struct net_device_ops { void (*ndo_del_vxlan_port)(struct net_device *dev, sa_family_t sa_family, __be16 port); - + void (*ndo_add_geneve_port)(struct net_device *dev, + sa_family_t sa_family, + __be16 port); + void (*ndo_del_geneve_port)(struct net_device *dev, + sa_family_t sa_family, + __be16 port); void* (*ndo_dfwd_add_station)(struct net_device *pdev, struct net_device *dev); void (*ndo_dfwd_del_station)(struct net_device *pdev, -- 1.8.1.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v5 5/5] i40e: Call geneve_get_rx_port to get the existing Geneve ports
This patch adds a call to geneve_get_rx_port in i40e so that when it comes up it can learn about the existing geneve tunnels. Signed-off-by: Anjali Singhai Jain --- drivers/net/ethernet/intel/i40e/i40e_main.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 81a6693..11059be 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -5339,6 +5339,9 @@ int i40e_open(struct net_device *netdev) #ifdef CONFIG_I40E_VXLAN vxlan_get_rx_port(netdev); #endif +#ifdef CONFIG_I40E_GENEVE + geneve_get_rx_port(netdev); +#endif return 0; } -- 1.8.1.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html