Re: [RFC PATCH net-next 5/6] net: dsa: mv88e6060: add register defines header file
On Mon, Nov 02, 2015 at 11:57:27AM +0100, Neil Armstrong wrote: > To align with the mv88e6xxx code, add a similar header file > with all the register defines. > The file is based on the mv88e6xxx header for coherency. Hi Neil I did a --side-by-side diff between this and mv88e6xxx.h. I don't think there is enough the same to allow using mv88e6xxx.c functions with the mv88e6060 driver :-( Andrew > > Signed-off-by: Neil Armstrong> --- > drivers/net/dsa/mv88e6060.h | 108 > > 1 file changed, 108 insertions(+) > create mode 100644 drivers/net/dsa/mv88e6060.h > > diff --git a/drivers/net/dsa/mv88e6060.h b/drivers/net/dsa/mv88e6060.h > new file mode 100644 > index 000..ed3b3ac > --- /dev/null > +++ b/drivers/net/dsa/mv88e6060.h > @@ -0,0 +1,108 @@ > +/* > + * net/dsa/mv88e6060.h - Marvell 88e6060 switch chip support > + * Copyright (c) 2008 Marvell Semiconductor > + * > + * This program is free software; you can redistribute it and/or modify > + * it under the terms of the GNU General Public License as published by > + * the Free Software Foundation; either version 2 of the License, or > + * (at your option) any later version. 
> + */ > + > +#ifndef __MV88E6060_H > +#define __MV88E6060_H > + > +#define MV88E6060_PORTS 6 > + > +#define REG_PORT(p) (0x8 + (p)) > +#define PORT_STATUS 0x00 > +#define PORT_STATUS_PAUSE_EN BIT(15) > +#define PORT_STATUS_MY_PAUSE BIT(14) > +#define PORT_STATUS_FC (PORT_STATUS_MY_PAUSE | > PORT_STATUS_PAUSE_EN) > +#define PORT_STATUS_RESOLVED BIT(13) > +#define PORT_STATUS_LINK BIT(12) > +#define PORT_STATUS_PORTMODE BIT(11) > +#define PORT_STATUS_PHYMODE BIT(10) > +#define PORT_STATUS_DUPLEX BIT(9) > +#define PORT_STATUS_SPEEDBIT(8) > +#define PORT_SWITCH_ID 0x03 > +#define PORT_SWITCH_ID_6060 0x0600 > +#define PORT_SWITCH_ID_6060_MASK 0xfff0 > +#define PORT_SWITCH_ID_6060_R1 0x0601 > +#define PORT_SWITCH_ID_6060_R2 0x0602 > +#define PORT_CONTROL 0x04 > +#define PORT_CONTROL_FORCE_FLOW_CTRL BIT(15) > +#define PORT_CONTROL_TRAILER BIT(14) > +#define PORT_CONTROL_HEADER BIT(11) > +#define PORT_CONTROL_INGRESS_MODEBIT(8) > +#define PORT_CONTROL_VLAN_TUNNEL BIT(7) > +#define PORT_CONTROL_STATE_MASK 0x03 > +#define PORT_CONTROL_STATE_DISABLED 0x00 > +#define PORT_CONTROL_STATE_BLOCKING 0x01 > +#define PORT_CONTROL_STATE_LEARNING 0x02 > +#define PORT_CONTROL_STATE_FORWARDING0x03 > +#define PORT_VLAN_MAP0x06 > +#define PORT_VLAN_MAP_DBNUM_SHIFT12 > +#define PORT_VLAN_MAP_TABLE_MASK 0x1f > +#define PORT_ASSOC_VECTOR0x0b > +#define PORT_ASSOC_VECTOR_MONITORBIT(15) > +#define PORT_ASSOC_VECTOR_PAV_MASK 0x1f > +#define PORT_RX_CNTR 0x10 > +#define PORT_TX_CNTR 0x11 > + > +#define REG_GLOBAL 0x0f > +#define GLOBAL_STATUS0x00 > +#define GLOBAL_STATUS_SW_MODE_MASK (0x3 << 12) > +#define GLOBAL_STATUS_SW_MODE_0 (0x0 << 12) > +#define GLOBAL_STATUS_SW_MODE_1 (0x1 << 12) > +#define GLOBAL_STATUS_SW_MODE_2 (0x2 << 12) > +#define GLOBAL_STATUS_SW_MODE_3 (0x3 << 12) > +#define GLOBAL_STATUS_INIT_READY BIT(11) > +#define GLOBAL_STATUS_ATU_FULL BIT(3) > +#define GLOBAL_STATUS_ATU_DONE BIT(2) > +#define GLOBAL_STATUS_PHY_INTBIT(1) > +#define GLOBAL_STATUS_EEINT BIT(0) > +#define 
GLOBAL_MAC_010x01 > +#define GLOBAL_MAC_01_DIFF_ADDR BIT(8) > +#define GLOBAL_MAC_230x02 > +#define GLOBAL_MAC_450x03 > +#define GLOBAL_CONTROL 0x04 > +#define GLOBAL_CONTROL_DISCARD_EXCESSBIT(13) > +#define GLOBAL_CONTROL_MAX_FRAME_1536BIT(10) > +#define GLOBAL_CONTROL_RELOAD_EEPROM BIT(9) > +#define GLOBAL_CONTROL_CTRMODE BIT(8) > +#define GLOBAL_CONTROL_ATU_FULL_EN BIT(3) > +#define GLOBAL_CONTROL_ATU_DONE_EN BIT(2) > +#define GLOBAL_CONTROL_PHYINT_EN BIT(1) > +#define GLOBAL_CONTROL_EEPROM_DONE_ENBIT(0) > +#define GLOBAL_ATU_CONTROL 0x0a > +#define GLOBAL_ATU_CONTROL_SWRESET BIT(15) > +#define GLOBAL_ATU_CONTROL_LEARNDIS BIT(14) > +#define GLOBAL_ATU_CONTROL_ATUSIZE_256 (0x0 << 12) > +#define GLOBAL_ATU_CONTROL_ATUSIZE_512 (0x1 << 12) > +#define GLOBAL_ATU_CONTROL_ATUSIZE_1024 (0x2 << 12) > +#define GLOBAL_ATU_CONTROL_ATE_AGE_SHIFT 4 > +#define GLOBAL_ATU_CONTROL_ATE_AGE_MASK (0xff << 4) > +#define GLOBAL_ATU_CONTROL_ATE_AGE_5MIN (0x13 << 4) > +#define GLOBAL_ATU_OP0x0b > +#define GLOBAL_ATU_OP_BUSY BIT(15) > +#define GLOBAL_ATU_OP_NOP(0 << 12) > +#define GLOBAL_ATU_OP_FLUSH_ALL ((1 << 12) | GLOBAL_ATU_OP_BUSY) > +#define GLOBAL_ATU_OP_FLUSH_UNLOCKED ((2 << 12) | GLOBAL_ATU_OP_BUSY) > +#define GLOBAL_ATU_OP_LOAD_DB((3 << 12) | GLOBAL_ATU_OP_BUSY) > +#define GLOBAL_ATU_OP_GET_NEXT_DB((4 << 12) |
Re: [PATCH v2 net-next] net: dsa: mv88e6xxx: assert SMI lock
On Fri, Oct 30, 2015 at 06:56:45PM -0400, Vivien Didelot wrote: > It's easy to forget to lock the smi_mutex before calling the low-level > _mv88e6xxx_reg_{read,write}, so add a assert_smi_lock function in them. > > Signed-off-by: Vivien DidelotAcked-by: Andrew Lunn Since there is no followup fixes patch, i assume we actually have it correct at the moment? Thanks Andrew > --- > drivers/net/dsa/mv88e6xxx.c | 25 ++--- > 1 file changed, 14 insertions(+), 11 deletions(-) > > diff --git a/drivers/net/dsa/mv88e6xxx.c b/drivers/net/dsa/mv88e6xxx.c > index b1b14f5..78a179b 100644 > --- a/drivers/net/dsa/mv88e6xxx.c > +++ b/drivers/net/dsa/mv88e6xxx.c > @@ -24,6 +24,16 @@ > #include > #include "mv88e6xxx.h" > > +static void assert_smi_lock(struct dsa_switch *ds) > +{ > + struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); > + > + if (unlikely(!mutex_is_locked(>smi_mutex))) { > + dev_err(ds->master_dev, "SMI lock not held!\n"); > + dump_stack(); > + } > +} > + > /* If the switch's ADDR[4:0] strap pins are strapped to zero, it will > * use all 32 SMI bus addresses on its SMI bus, and all switch registers > * will be directly accessible on some {device address,register address} > @@ -80,12 +90,13 @@ int __mv88e6xxx_reg_read(struct mii_bus *bus, int > sw_addr, int addr, int reg) > return ret & 0x; > } > > -/* Must be called with SMI mutex held */ > static int _mv88e6xxx_reg_read(struct dsa_switch *ds, int addr, int reg) > { > struct mii_bus *bus = dsa_host_dev_to_mii_bus(ds->master_dev); > int ret; > > + assert_smi_lock(ds); > + > if (bus == NULL) > return -EINVAL; > > @@ -143,12 +154,13 @@ int __mv88e6xxx_reg_write(struct mii_bus *bus, int > sw_addr, int addr, > return 0; > } > > -/* Must be called with SMI mutex held */ > static int _mv88e6xxx_reg_write(struct dsa_switch *ds, int addr, int reg, > u16 val) > { > struct mii_bus *bus = dsa_host_dev_to_mii_bus(ds->master_dev); > > + assert_smi_lock(ds); > + > if (bus == NULL) > return -EINVAL; > > @@ -204,7 +216,6 @@ int 
mv88e6xxx_set_addr_indirect(struct dsa_switch *ds, u8 > *addr) > return 0; > } > > -/* Must be called with SMI mutex held */ > static int _mv88e6xxx_phy_read(struct dsa_switch *ds, int addr, int regnum) > { > if (addr >= 0) > @@ -212,7 +223,6 @@ static int _mv88e6xxx_phy_read(struct dsa_switch *ds, int > addr, int regnum) > return 0x; > } > > -/* Must be called with SMI mutex held */ > static int _mv88e6xxx_phy_write(struct dsa_switch *ds, int addr, int regnum, > u16 val) > { > @@ -538,7 +548,6 @@ out: > mutex_unlock(>smi_mutex); > } > > -/* Must be called with SMI mutex held */ > static int _mv88e6xxx_stats_wait(struct dsa_switch *ds) > { > int ret; > @@ -553,7 +562,6 @@ static int _mv88e6xxx_stats_wait(struct dsa_switch *ds) > return -ETIMEDOUT; > } > > -/* Must be called with SMI mutex held */ > static int _mv88e6xxx_stats_snapshot(struct dsa_switch *ds, int port) > { > int ret; > @@ -576,7 +584,6 @@ static int _mv88e6xxx_stats_snapshot(struct dsa_switch > *ds, int port) > return 0; > } > > -/* Must be called with SMI mutex held */ > static void _mv88e6xxx_stats_read(struct dsa_switch *ds, int stat, u32 *val) > { > u32 _val; > @@ -789,7 +796,6 @@ void mv88e6xxx_get_regs(struct dsa_switch *ds, int port, > } > } > > -/* Must be called with SMI lock held */ > static int _mv88e6xxx_wait(struct dsa_switch *ds, int reg, int offset, > u16 mask) > { > @@ -839,14 +845,12 @@ int mv88e6xxx_eeprom_busy_wait(struct dsa_switch *ds) > GLOBAL2_EEPROM_OP_BUSY); > } > > -/* Must be called with SMI lock held */ > static int _mv88e6xxx_atu_wait(struct dsa_switch *ds) > { > return _mv88e6xxx_wait(ds, REG_GLOBAL, GLOBAL_ATU_OP, > GLOBAL_ATU_OP_BUSY); > } > > -/* Must be called with SMI mutex held */ > static int _mv88e6xxx_phy_read_indirect(struct dsa_switch *ds, int addr, > int regnum) > { > @@ -865,7 +869,6 @@ static int _mv88e6xxx_phy_read_indirect(struct dsa_switch > *ds, int addr, > return _mv88e6xxx_reg_read(ds, REG_GLOBAL2, GLOBAL2_SMI_DATA); > } > > -/* Must be called with 
SMI mutex held */ > static int _mv88e6xxx_phy_write_indirect(struct dsa_switch *ds, int addr, >int regnum, u16 val) > { > -- > 2.6.2 > -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC PATCH net-next 0/6] net: dsa: mv88e6060: cleanup and fix setup
Hi Neil, On Nov. Monday 02 (45) 11:57 AM, Neil Armstrong wrote: > This patchset introduces somes fixes and a registers addressing cleanup for > the mv88e6060 DSA driver. > > The first patch removes the poll_link as mv88e6xxx. > The 3 following patchs fixes the setup in regards of the datasheet. > The 2 last patches introduces a clean header and replaces all magic values. > > Neil Armstrong (6): > net: dsa: mv88e6060: remove poll_link callback > net: dsa: mv88e6060: use the correct InitReady bit > net: dsa: mv88e6060: use the correct MaxFrameSize bit > net: dsa: mv88e6060: use the correct bit shift for mac0 > net: dsa: mv88e6060: add register defines header file > net: dsa: mv88e6060: replace magic values with register defines Nice cleanup. I'll just be a bit picky here, so you may not consider my comment for this patchset, but maybe for the future ones. Unless I'm mistaken, there is no reason to group all these patches together. The first 4 patches are independent fixes, and thus could have been sent separately to netdev -net. Then the last 2 ones could have been squashed together, because I don't see any real value in separating them since you duplicate some defines, e.g. REG_PORT. And this patch would be a candidate for netdev -net-next. Thanks, -v -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v2 net-next] net: dsa: mv88e6xxx: assert SMI lock
Hi Andrew, On Nov. Monday 02 (45) 04:02 PM, Andrew Lunn wrote: > On Fri, Oct 30, 2015 at 06:56:45PM -0400, Vivien Didelot wrote: > > It's easy to forget to lock the smi_mutex before calling the low-level > > _mv88e6xxx_reg_{read,write}, so add a assert_smi_lock function in them. > > > > Signed-off-by: Vivien Didelot > > Acked-by: Andrew Lunn > > > Since there is no followup fixes patch, i assume we actually have it > correct at the moment? Yes, this v2 contains the 2 fixups you mentioned (removing the inline keyword and keeping the dsa_host_dev_to_mii_bus call as is). Thus this version is correct. Thanks, -v -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: kernel panic in 4.2.3, rb_erase in sch_fq
On Mon, 2015-11-02 at 16:11 +0200, Denys Fedoryshchenko wrote: > Hi! > > Actually seems i was getting this panic for a while (once per week) on > loaded pppoe server, but just now was able to get full panic message. > After checking commit logs on sch_fq.c i didnt seen any fixes, so > probably upgrading to newer kernel wont help? I do not think we support sch_fq as a HTB leaf. If you want both HTB and sch_fq, you need to set up a bonding device: HTB on bond0, sch_fq on the slaves. Sure, the kernel should not crash, but HTB+sch_fq on the same net device is certainly not something that will work anyway. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 2/2] Convert smsc911x to use ACPI as well as DT
On 09/09/2015 11:10 AM, Marc Zyngier wrote: Jeremy, I can see two issues here: we have a screaming interrupt, and we seem to corrupt some workqueue. How did you get this to work? Firmware release? Marc, I'm responding because it's been a month or so since my last response, and I haven't forgotten about this issue. First, any custom tianocore build (*) should work. The required changes have been in the last few linaro snapshots as well (http://snapshots.linaro.org/components/kernel/linaro-edk2/, currently at 40) but I personally haven't had a lot of luck with the prebuilt images due to problems unrelated to this change. Others may have more luck. * For those that don't know, tianocore is at: https://github.com/tianocore/edk2.git Use the master branch. After setting the environment variables/dependencies appropriately, running "make -f ArmPlatformPkg/ArmJunoPkg/Makefile all" will create a functional ACPI firmware image for all recent kernels, including ACPI/PCIe ones. Thanks for everyone's patience on this, -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 0/8] mm: memcontrol: account socket memory in unified hierarchy
On Thu, Oct 29, 2015 at 10:52:28AM -0700, Johannes Weiner wrote: ... > Now, you mentioned that you'd rather see the socket buffers accounted > at the allocator level, but I looked at the different allocation paths > and network protocols and I'm not convinced that this makes sense. We > don't want to be in the hotpath of every single packet when a lot of > them are small, short-lived management blips that don't involve user > space to let the kernel dispose of them. > > __sk_mem_schedule() on the other hand is already wired up to exactly > those consumers we are interested in for memory isolation: those with > bigger chunks of data attached to them and those that have exploding > receive queues when userspace fails to read(). UDP and TCP. > > I mean, there is a reason why the global memory limits apply to only > those types of packets in the first place: everything else is noise. > > I agree that it's appealing to account at the allocator level and set > page->mem_cgroup etc. but in this case we'd pay extra to capture a lot > of noise, and I don't want to pay that just for aesthetics. In this > case it's better to track ownership on the socket level and only count > packets that can accumulate a significant amount of memory consumed. Sigh, you seem to be right. Moreover, I can't even think of a neat way to account skb pages to memcg, because rcv skbs are generated in device drivers, where we don't know which socket/memcg it will go to. We could recharge individual pages when skb gets to the network or transport layer, but it would result in unjustified overhead. > > > > We tried using the per-memcg tcp limits, and that prevents the OOMs > > > for sure, but it's horrendous for network performance. There is no > > > "stop growing" phase, it just keeps going full throttle until it hits > > > the wall hard. > > > > > > Now, we could probably try to replicate the global knobs and add a > > > per-memcg soft limit. 
But you know better than anyone else how hard it > > > is to estimate the overall workingset size of a workload, and the > > > margins on containerized loads are razor-thin. Performance is much > > > more sensitive to input errors, and often times parameters must be > > > adjusted continuously during the runtime of a workload. It'd be > > > disasterous to rely on yet more static, error-prone user input here. > > > > Yeah, but the dynamic approach proposed in your patch set doesn't > > guarantee we won't hit OOM in memcg due to overgrown buffers. It just > > reduces this possibility. Of course, memcg OOM is far not as disastrous > > as the global one, but still it usually means the workload breakage. > > Right now, the entire machine breaks. Confining it to a faulty memcg, > as well as reducing the likelihood of that OOM in many cases seems > like a good move in the right direction, no? It seems. However, memcg OOM is also bad, we should strive to avoid it if we can. > > And how likely are memcg OOMs because of this anyway? There is of Frankly, I've no idea. Your arguments below sound reassuring though. > course a scenario imaginable where the packets pile up, followed by > some *other* part of the workload, the one that doesn't read() and > process packets, trying to expand--which then doesn't work and goes > OOM. But that seems like a complete corner case. In the vast majority > of cases, the application will be in full operation and just fail to > read() fast enough--because the network bandwidth is enormous compared > to the container's size, or because it shares the CPU with thousands > of other workloads and there is scheduling latency. > > This would be the perfect point to reign in the transmit window... > > > The static approach is error-prone for sure, but it has existed for > > years and worked satisfactory AFAIK. > > ...but that point is not a fixed amount of memory consumed. 
It depends > on the workload and the random interactions it's having with thousands > of other containers on that same machine. > > The point of containers is to maximize utilization of your hardware > and systematically eliminate slack in the system. But it's exactly > that slack on dedicated bare-metal machines that allowed us to take a > wild guess at the settings and then tune them based on observing a > handful of workloads. This approach is not going to work anymore when > we pack the machine to capacity and still expect every single > container out of thousands to perform well. We need that automation. But we do use static approach when setting memory limits, no? memory.{low,high,max} - they are all static. I understand it's appealing to have just one knob - memory size - like in case of virtual machines, but it doesn't seem to work with containers. You added memory.low and memory.high knobs. VMs don't have anything like that. How is one supposed to set them? Depends on the workload, I guess. Also, there is the pids cgroup for limiting the number of pids that can be used by a cgroup, because
net: lockdep warning in ip_mc_msfget (net/ipv4/igmp.c:2400)
Hi all, While fuzzing with syzkaller inside a KVM tools guest running the latest -next, I saw the following warning: [ 2391.993558] == [ 2391.995441] [ INFO: possible circular locking dependency detected ] [ 2391.995771] 4.3.0-rc6-next-20151022-sasha-00042-g2b253a1-dirty #2618 Not tainted [ 2391.995771] --- [ 2391.995771] syzkaller_execu/14105 is trying to acquire lock: [ 2391.995771] (rtnl_mutex){+.+.+.}, at: rtnl_lock (net/core/rtnetlink.c:71) [ 2391.995771] Mutex: counter: 1 owner: None [ 2391.995771] [ 2391.995771] but task is already holding lock: [ 2391.995771] (sk_lock-AF_INET){+.+.+.}, at: do_ip_getsockopt (net/ipv4/ip_sockglue.c:1274) [ 2391.995771] [ 2391.995771] which lock already depends on the new lock. [ 2391.995771] [ 2391.995771] [ 2391.995771] the existing dependency chain (in reverse order) is: [ 2391.995771] -> #1 (sk_lock-AF_INET){+.+.+.}: [ 2391.995771] lock_acquire (kernel/locking/lockdep.c:3620) [ 2391.995771] lock_sock_nested (include/linux/bottom_half.h:31 net/core/sock.c:2411) [ 2391.995771] do_ip_setsockopt.isra.9 (net/ipv4/ip_sockglue.c:623) [ 2391.995771] ip_setsockopt (net/ipv4/ip_sockglue.c:1202) [ 2391.995771]ff, 0x0) [ 2391.995771] sock_common_setsockopt (net/core/sock.c:2610) [ 2391.995771] SyS_setsockopt (net/socket.c:1756 net/socket.c:1736) [ 2391.995771] entry_SYSCALL_64_fastpath (arch/x86/entry/entry_64.S:188) [ 2391.995771] -> #0 (rtnl_mutex){+.+.+.}: [ 2391.995771] __lock_acquire (kernel/locking/lockdep.c:1877 kernel/locking/lockdep.c:1982 kernel/locking/lockdep.c:2168 kernel/locking/lockdep.c:3239) [ 2391.995771] lock_acquire (kernel/locking/lockdep.c:3620) [ 2391.995771] mutex_lock_nested (kernel/locking/mutex.c:526 kernel/locking/mutex.c:618) [ 2391.995771] rtnl_lock (net/core/rtnetlink.c:71) [ 2391.995771] ip_mc_msfget (net/ipv4/igmp.c:2400) [ 2391.995771] do_ip_getsockopt (net/ipv4/ip_sockglue.c:1401) [ 2391.995771] ip_getsockopt (net/ipv4/ip_sockglue.c:1498) [ 2391.995771] raw_getsockopt (net/ipv4/raw.c:851) [ 
2391.995771] sock_common_getsockopt (net/core/sock.c:2569) [ 2391.995771] SyS_getsockopt (net/socket.c:1787 net/socket.c:1770) [ 2391.995771] entry_SYSCALL_64_fastpath (arch/x86/entry/entry_64.S:188) [ 2391.995771] [ 2391.995771] other info that might help us debug this: [ 2391.995771] [ 2391.995771] Possible unsafe locking scenario: [ 2391.995771] [ 2391.995771]CPU0CPU1 [ 2391.995771] [ 2391.995771] lock(sk_lock-AF_INET); [ 2391.995771]lock(rtnl_mutex); [ 2391.995771]lock(sk_lock-AF_INET); [ 2391.995771] lock(rtnl_mutex); [ 2391.995771] [ 2391.995771] *** DEADLOCK *** [ 2391.995771] [ 2391.995771] 1 lock held by syzkaller_execu/14105: [ 2391.995771] #0: (sk_lock-AF_INET){+.+.+.}, at: do_ip_getsockopt (net/ipv4/ip_sockglue.c:1274) [ 2391.995771] [ 2391.995771] stack backtrace: [ 2391.995771] CPU: 1 PID: 14105 Comm: syzkaller_execu Not tainted 4.3.0-rc6-next-20151022-sasha-00042-g2b253a1-dirty #2618 [ 2391.995771] 0001 c179c8c9 8800a403f550 ade32a2b [ 2391.995771] bb7f5a50 bb84a4a0 bb7f5a50 8800a403f5a0 [ 2391.995771] ac43fca8 8800a403f690 a3e18000 8800a3e18000 [ 2391.995771] Call Trace: [ 2391.995771] dump_stack (lib/dump_stack.c:52) [ 2391.995771] print_circular_bug (kernel/locking/lockdep.c:1250) [ 2391.995771] __lock_acquire (kernel/locking/lockdep.c:1877 kernel/locking/lockdep.c:1982 kernel/locking/lockdep.c:2168 kernel/locking/lockdep.c:3239) [ 2391.995771] lock_acquire (kernel/locking/lockdep.c:3620) [ 2391.995771] mutex_lock_nested (kernel/locking/mutex.c:526 kernel/locking/mutex.c:618) [ 2391.995771] rtnl_lock (net/core/rtnetlink.c:71) [ 2391.995771] ip_mc_msfget (net/ipv4/igmp.c:2400) [ 2391.995771] do_ip_getsockopt (net/ipv4/ip_sockglue.c:1401) [ 2391.995771] ip_getsockopt (net/ipv4/ip_sockglue.c:1498) [ 2391.995771] raw_getsockopt (net/ipv4/raw.c:851) [ 2391.995771] sock_common_getsockopt (net/core/sock.c:2569) [ 2391.995771] SyS_getsockopt (net/socket.c:1787 net/socket.c:1770) [ 2391.995771] entry_SYSCALL_64_fastpath (arch/x86/entry/entry_64.S:188) 
Thanks, Sasha -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH net-next 2/4] sfc: allocate rx pages on the same node as the interrupt
On 28/10/15 15:59, Eric Dumazet wrote: > On Wed, 2015-10-28 at 15:01 +, Shradha Shah wrote: >> From: Daniel Pieczko>> >> When the interrupt servicing a channel is on a NUMA node that is >> not local to the device, performance is improved by allocating >> rx pages on the node local to the interrupt (remote to the device) >> >> The performance-optimal case, where interrupts and applications >> are pinned to CPUs on the same node as the device, is not altered >> by this change. >> >> This change gave a 1% improvement in transaction rate using Nginx >> with all interrupts and Nginx threads on the node remote to the >> device. It also gave a small reduction in round-trip latency, >> again with the interrupt and application on a different node to >> the device. >> >> Allocating rx pages based on the channel->irq_node value is only >> valid for the initial driver-load interrupt affinities; if an >> interrupt is moved later, the wrong node may be used for the >> allocation. >> >> Signed-off-by: Shradha Shah >> --- >> drivers/net/ethernet/sfc/efx.c| 1 + >> drivers/net/ethernet/sfc/net_driver.h | 3 +++ >> drivers/net/ethernet/sfc/rx.c | 14 +- >> 3 files changed, 13 insertions(+), 5 deletions(-) >> >> diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c >> index 974637d..89fbd03 100644 >> --- a/drivers/net/ethernet/sfc/efx.c >> +++ b/drivers/net/ethernet/sfc/efx.c >> @@ -445,6 +445,7 @@ efx_alloc_channel(struct efx_nic *efx, int i, struct >> efx_channel *old_channel) >> channel->efx = efx; >> channel->channel = i; >> channel->type = _default_channel_type; >> +channel->irq_node = NUMA_NO_NODE; >> >> for (j = 0; j < EFX_TXQ_TYPES; j++) { >> tx_queue = >tx_queue[j]; >> diff --git a/drivers/net/ethernet/sfc/net_driver.h >> b/drivers/net/ethernet/sfc/net_driver.h >> index ad56231..0ab9080a 100644 >> --- a/drivers/net/ethernet/sfc/net_driver.h >> +++ b/drivers/net/ethernet/sfc/net_driver.h >> @@ -419,6 +419,7 @@ enum efx_sync_events_state { >> * 
@sync_events_state: Current state of sync events on this channel >> * @sync_timestamp_major: Major part of the last ptp sync event >> * @sync_timestamp_minor: Minor part of the last ptp sync event >> + * @irq_node: NUMA node of interrupt >> */ >> struct efx_channel { >> struct efx_nic *efx; >> @@ -477,6 +478,8 @@ struct efx_channel { >> enum efx_sync_events_state sync_events_state; >> u32 sync_timestamp_major; >> u32 sync_timestamp_minor; >> + >> +int irq_node; >> }; >> >> #ifdef CONFIG_NET_RX_BUSY_POLL >> diff --git a/drivers/net/ethernet/sfc/rx.c b/drivers/net/ethernet/sfc/rx.c >> index 3f0e129..c5ef1e8 100644 >> --- a/drivers/net/ethernet/sfc/rx.c >> +++ b/drivers/net/ethernet/sfc/rx.c >> @@ -168,11 +168,15 @@ static int efx_init_rx_buffers(struct efx_rx_queue >> *rx_queue, bool atomic) >> * context in such a case. So, use __GFP_NO_WARN >> * in case of atomic. >> */ >> -page = alloc_pages(__GFP_COLD | __GFP_COMP | >> - (atomic ? >> -(GFP_ATOMIC | __GFP_NOWARN) >> -: GFP_KERNEL), >> - efx->rx_buffer_order); >> +struct efx_channel *channel; >> + >> +channel = efx_rx_queue_channel(rx_queue); >> +page = alloc_pages_node(channel->irq_node, __GFP_COMP | >> +(atomic ? >> + (GFP_ATOMIC | __GFP_NOWARN) >> + : GFP_KERNEL), >> +efx->rx_buffer_order); >> + >> if (unlikely(page == NULL)) >> return -ENOMEM; >> dma_addr = >> > > Sorry, I do not understand this patch, and why the following one is not > squashed on this one. > > irq_node is always NUMA_NO_NODE (in this patch) > > So you claim a 1% improvement, switching from alloc_pages(...) to > alloc_pages_node(NUMA_NO_NODE, ...) ??? > You're correct that this doesn't make sense as it is. There is something missing in this patch (channel->irq_node should be set) and also changing the order of some patches could make this clearer. The series will need to be resent. 
Daniel -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC PATCH net-next 0/6] net: dsa: mv88e6060: cleanup and fix setup
On Mon, Nov 02, 2015 at 11:57:01AM +0100, Neil Armstrong wrote: > This patchset introduces somes fixes and a registers addressing cleanup for > the mv88e6060 DSA driver. > > The first patch removes the poll_link as mv88e6xxx. > The 3 following patchs fixes the setup in regards of the datasheet. > The 2 last patches introduces a clean header and replaces all magic values. Hi Neil Nice patchset. Once you have fixed Sergei's comment, it looks good to go. Acked-by: Andrew Lunn Andrew -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v2 net-next] net: dsa: mv88e6xxx: lookup switch name
On Fri, Oct 30, 2015 at 07:39:48PM -0400, Vivien Didelot wrote: > All the mv88e6xxx drivers use the exact same code in their probe > function to lookup the switch name given its ID. Thus introduce a > mv88e6xxx_switch_id structure and a mv88e6xxx_lookup_name function in > the common mv88e6xxx code. > > In the meantime make __mv88e6xxx_reg_{read,write} static since we do not > need to expose these low-level r/w routines anymore. Acked-by: Andrew Lunn Thanks Andrew -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net] net: avoid NULL deref in inet_ctl_sock_destroy()
From: Eric Dumazet Under low memory conditions, tcp_sk_init() and icmp_sk_init() can both iterate on all possible cpus and call inet_ctl_sock_destroy(), with eventual NULL pointer. Signed-off-by: Eric Dumazet Reported-by: Dmitry Vyukov --- include/net/inet_common.h |3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/net/inet_common.h b/include/net/inet_common.h index 279f83591971..109e3ee9108c 100644 --- a/include/net/inet_common.h +++ b/include/net/inet_common.h @@ -41,7 +41,8 @@ int inet_recv_error(struct sock *sk, struct msghdr *msg, int len, static inline void inet_ctl_sock_destroy(struct sock *sk) { - sock_release(sk->sk_socket); + if (sk) + sock_release(sk->sk_socket); } #endif -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] bpf: convert hashtab lock to raw lock
On 10/31/2015 11:37 AM, Daniel Borkmann wrote: On 10/31/2015 02:47 PM, Steven Rostedt wrote: On Fri, 30 Oct 2015 17:03:58 -0700 Alexei Starovoitov wrote: On Fri, Oct 30, 2015 at 03:16:26PM -0700, Yang Shi wrote: When running bpf samples on rt kernel, it reports the below warning: BUG: sleeping function called from invalid context at kernel/locking/rtmutex.c:917 in_atomic(): 1, irqs_disabled(): 128, pid: 477, name: ping Preemption disabled at:[] kprobe_perf_func+0x30/0x228 ... diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 83c209d..972b76b 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -17,7 +17,7 @@ struct bpf_htab { struct bpf_map map; struct hlist_head *buckets; -spinlock_t lock; +raw_spinlock_t lock; How do we address such things in general? I bet there are tons of places around the kernel that call spin_lock from atomic. I'd hate to lose the benefits of lockdep of non-raw spin_lock just to make rt happy. You won't lose any benefits of lockdep. Lockdep still checks raw_spin_lock(). The only difference between raw_spin_lock and spin_lock is that in -rt spin_lock turns into an rt_mutex() and raw_spin_lock stays a spin lock. ( Btw, Yang, would have been nice if your commit description would have already included such info, not only that you convert it, but also why it's okay to do so. ) I think Thomas's document will include all the information about rt spin lock/raw spin lock, etc. Alexei & Daniel, If you think such info is necessary, I definitely could add it into the commit log in v2. The error is that in -rt, you called a mutex and not a spin lock while atomic. You are right, I think this happens due to the preempt_disable() in the trace_call_bpf() handler. So, I think the patch seems okay. The dep_map is btw union'ed in the struct spinlock case to the same offset of the dep_map from raw_spinlock. It's a bit inconvenient, though, when we add other library code as maps in future, f.e.
things like rhashtable as they would first need to be converted to raw_spinlock_t as well, but judging from the git log, it looks like common practice. Yes, it is common practice for converting sleepable spin lock to raw spin lock in -rt to avoid scheduling in atomic context bug. Thanks, Yang Thanks, Daniel -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net] sfc: push partner queue for skb->xmit_more
When the IP stack passes SKBs the sfc driver puts them in 2 different TX queues (called partners), one for checksummed and one for not checksummed. If the SKB has xmit_more set the driver will delay pushing the work to the NIC. When later it does decide to push the buffers this patch ensures it also pushes the partner queue, if that also has any delayed work. Before this fix the work in the partner queue would be left for a long time and cause a netdev watchdog. Fixes: 70b33fb ("sfc: add support for skb->xmit_more") Reported-by: Jianlin ShiSigned-off-by: Martin Habets --- drivers/net/ethernet/sfc/ef10.c | 4 +++- drivers/net/ethernet/sfc/farch.c | 4 +++- drivers/net/ethernet/sfc/net_driver.h | 2 ++ drivers/net/ethernet/sfc/tx.c | 30 -- 4 files changed, 36 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index ff649ebef637..286cc6b69d57 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -1849,7 +1849,9 @@ static void efx_ef10_tx_write(struct efx_tx_queue *tx_queue) unsigned int write_ptr; efx_qword_t *txd; - BUG_ON(tx_queue->write_count == tx_queue->insert_count); + tx_queue->xmit_more_available = false; + if (unlikely(tx_queue->write_count == tx_queue->insert_count)) + return; do { write_ptr = tx_queue->write_count & tx_queue->ptr_mask; diff --git a/drivers/net/ethernet/sfc/farch.c b/drivers/net/ethernet/sfc/farch.c index f08266f0eca2..5a1c5a8f278a 100644 --- a/drivers/net/ethernet/sfc/farch.c +++ b/drivers/net/ethernet/sfc/farch.c @@ -321,7 +321,9 @@ void efx_farch_tx_write(struct efx_tx_queue *tx_queue) unsigned write_ptr; unsigned old_write_count = tx_queue->write_count; - BUG_ON(tx_queue->write_count == tx_queue->insert_count); + tx_queue->xmit_more_available = false; + if (unlikely(tx_queue->write_count == tx_queue->insert_count)) + return; do { write_ptr = tx_queue->write_count & tx_queue->ptr_mask; diff --git a/drivers/net/ethernet/sfc/net_driver.h 
b/drivers/net/ethernet/sfc/net_driver.h index c530e1c4cb4f..24038ef96d9f 100644 --- a/drivers/net/ethernet/sfc/net_driver.h +++ b/drivers/net/ethernet/sfc/net_driver.h @@ -219,6 +219,7 @@ struct efx_tx_buffer { * @tso_packets: Number of packets via the TSO xmit path * @pushes: Number of times the TX push feature has been used * @pio_packets: Number of times the TX PIO feature has been used + * @xmit_more_available: Are any packets waiting to be pushed to the NIC * @empty_read_count: If the completion path has seen the queue as empty * and the transmission path has not yet checked this, the value of * @read_count bitwise-added to %EFX_EMPTY_COUNT_VALID; otherwise 0. @@ -253,6 +254,7 @@ struct efx_tx_queue { unsigned int tso_packets; unsigned int pushes; unsigned int pio_packets; + bool xmit_more_available; /* Statistics to supplement MAC stats */ unsigned long tx_packets; diff --git a/drivers/net/ethernet/sfc/tx.c b/drivers/net/ethernet/sfc/tx.c index 1833a0146571..67f6afaa022f 100644 --- a/drivers/net/ethernet/sfc/tx.c +++ b/drivers/net/ethernet/sfc/tx.c @@ -431,8 +431,20 @@ finish_packet: efx_tx_maybe_stop_queue(tx_queue); /* Pass off to hardware */ - if (!skb->xmit_more || netif_xmit_stopped(tx_queue->core_txq)) + if (!skb->xmit_more || netif_xmit_stopped(tx_queue->core_txq)) { + struct efx_tx_queue *txq2 = efx_tx_queue_partner(tx_queue); + + /* There could be packets left on the partner queue if those +* SKBs had skb->xmit_more set. If we do not push those they +* could be left for a long time and cause a netdev watchdog. 
+*/ + if (txq2->xmit_more_available) + efx_nic_push_buffers(txq2); + efx_nic_push_buffers(tx_queue); + } else { + tx_queue->xmit_more_available = skb->xmit_more; + } tx_queue->tx_packets++; @@ -722,6 +734,7 @@ void efx_init_tx_queue(struct efx_tx_queue *tx_queue) tx_queue->read_count = 0; tx_queue->old_read_count = 0; tx_queue->empty_read_count = 0 | EFX_EMPTY_COUNT_VALID; + tx_queue->xmit_more_available = false; /* Set up TX descriptor ring */ efx_nic_init_tx(tx_queue); @@ -747,6 +760,7 @@ void efx_fini_tx_queue(struct efx_tx_queue *tx_queue) ++tx_queue->read_count; } + tx_queue->xmit_more_available = false; netdev_tx_reset_queue(tx_queue->core_txq); } @@ -1302,8 +1316,20 @@ static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue, efx_tx_maybe_stop_queue(tx_queue); /* Pass off to hardware */ - if
Re: [RFC PATCH net-next 2/6] net: dsa: mv88e6060: use the correct InitReady bit
Hello. On 11/2/2015 1:57 PM, Neil Armstrong wrote: According to the mv88e6060 datasheet, the InitReady bit position is 11 and the polarity is inverted. Use the bit correctly to detect the end of initialization. Signed-off-by: Neil Armstrong --- drivers/net/dsa/mv88e6060.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/mv88e6060.c b/drivers/net/dsa/mv88e6060.c index 6885ef5..c10880f 100644 --- a/drivers/net/dsa/mv88e6060.c +++ b/drivers/net/dsa/mv88e6060.c @@ -102,7 +102,7 @@ static int mv88e6060_switch_reset(struct dsa_switch *ds) timeout = jiffies + 1 * HZ; while (time_before(jiffies, timeout)) { ret = REG_READ(REG_GLOBAL, 0x00); - if ((ret & 0x8000) == 0x0000) + if ((ret & 0x800) != 0x0000) You could as well drop != 0. [...] MBR, Sergei -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-next] net/core: generic support for disabling netdev features down stack
There are some netdev features, which when disabled on an upper device, such as a bonding master or a bridge, must be disabled and cannot be re-enabled on underlying devices. This is a rework of an earlier more heavy-handed appraoch, which simply disables and prevents re-enabling of netdev features listed in a new define in include/net/netdev_features.h, NETIF_F_UPPER_DISABLES. Any upper device that disables a flag in that feature mask, the disabling will propagate down the stack, and any lower device that has any upper device with one of those flags disabled should not be able to enable said flag. Initially, only LRO is included for proof of concept, and because this code effectively does the same thing as dev_disable_lro(), though it will also activate from the ethtool path, which was one of the goals here. [root@dell-per730-01 ~]# ethtool -k bond0 |grep large large-receive-offload: on [root@dell-per730-01 ~]# ethtool -k p5p1 |grep large large-receive-offload: on [root@dell-per730-01 ~]# ethtool -K bond0 lro off [root@dell-per730-01 ~]# ethtool -k bond0 |grep large large-receive-offload: off [root@dell-per730-01 ~]# ethtool -k p5p1 |grep large large-receive-offload: off dmesg dump: [ 1033.277986] bond0: Disabling feature 0x8000 on lower dev p5p2. [ 1034.067949] bnx2x :06:00.1 p5p2: using MSI-X IRQs: sp 74 fp[0] 76 ... fp[7] 83 [ 1034.753612] bond0: Disabling feature 0x8000 on lower dev p5p1. [ 1035.591019] bnx2x :06:00.0 p5p1: using MSI-X IRQs: sp 62 fp[0] 64 ... fp[7] 71 This has been successfully tested with bnx2x, qlcnic and netxen network cards as slaves in a bond interface. Turning LRO on or off on the master also turns it on or off on each of the slaves, new slaves are added with LRO in the same state as the master, and LRO can't be toggled on the slaves. 
Also, this should largely remove the need for dev_disable_lro(), and most, if not all, of its call sites can be replaced by simply making sure NETIF_F_LRO isn't included in the relevant device's feature flags. Note that this patch is driven by bug reports from users saying it was confusing that bonds and slaves had different settings for the same features, and while it won't be 100% in sync if a lower device doesn't support a feature like LRO, I think this is a good step in the right direction. CC: "David S. Miller"CC: Eric Dumazet CC: Jay Vosburgh CC: Veaceslav Falico CC: Andy Gospodarek CC: Jiri Pirko CC: Nikolay Aleksandrov CC: Michal Kubecek CC: Alexander Duyck CC: netdev@vger.kernel.org Signed-off-by: Jarod Wilson --- Note: this replaces "[RFC PATCH net-next] net/core: initial support for stacked dev feature toggles" for consideration. include/linux/netdev_features.h | 11 + net/core/dev.c | 52 + 2 files changed, 63 insertions(+) diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index 9672781..0f5837a 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -125,6 +125,11 @@ enum { #define NETIF_F_HW_L2FW_DOFFLOAD __NETIF_F(HW_L2FW_DOFFLOAD) #define NETIF_F_BUSY_POLL __NETIF_F(BUSY_POLL) +#define for_each_netdev_feature(mask_addr, feature) \ + int bit; \ + for_each_set_bit(bit, (unsigned long *)mask_addr, NETDEV_FEATURE_COUNT) \ + feature = __NETIF_F_BIT(bit); + /* Features valid for ethtool to change */ /* = all defined minus driver/device-class-related */ #define NETIF_F_NEVER_CHANGE (NETIF_F_VLAN_CHALLENGED | \ @@ -167,6 +172,12 @@ enum { */ #define NETIF_F_ALL_FOR_ALL(NETIF_F_NOCACHE_COPY | NETIF_F_FSO) +/* + * If upper/master device has these features disabled, they must be disabled + * on all lower/slave devices as well. 
+ */ +#define NETIF_F_UPPER_DISABLES NETIF_F_LRO + /* changeable features with no special hardware requirements */ #define NETIF_F_SOFT_FEATURES (NETIF_F_GSO | NETIF_F_GRO) diff --git a/net/core/dev.c b/net/core/dev.c index 13f49f8..3a8dbbc 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6288,9 +6288,51 @@ static void rollback_registered(struct net_device *dev) list_del(); } +static netdev_features_t netdev_sync_upper_features(struct net_device *lower, + struct net_device *upper, netdev_features_t features) +{ + netdev_features_t upper_disables = NETIF_F_UPPER_DISABLES; + netdev_features_t feature; + + for_each_netdev_feature(_disables, feature) { + if (!(upper->wanted_features & feature) + && (features & feature)) { + netdev_dbg(lower, "Dropping feature %pNF, upper dev %s has it
Re: [PATCH net-next] net/core: generic support for disabling netdev features down stack
On 11/02/2015 09:53 AM, Jarod Wilson wrote: There are some netdev features, which when disabled on an upper device, such as a bonding master or a bridge, must be disabled and cannot be re-enabled on underlying devices. This is a rework of an earlier more heavy-handed appraoch, which simply disables and prevents re-enabling of netdev features listed in a new define in include/net/netdev_features.h, NETIF_F_UPPER_DISABLES. Any upper device that disables a flag in that feature mask, the disabling will propagate down the stack, and any lower device that has any upper device with one of those flags disabled should not be able to enable said flag. Initially, only LRO is included for proof of concept, and because this code effectively does the same thing as dev_disable_lro(), though it will also activate from the ethtool path, which was one of the goals here. [root@dell-per730-01 ~]# ethtool -k bond0 |grep large large-receive-offload: on [root@dell-per730-01 ~]# ethtool -k p5p1 |grep large large-receive-offload: on [root@dell-per730-01 ~]# ethtool -K bond0 lro off [root@dell-per730-01 ~]# ethtool -k bond0 |grep large large-receive-offload: off [root@dell-per730-01 ~]# ethtool -k p5p1 |grep large large-receive-offload: off dmesg dump: [ 1033.277986] bond0: Disabling feature 0x8000 on lower dev p5p2. [ 1034.067949] bnx2x :06:00.1 p5p2: using MSI-X IRQs: sp 74 fp[0] 76 ... fp[7] 83 [ 1034.753612] bond0: Disabling feature 0x8000 on lower dev p5p1. [ 1035.591019] bnx2x :06:00.0 p5p1: using MSI-X IRQs: sp 62 fp[0] 64 ... fp[7] 71 This has been successfully tested with bnx2x, qlcnic and netxen network cards as slaves in a bond interface. Turning LRO on or off on the master also turns it on or off on each of the slaves, new slaves are added with LRO in the same state as the master, and LRO can't be toggled on the slaves. 
Also, this should largely remove the need for dev_disable_lro(), and most, if not all, of its call sites can be replaced by simply making sure NETIF_F_LRO isn't included in the relevant device's feature flags. Note that this patch is driven by bug reports from users saying it was confusing that bonds and slaves had different settings for the same features, and while it won't be 100% in sync if a lower device doesn't support a feature like LRO, I think this is a good step in the right direction. CC: "David S. Miller"CC: Eric Dumazet CC: Jay Vosburgh CC: Veaceslav Falico CC: Andy Gospodarek CC: Jiri Pirko CC: Nikolay Aleksandrov CC: Michal Kubecek CC: Alexander Duyck CC: netdev@vger.kernel.org Signed-off-by: Jarod Wilson --- Note: this replaces "[RFC PATCH net-next] net/core: initial support for stacked dev feature toggles" for consideration. include/linux/netdev_features.h | 11 + net/core/dev.c | 52 + 2 files changed, 63 insertions(+) diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index 9672781..0f5837a 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -125,6 +125,11 @@ enum { #define NETIF_F_HW_L2FW_DOFFLOAD __NETIF_F(HW_L2FW_DOFFLOAD) #define NETIF_F_BUSY_POLL __NETIF_F(BUSY_POLL) +#define for_each_netdev_feature(mask_addr, feature) \ + int bit; \ + for_each_set_bit(bit, (unsigned long *)mask_addr, NETDEV_FEATURE_COUNT) \ + feature = __NETIF_F_BIT(bit); + /* Features valid for ethtool to change */ /* = all defined minus driver/device-class-related */ #define NETIF_F_NEVER_CHANGE (NETIF_F_VLAN_CHALLENGED | \ @@ -167,6 +172,12 @@ enum { */ #define NETIF_F_ALL_FOR_ALL (NETIF_F_NOCACHE_COPY | NETIF_F_FSO) +/* + * If upper/master device has these features disabled, they must be disabled + * on all lower/slave devices as well. 
+ */ +#define NETIF_F_UPPER_DISABLES NETIF_F_LRO + /* changeable features with no special hardware requirements */ #define NETIF_F_SOFT_FEATURES (NETIF_F_GSO | NETIF_F_GRO) diff --git a/net/core/dev.c b/net/core/dev.c index 13f49f8..3a8dbbc 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6288,9 +6288,51 @@ static void rollback_registered(struct net_device *dev) list_del(); } +static netdev_features_t netdev_sync_upper_features(struct net_device *lower, + struct net_device *upper, netdev_features_t features) +{ + netdev_features_t upper_disables = NETIF_F_UPPER_DISABLES; + netdev_features_t feature; + + for_each_netdev_feature(_disables, feature) { + if (!(upper->wanted_features & feature) + && (features & feature)) { + netdev_dbg(lower,
Re: [BUG] Any-IP IPv6 support broken
> On 29 Oct 2015, at 17:44, Maciej Żenczykowski wrote: > > What are you trying to do? I would like to have a "bind-to-subnet" semantic with IPv6. This is currently working with IPv4, and the setup is as follows: - set up a dummy network device configured with any-IP - add an any-IP route - bind() on the dummy device. In this way we can effectively bind a process to a particular subnet (by binding it to a dummy device which is receiving all the packets from a particular subnet). The point of using dummy devices is that we can configure multiple ones (and so we can bind multiple processes to multiple subnets). > > Does what you're trying to do work on an older kernel? Which kernel > version does it break at? > > btw. afaik any-ip doesn't work with IPv4 on any un-patched kernel (the > IPv4 support patch was reverted). This is actually working with IPv4 (as I said I'm using a recent kernel, 4.1), and the fact that you say it's not supposed to work leads me to think that maybe we are not talking about the same feature. Cheers, gilberto -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[v7, 0/6] Freescale DPAA FMan
From: Igal LibermanThe Freescale Data Path Acceleration Architecture (DPAA) is a set of hardware components on specific QorIQ multicore processors. This architecture provides the infrastructure to support simplified sharing of networking interfaces and accelerators by multiple CPU cores and the accelerators. One of the DPAA accelerators is the Frame Manager (FMan) which contains a series of hardware blocks: ports, Ethernet MACs, a multi user RAM (MURAM) and Storage Profile (SP). This patch set introduce the FMan drivers. Each driver configures and initializes the corresponding FMan hardware module (described above). The MAC driver offers support for three different types of MACs (eTSEC, TGEC, MEMAC). v6 --> v7: - Addressed compilation issue on non-PPC targets - Removed B4860 rev 1 support v5 --> v6: - Addressed feedback from Scott: - Moved kernel doc to source files - Removed a series of configurable settings - Miscellaneous code updates v4 --> v5: - Addressed feedback from David Miller: - Removed driver layering - Reduce namespace pollution - Reduce code complexity and size v3 --> v4: - Remove device_initcall call in driver registration (redundant) - Remove hot/cold labels - Minor update in FMan Clock read from device-tree - Update fixed-link support - Addressed feedback from Stephen Hemminger - Remove bogus blank line v2 --> v3: - Addressed feedback from Scott: - Remove typedefs - Remove unnecessary memory barriers - Remove unnecessary casting - Remove KConfig options - Remove early_params - Remove Hungarian notation - Remove __packed__ attribute and padding from structures - Remove unlikely attribute (where it's not needed) - Use proper error codes and remove unnecessary prints - Use proper values for sleep routines - Replace complex Macros with functions - Improve device tree processing code - Use symbolic defines - Add time-out in busy-wait loops - Removed exit code (loadable module support will be added later) - Fixed "fixed-link" issue raised by Joakim 
Tjernlund v1 --> v2: - Addressed feedback from Paul Bolle: - General feedback of FMan Driver layer - Remove Errata defines - Aligned comments to Kernel Doc - Remove Loadable Module support (not yet supported) - Removed not needed KConfig dependencies - Addressed feedback from Scott Wood - Use Kernel ioread/iowrite services - Squash FLIB source and header patches together This submission is based on the prior Freescale DPAA FMan V3,RFC submission. Several issues addresses in this submission: - Reduced MAC layering and complexity - Reduced code base - T1024/T2080 10G best effort support Igal Liberman (6): fsl/fman: Add FMan MURAM support fsl/fman: Add FMan support fsl/fman: Add FMan MAC support fsl/fman: Add FMan SP support fsl/fman: Add FMan Port Support fsl/fman: Add FMan MAC driver drivers/net/ethernet/freescale/Kconfig |1 + drivers/net/ethernet/freescale/Makefile|2 + drivers/net/ethernet/freescale/fman/Kconfig|8 + drivers/net/ethernet/freescale/fman/Makefile |7 + .../net/ethernet/freescale/fman/crc_mac_addr_ext.h | 314 +++ drivers/net/ethernet/freescale/fman/fman.c | 2876 drivers/net/ethernet/freescale/fman/fman.h | 325 +++ drivers/net/ethernet/freescale/fman/fman_dtsec.c | 1609 +++ drivers/net/ethernet/freescale/fman/fman_dtsec.h | 59 + drivers/net/ethernet/freescale/fman/fman_mac.h | 276 ++ drivers/net/ethernet/freescale/fman/fman_memac.c | 1307 + drivers/net/ethernet/freescale/fman/fman_memac.h | 60 + drivers/net/ethernet/freescale/fman/fman_muram.c | 159 ++ drivers/net/ethernet/freescale/fman/fman_muram.h | 51 + drivers/net/ethernet/freescale/fman/fman_port.c| 1779 drivers/net/ethernet/freescale/fman/fman_port.h| 151 + drivers/net/ethernet/freescale/fman/fman_sp.c | 167 ++ drivers/net/ethernet/freescale/fman/fman_sp.h | 103 + drivers/net/ethernet/freescale/fman/fman_tgec.c| 798 ++ drivers/net/ethernet/freescale/fman/fman_tgec.h| 55 + drivers/net/ethernet/freescale/fman/mac.c | 980 +++ drivers/net/ethernet/freescale/fman/mac.h | 97 + 22 files changed, 11184 
insertions(+) create mode 100644 drivers/net/ethernet/freescale/fman/Kconfig create mode 100644
Re: [PATCH v2 net-next] net: dsa: mv88e6xxx: assert SMI lock
On Nov. Monday 02 (45) 04:02 PM, Andrew Lunn wrote: > On Fri, Oct 30, 2015 at 06:56:45PM -0400, Vivien Didelot wrote: > > It's easy to forget to lock the smi_mutex before calling the low-level > > _mv88e6xxx_reg_{read,write}, so add an assert_smi_lock function in them. > > > > Signed-off-by: Vivien Didelot > > Acked-by: Andrew Lunn > > > Since there is no followup fixes patch, i assume we actually have it > correct at the moment? Ho, I just caught what you meant ;-) From my (minimal) tests, I didn't see any stack dump yet from setup, FDB or VLAN operations, looks good so far. Thanks, -v -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[net-next v4 2/8] dpaa_eth: add support for DPAA Ethernet
This introduces the Freescale Data Path Acceleration Architecture (DPAA) Ethernet driver (dpaa_eth) that builds upon the DPAA QMan, BMan, PAMU and FMan drivers to deliver Ethernet connectivity on the Freescale DPAA QorIQ platforms. Signed-off-by: Madalin Bucur--- drivers/net/ethernet/freescale/Kconfig |2 + drivers/net/ethernet/freescale/Makefile|1 + drivers/net/ethernet/freescale/dpaa/Kconfig| 22 + drivers/net/ethernet/freescale/dpaa/Makefile | 11 + drivers/net/ethernet/freescale/dpaa/dpaa_eth.c | 819 drivers/net/ethernet/freescale/dpaa/dpaa_eth.h | 432 +++ .../net/ethernet/freescale/dpaa/dpaa_eth_common.c | 1299 .../net/ethernet/freescale/dpaa/dpaa_eth_common.h | 98 ++ drivers/net/ethernet/freescale/dpaa/dpaa_eth_sg.c | 408 ++ 9 files changed, 3092 insertions(+) create mode 100644 drivers/net/ethernet/freescale/dpaa/Kconfig create mode 100644 drivers/net/ethernet/freescale/dpaa/Makefile create mode 100644 drivers/net/ethernet/freescale/dpaa/dpaa_eth.c create mode 100644 drivers/net/ethernet/freescale/dpaa/dpaa_eth.h create mode 100644 drivers/net/ethernet/freescale/dpaa/dpaa_eth_common.c create mode 100644 drivers/net/ethernet/freescale/dpaa/dpaa_eth_common.h create mode 100644 drivers/net/ethernet/freescale/dpaa/dpaa_eth_sg.c diff --git a/drivers/net/ethernet/freescale/Kconfig b/drivers/net/ethernet/freescale/Kconfig index f3f89cc..92198be 100644 --- a/drivers/net/ethernet/freescale/Kconfig +++ b/drivers/net/ethernet/freescale/Kconfig @@ -92,4 +92,6 @@ config GIANFAR and MPC86xx family of chips, the eTSEC on LS1021A and the FEC on the 8540. 
+source "drivers/net/ethernet/freescale/dpaa/Kconfig" + endif # NET_VENDOR_FREESCALE diff --git a/drivers/net/ethernet/freescale/Makefile b/drivers/net/ethernet/freescale/Makefile index 4097c58..ae13dc5 100644 --- a/drivers/net/ethernet/freescale/Makefile +++ b/drivers/net/ethernet/freescale/Makefile @@ -12,6 +12,7 @@ obj-$(CONFIG_FS_ENET) += fs_enet/ obj-$(CONFIG_FSL_PQ_MDIO) += fsl_pq_mdio.o obj-$(CONFIG_FSL_XGMAC_MDIO) += xgmac_mdio.o obj-$(CONFIG_GIANFAR) += gianfar_driver.o +obj-$(CONFIG_FSL_DPAA_ETH) += dpaa/ obj-$(CONFIG_PTP_1588_CLOCK_GIANFAR) += gianfar_ptp.o gianfar_driver-objs := gianfar.o \ gianfar_ethtool.o diff --git a/drivers/net/ethernet/freescale/dpaa/Kconfig b/drivers/net/ethernet/freescale/dpaa/Kconfig new file mode 100644 index 000..022d5aa --- /dev/null +++ b/drivers/net/ethernet/freescale/dpaa/Kconfig @@ -0,0 +1,22 @@ +menuconfig FSL_DPAA_ETH + tristate "DPAA Ethernet" + depends on FSL_SOC && FSL_BMAN && FSL_QMAN && FSL_FMAN + select PHYLIB + select FSL_FMAN_MAC + ---help--- + Data Path Acceleration Architecture Ethernet driver, + supporting the Freescale QorIQ chips. + Depends on Freescale Buffer Manager and Queue Manager + driver and Frame Manager Driver. + +if FSL_DPAA_ETH + +config FSL_DPAA_ETH_FRIENDLY_IF_NAME + bool "Use fmX-macY names for the DPAA interfaces" + default y + ---help--- + The DPAA Ethernet netdevices are created for each FMan port available + on a certain board. Enable this to get interface names derived from + the underlying FMan hardware for a simple identification. 
+ +endif # FSL_DPAA_ETH diff --git a/drivers/net/ethernet/freescale/dpaa/Makefile b/drivers/net/ethernet/freescale/dpaa/Makefile new file mode 100644 index 000..3847ec7 --- /dev/null +++ b/drivers/net/ethernet/freescale/dpaa/Makefile @@ -0,0 +1,11 @@ +# +# Makefile for the Freescale DPAA Ethernet controllers +# + +# Include FMan headers +FMAN= $(srctree)/drivers/net/ethernet/freescale/fman +ccflags-y += -I$(FMAN) + +obj-$(CONFIG_FSL_DPAA_ETH) += fsl_dpa.o + +fsl_dpa-objs += dpaa_eth.o dpaa_eth_sg.o dpaa_eth_common.o diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c new file mode 100644 index 000..8381616 --- /dev/null +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c @@ -0,0 +1,819 @@ +/* Copyright 2008 - 2015 Freescale Semiconductor Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Freescale Semiconductor nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. +
[net-next v4 6/8] dpaa_eth: add ethtool statistics
Add a series of counters to be exported through ethtool: - add detailed counters for reception errors; - add detailed counters for QMan enqueue reject events; - count the number of fragmented skbs received from the stack; - count all frames received on the Tx confirmation path; - add congestion group statistics; - count the number of interrupts for each CPU. Signed-off-by: Ioana CiorneiSigned-off-by: Madalin Bucur --- drivers/net/ethernet/freescale/dpaa/dpaa_eth.c | 12 ++ drivers/net/ethernet/freescale/dpaa/dpaa_eth.h | 34 .../net/ethernet/freescale/dpaa/dpaa_eth_common.c | 40 - .../net/ethernet/freescale/dpaa/dpaa_eth_common.h | 2 + drivers/net/ethernet/freescale/dpaa/dpaa_eth_sg.c | 1 + drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c | 183 + 6 files changed, 270 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c index 894f1a7..0b3332a 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c @@ -102,6 +102,15 @@ static void _dpa_rx_error(struct net_device *net_dev, percpu_priv->stats.rx_errors++; + if (fd->status & FM_FD_ERR_DMA) + percpu_priv->rx_errors.dme++; + if (fd->status & FM_FD_ERR_PHYSICAL) + percpu_priv->rx_errors.fpe++; + if (fd->status & FM_FD_ERR_SIZE) + percpu_priv->rx_errors.fse++; + if (fd->status & FM_FD_ERR_PRS_HDR_ERR) + percpu_priv->rx_errors.phe++; + dpa_fd_release(net_dev, fd); } @@ -167,6 +176,8 @@ static void _dpa_tx_conf(struct net_device *net_dev, percpu_priv->stats.tx_errors++; } + percpu_priv->tx_confirm++; + skb = _dpa_cleanup_tx_fd(priv, fd); dev_kfree_skb(skb); @@ -302,6 +313,7 @@ static void priv_ern(struct qman_portal *portal, percpu_priv->stats.tx_dropped++; percpu_priv->stats.tx_fifo_errors++; + count_ern(percpu_priv, msg); /* If we intended this buffer to go into the pool * when the FM was done, we need to put it in diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.h 
b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.h index 87577cf..ccaadd9 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.h +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.h @@ -192,6 +192,25 @@ struct dpa_bp { void (*free_buf_cb)(void *addr); }; +struct dpa_rx_errors { + u64 dme;/* DMA Error */ + u64 fpe;/* Frame Physical Error */ + u64 fse;/* Frame Size Error */ + u64 phe;/* Header Error */ +}; + +/* Counters for QMan ERN frames - one counter per rejection code */ +struct dpa_ern_cnt { + u64 cg_tdrop; /* Congestion group taildrop */ + u64 wred; /* WRED congestion */ + u64 err_cond; /* Error condition */ + u64 early_window; /* Order restoration, frame too early */ + u64 late_window;/* Order restoration, frame too late */ + u64 fq_tdrop; /* FQ taildrop */ + u64 fq_retired; /* FQ is retired */ + u64 orp_zero; /* ORP disabled */ +}; + struct dpa_napi_portal { struct napi_struct napi; struct qman_portal *p; @@ -201,7 +220,13 @@ struct dpa_napi_portal { struct dpa_percpu_priv_s { struct net_device *net_dev; struct dpa_napi_portal *np; + u64 in_interrupt; + u64 tx_confirm; + /* fragmented (non-linear) skbuffs received from the stack */ + u64 tx_frag_skbuffs; struct rtnl_link_stats64 stats; + struct dpa_rx_errors rx_errors; + struct dpa_ern_cnt ern_cnt; }; struct dpa_priv_s { @@ -228,6 +253,14 @@ struct dpa_priv_s { * (and the same) congestion group. */ struct qman_cgr cgr; + /* If congested, when it began. Used for performance stats. */ + u32 congestion_start_jiffies; + /* Number of jiffies the Tx port was congested. */ + u32 congested_jiffies; + /* Counter for the number of times the CGR +* entered congestion state +*/ + u32 cgr_congested_count; } cgr_data; /* Use a per-port CGR for ingress traffic. 
*/ bool use_ingress_cgr; @@ -289,6 +322,7 @@ static inline int dpaa_eth_napi_schedule(struct dpa_percpu_priv_s *percpu_priv, np->p = portal; napi_schedule(>napi); + percpu_priv->in_interrupt++; return 1; } } diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth_common.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth_common.c index
Re: [PATCH net] net: avoid NULL deref in inet_ctl_sock_destroy()
On Mon, 2015-11-02 at 17:53 +0100, Hannes Frederic Sowa wrote: > On Mon, Nov 2, 2015, at 16:50, Eric Dumazet wrote: > > From: Eric Dumazet > > > > Under low memory conditions, tcp_sk_init() and icmp_sk_init() > > can both iterate on all possible cpus and call inet_ctl_sock_destroy(), > > with eventual NULL pointer. > > > > Signed-off-by: Eric Dumazet > > Reported-by: Dmitry Vyukov > > Eric, was this a private report or some of those floating around > publicly? Dmitry Vyukov filed two internal bug reports at Google, not sure if he mentioned the issue elsewhere. Google-Bug-Id: 25415196 Google-Bug-Id: 25416355 (But you do not have access to them) -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC PATCH net-next] net/core: initial support for stacked dev feature toggles
Alexander Duyck wrote: On 10/30/2015 09:25 AM, Jarod Wilson wrote: ... Rather than outright dropping the second bit though, I was thinking maybe just drop a note in dmesg along the lines of "hey, you shut off LRO, it is still enabled on upper dev foo", to placate end-users. I would rather not see it. It would be mostly noise. It is perfectly valid to have LRO advertised on an upper device, but not supported on a lower one. It basically just means that the path will allow LRO frames through, it doesn't guarantee that we are going to provide them. Okay, dropping this. ... Same thing here. If a lower dev has it disabled then leave it disabled. I believe your goal is to make it so that dev_disable_lro() can shut down LRO when it is making packets in the data-path unusable. This is already the case since commit fbe168ba91f7 ("net: generic dev_disable_lro() stacked device handling"). That commit makes sure dev_disable_lro() is propagated down the stack and also makes sure new slaves added to a bond/team with LRO disabled have it disabled too. What it does not do is propagating LRO disabling down if it is disabled in ways that do not call dev_disable_lro() (e.g. via ethtool). I'm not sure if this should be done or not, both options have their pros and cons. Making it work with ethtool was one of my primary goals with this change, as it was users prodding things with ethtool that prompted the "hey, this doesn't make sense" bug reports. I'd say make it work like dev_disable_lro already does. Disabling LRO propagates down, enabling LRO only enables it on the specific device. The way to think of it is as a warning flag. With LRO enabled this device may report frames larger than MTU to the stack and will mangle checksums. Without LRO all of the frames received should be restricted to MTU. That is why you have to force the disabling down to all lower devices, and why you cannot enable it if an upper device has it disabled. 
However, I believe enabling LRO shouldn't be propagated down. Hm. Devices that should never have LRO enabled still won't get it enabled, so I'm not clear what harm it would cause.I tend to think you How do you define "devices that should never have LRO enabled"? No NETIF_F_LRO flag set in hw_features is what I was thinking. The fact is LRO is very messy in terms of the way it functions. Different drivers handle it different ways. Usually it results in the Rx checksum being mangled, it provides frames larger than MTU, and uses fraglist instead of frags on some drivers. do want this sync'ing down the stack if set on an upper dev (i.e., ethtool -K bond0 lro on), for consistency's sake. You can always come back through afterwards and disable things on lower devs individually if they're really not wanted, since we're in agreement that we shouldn't prevent disabling features on lower devices. Think of it this way. Lets say I have a NIC that I know is problematic when LRO is enabled, it might cause a kernel panic due to an skb overrun. So I have a bond with it and some other NIC which can run with LRO enabled without issues. How do I enable LRO on the other device without causing a kernel panic, and without tearing apart the existing bond? With the approach you have described I can't because I have to enable it at the bond and doing so will enable it on the NIC with the faulty implementation. I'd argue that if enabling LRO on a device causes a panic, that device probably shouldn't be advertising LRO support, and the driver ought to be fixed, but that's somewhat tangential. I'm already sold on only disabling down the stack. This is why we cannot enable LRO unless all upper devices support it, and why we should propagate disabling LRO down to all lower devices. Trying to force it on for a lower device just because the upper device supports it is a bad idea because there are multiple LRO implementations and they all behave very differently. 
That's a bit concerning, given that we default to LRO on in a bond, as should all the slaves, regardless of which LRO implementation the device has (so long as the driver claims to support LRO, anyway). But again, that's probably a separate issue, I've got a forthcoming patch that I'm still beating around and touching up, but I think looks sane and lines up with what you've suggested. If nothing else you might start looking at working with a mask of bits that function like this. You could probably start with GRO, LRO, and RXCSUM and work your way up from there. If they aren't set on the upper devices you cannot enable them, and if they are cleared then they must be cleared on all lower devices. For step one, I've added a feature mask and a new helper that iterates over it looking for set feature flags. In the case of the bnx2x equipped host I'm currently testing on, adding RXCSUM had an interesting and as yet unexplained side-effect of preventing LRO from being enabled on the bnx2x cards -- ethtool showed "off
Re: [PATCH net] net: avoid NULL deref in inet_ctl_sock_destroy()
On Mon, Nov 2, 2015 at 6:00 PM, Eric Dumazet wrote: > On Mon, 2015-11-02 at 17:53 +0100, Hannes Frederic Sowa wrote: >> On Mon, Nov 2, 2015, at 16:50, Eric Dumazet wrote: >> > From: Eric Dumazet >> > >> > Under low memory conditions, tcp_sk_init() and icmp_sk_init() >> > can both iterate on all possible cpus and call inet_ctl_sock_destroy(), >> > with eventual NULL pointer. >> > >> > Signed-off-by: Eric Dumazet >> > Reported-by: Dmitry Vyukov >> >> Eric, was this a private report or some of those floating around >> publicly? > > Dmitry Vyukov filed two internal bug reports at Google, > not sure if he mentioned the issue elsewhere. No, I did not. Can I now? > Google-Bug-Id: 25415196 > Google-Bug-Id: 25416355 > > (But you do not have access to them) > > > -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH net-next] af_unix: optimize unix_writable by inlining
David Miller writes: > From: Aaron Conole > Date: Mon, 2 Nov 2015 12:01:59 -0500 > >> unix_writable() originally was inlined, but was changed as part of >> commit 1586a5877db9 ("af_unix: do not report POLLOUT on >> listeners"). Re-enable the inline flag. >> >> Signed-off-by: Aaron Conole > > This is never appropriate. > > The compiler should be fixed to inline functions properly when > appropriate for the optimization level requested. Okay, apologies for the noise. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH net-next] net: fix percpu memory leaks
On Mon, Nov 2, 2015, at 18:03, Eric Dumazet wrote: > From: Eric Dumazet > > This patch fixes following problems : > > 1) percpu_counter_init() can return an error, therefore > init_frag_mem_limit() must propagate this error so that > inet_frags_init_net() can do the same up to its callers. > > 2) If ip[46]_frags_ns_ctl_register() fail, we must unwind > properly and free the percpu_counter. > > Without this fix, we leave freed object in percpu_counters > global list (if CONFIG_HOTPLUG_CPU) leading to crashes. > > This bug was detected by KASAN and syzkaller tool > (http://github.com/google/syzkaller) > > Fixes: 6d7b857d541e ("net: use lib/percpu_counter API for fragmentation > mem accounting") > Signed-off-by: Eric Dumazet > Reported-by: Dmitry Vyukov > Cc: Hannes Frederic Sowa > Cc: Jesper Dangaard Brouer Acked-by: Hannes Frederic Sowa Syzkaller tool looks amazing, seems like it got support for unshare :). Thanks! -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH net-next] mpls: support for dead routes
Please check on what was intended. julia On Tue, 3 Nov 2015, kbuild test robot wrote: > CC: kbuild-...@01.org > In-Reply-To: <1446498529-50275-1-git-send-email-ro...@cumulusnetworks.com> > TO: Roopa Prabhu> CC: ebied...@xmission.com, rshea...@brocade.com, da...@davemloft.net, > netdev@vger.kernel.org > CC: da...@davemloft.net, netdev@vger.kernel.org > > Hi Roopa, > > [auto build test WARNING on net-next/master -- if it's inappropriate base, > please suggest rules for selecting the more suitable base] > > url: > https://github.com/0day-ci/linux/commits/Roopa-Prabhu/mpls-support-for-dead-routes/20151103-051211 > :: branch date: 88 minutes ago > :: commit date: 88 minutes ago > > >> net/mpls/af_mpls.c:702:6-22: duplicated argument to && or || > > git remote add linux-review https://github.com/0day-ci/linux > git remote update linux-review > git checkout 21fa92d07a7254a2042ded6647fc4b91332c6d0e > vim +702 net/mpls/af_mpls.c > > f8efb73c Roopa Prabhu2015-10-23 686 int nhs = 0; > f8efb73c Roopa Prabhu2015-10-23 687 int err = 0; > f8efb73c Roopa Prabhu2015-10-23 688 > f8efb73c Roopa Prabhu2015-10-23 689 change_nexthops(rt) { > f8efb73c Roopa Prabhu2015-10-23 690 int attrlen; > f8efb73c Roopa Prabhu2015-10-23 691 > f8efb73c Roopa Prabhu2015-10-23 692 nla_via = NULL; > f8efb73c Roopa Prabhu2015-10-23 693 nla_newdst = NULL; > f8efb73c Roopa Prabhu2015-10-23 694 > f8efb73c Roopa Prabhu2015-10-23 695 err = -EINVAL; > f8efb73c Roopa Prabhu2015-10-23 696 if (!rtnh_ok(rtnh, > remaining)) > f8efb73c Roopa Prabhu2015-10-23 697 goto errout; > f8efb73c Roopa Prabhu2015-10-23 698 > 1c78efa8 Robert Shearman 2015-10-23 699 /* neither weighted > multipath nor any flags > 1c78efa8 Robert Shearman 2015-10-23 700 * are supported > 1c78efa8 Robert Shearman 2015-10-23 701 */ > 21fa92d0 Roopa Prabhu2015-11-02 @702 if (rtnh->rtnh_flags || > rtnh->rtnh_flags) > 1c78efa8 Robert Shearman 2015-10-23 703 goto errout; > 1c78efa8 Robert Shearman 2015-10-23 704 > f8efb73c Roopa Prabhu2015-10-23 705 
attrlen = > rtnh_attrlen(rtnh); > f8efb73c Roopa Prabhu2015-10-23 706 if (attrlen > 0) { > f8efb73c Roopa Prabhu2015-10-23 707 struct nlattr > *attrs = rtnh_attrs(rtnh); > f8efb73c Roopa Prabhu2015-10-23 708 > f8efb73c Roopa Prabhu2015-10-23 709 nla_via = > nla_find(attrs, attrlen, RTA_VIA); > f8efb73c Roopa Prabhu2015-10-23 710 nla_newdst = > nla_find(attrs, attrlen, RTA_NEWDST); > > --- > 0-DAY kernel test infrastructureOpen Source Technology Center > https://lists.01.org/pipermail/kbuild-all Intel Corporation > -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH net-next V1 0/7] Mellanox mlx5e driver update, Nov 3 2015
On Tue, Nov 3, 2015 at 8:07 AM, Or Gerlitz wrote: > This series contains bunch of small fixes to the mlx5e driver from Achiad. Oops, I missed your email from 2h ago... but these all ARE bug fixes, so hopefully I didn't really violate the directive (I guess I should have just asked, but again, I missed the email...) Or. > Applies on net-next commit e7b63ff "Merge branch 'master' of > git://git.kernel.org/pub/scm/linux/kernel/git/klassert/ipsec-next" > Changes from V0: > - removed the driver patch that dealt with IRQ affinity changes during > NAPI poll, as this is a generic problem which needs generic solution. > > Achiad Shochat (7): > net/mlx5e: Avoid NULL pointer access in case of configuration failure > net/mlx5e: Wait for RX buffers initialization in a more proper manner > net/mlx5_core: Use the the real irqn in eq->irqn > net/mlx5e: Don't allow more than max supported channels > net/mlx5e: Return error in case mlx5e_set_features() fails > net/mlx5e: Re-eanble client vlan TX acceleration > net/mlx5e: Fix LSO vlan insertion > > drivers/net/ethernet/mellanox/mlx5/core/en.h | 6 + > .../net/ethernet/mellanox/mlx5/core/en_ethtool.c | 5 ++--- > drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 16 - > drivers/net/ethernet/mellanox/mlx5/core/en_tx.c| 26 > +++--- > drivers/net/ethernet/mellanox/mlx5/core/eq.c | 8 +++ > 5 files changed, 46 insertions(+), 15 deletions(-) -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-next V1 0/7] Mellanox mlx5e driver update, Nov 3 2015
Hi Dave, This series contains a bunch of small fixes to the mlx5e driver from Achiad. Applies on net-next commit e7b63ff "Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/klassert/ipsec-next" Or. Changes from V0: - removed the driver patch that dealt with IRQ affinity changes during NAPI poll, as this is a generic problem which needs a generic solution. Achiad Shochat (7): net/mlx5e: Avoid NULL pointer access in case of configuration failure net/mlx5e: Wait for RX buffers initialization in a more proper manner net/mlx5_core: Use the real irqn in eq->irqn net/mlx5e: Don't allow more than max supported channels net/mlx5e: Return error in case mlx5e_set_features() fails net/mlx5e: Re-enable client vlan TX acceleration net/mlx5e: Fix LSO vlan insertion drivers/net/ethernet/mellanox/mlx5/core/en.h | 6 + .../net/ethernet/mellanox/mlx5/core/en_ethtool.c | 5 ++--- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 16 - drivers/net/ethernet/mellanox/mlx5/core/en_tx.c| 26 +++--- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 8 +++ 5 files changed, 46 insertions(+), 15 deletions(-) -- 2.3.7 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-next V1 1/7] net/mlx5e: Avoid NULL pointer access in case of configuration failure
From: Achiad Shochat In case a configuration operation that involves closing and re-opening resources (e.g RX/TX queue size change) fails at the re-opening stage, these resources will remain closed. So when executing (following) configuration operations (e.g ifconfig down) we cannot assume that these resources are available. Signed-off-by: Achiad Shochat Signed-off-by: Or Gerlitz --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 6 ++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index bb801a9..9df6f9a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1404,6 +1404,12 @@ int mlx5e_close_locked(struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); + /* May already be CLOSED in case a previous configuration operation + * (e.g RX/TX queue size change) that involves close failed. + */ + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + return 0; + clear_bit(MLX5E_STATE_OPENED, &priv->state); mlx5e_redirect_rqts(priv); -- 2.3.7 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-next V1 4/7] net/mlx5e: Don't allow more than max supported channels
From: Achiad ShochatConsider MLX5E_MAX_NUM_CHANNELS @ethtool set/get_channels Signed-off-by: Achiad Shochat Signed-off-by: Or Gerlitz --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 6 ++ drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c | 5 ++--- drivers/net/ethernet/mellanox/mlx5/core/en_main.c| 3 +-- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 0983a20..f2ae62d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -617,5 +617,11 @@ static inline void mlx5e_cq_arm(struct mlx5e_cq *cq) mlx5_cq_arm(mcq, MLX5_CQ_DB_REQ_NOT, mcq->uar->map, NULL, cq->wq.cc); } +static inline int mlx5e_get_max_num_channels(struct mlx5_core_dev *mdev) +{ + return min_t(int, mdev->priv.eq_table.num_comp_vectors, +MLX5E_MAX_NUM_CHANNELS); +} + extern const struct ethtool_ops mlx5e_ethtool_ops; u16 mlx5e_get_max_inline_cap(struct mlx5_core_dev *mdev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index bce9126..2e022e9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -345,9 +345,8 @@ static void mlx5e_get_channels(struct net_device *dev, struct ethtool_channels *ch) { struct mlx5e_priv *priv = netdev_priv(dev); - int ncv = priv->mdev->priv.eq_table.num_comp_vectors; - ch->max_combined = ncv; + ch->max_combined = mlx5e_get_max_num_channels(priv->mdev); ch->combined_count = priv->params.num_channels; } @@ -355,7 +354,7 @@ static int mlx5e_set_channels(struct net_device *dev, struct ethtool_channels *ch) { struct mlx5e_priv *priv = netdev_priv(dev); - int ncv = priv->mdev->priv.eq_table.num_comp_vectors; + int ncv = mlx5e_get_max_num_channels(priv->mdev); unsigned int count = ch->combined_count; bool was_opened; int err = 0; diff --git 
a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 0bab33c..febf711 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -2047,8 +2047,7 @@ static void *mlx5e_create_netdev(struct mlx5_core_dev *mdev) { struct net_device *netdev; struct mlx5e_priv *priv; - int nch = min_t(int, mdev->priv.eq_table.num_comp_vectors, - MLX5E_MAX_NUM_CHANNELS); + int nch = mlx5e_get_max_num_channels(mdev); int err; if (mlx5e_check_required_hca_cap(mdev)) -- 2.3.7 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-next V1 5/7] net/mlx5e: Return error in case mlx5e_set_features() fails
From: Achiad Shochat In case mlx5e_set_features() fails, return the failure status rather than 0. Signed-off-by: Achiad Shochat Signed-off-by: Or Gerlitz --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index febf711..28eaed5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1843,7 +1843,7 @@ static int mlx5e_set_features(struct net_device *netdev, mlx5e_disable_vlan_filter(priv); } - return 0; + return err; } static int mlx5e_change_mtu(struct net_device *netdev, int new_mtu) -- 2.3.7 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH/RFC v2] ravb: use clock rate as basis for GTI.TIV
The GTI.TIV may be set to 2GHz^2 / rate, where rate is that of the clock of the device. Rather than assuming a rate of 130MHz, use the actual rate of the clock. The motivation for this is to use the correct rate on the r8a7795/Salvator-X which is advertised as 133MHz but may differ depending on the extal present on the Salvator-X. Signed-off-by: Simon Horman --- Tested on the topic/gen3-latest branch of Geert Uytterhoeven's renesas-drivers tree on kernel.org: 3f5a88be9fea ("[WIP] arm64: renesas: r8a7795: Convert to new CPG/MSSR bindings") v2 * Corrected typos in changelog, as pointed out by Geert Uytterhoeven * Use do_div() rather than 64-bit division to allow compilation on 32-bit ARM --- drivers/net/ethernet/renesas/ravb.h | 3 +++ drivers/net/ethernet/renesas/ravb_main.c | 38 +++- 2 files changed, 40 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/renesas/ravb.h b/drivers/net/ethernet/renesas/ravb.h index 0623fff932e4..f9dee7436e81 100644 --- a/drivers/net/ethernet/renesas/ravb.h +++ b/drivers/net/ethernet/renesas/ravb.h @@ -576,6 +576,9 @@ enum GTI_BIT { GTI_TIV = 0x0FFF, }; +#define GTI_TIV_MAX GTI_TIV +#define GTI_TIV_MIN 0x20 + /* GIC */ enum GIC_BIT { GIC_PTCE = 0x0001, /* Undocumented? 
*/ diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index aa7b2083cb53..599334d68afe 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -32,6 +32,8 @@ #include #include +#include + #include "ravb.h" #define RAVB_DEF_MSG_ENABLE \ @@ -1659,6 +1661,38 @@ static const struct of_device_id ravb_match_table[] = { }; MODULE_DEVICE_TABLE(of, ravb_match_table); +static int ravb_set_gti(struct net_device *ndev) +{ + + struct device *dev = ndev->dev.parent; + struct device_node *np = dev->of_node; + unsigned long rate; + struct clk *clk; + uint64_t inc; + + clk = of_clk_get(np, 0); + if (IS_ERR(clk)) { + dev_err(dev, "could not get clock\n"); + return PTR_ERR(clk); + } + + rate = clk_get_rate(clk); + clk_put(clk); + + inc = 10ULL << 20; + do_div(inc, rate); + + if (inc < GTI_TIV_MIN || inc > GTI_TIV_MAX) { + dev_err(dev, "gti.tiv increment 0x%llx is outside the range 0x%x - 0x%x\n", + inc, GTI_TIV_MIN, GTI_TIV_MAX); + return -EINVAL; + } + + ravb_write(ndev, inc, GTI); + + return 0; +} + static int ravb_probe(struct platform_device *pdev) { struct device_node *np = pdev->dev.of_node; @@ -1755,7 +1789,9 @@ static int ravb_probe(struct platform_device *pdev) CCC); /* Set GTI value */ - ravb_write(ndev, ((1000 << 20) / 130) & GTI_TIV, GTI); + error = ravb_set_gti(ndev); + if (error) + goto out_release; /* Request GTI loading */ ravb_write(ndev, ravb_read(ndev, GCCR) | GCCR_LTI, GCCR); -- 2.1.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-next V1 7/7] net/mlx5e: Fix LSO vlan insertion
From: Achiad Shochat Consider vlan insertion impact on headers copy size also for LSO packets. Signed-off-by: Achiad Shochat Signed-off-by: Or Gerlitz --- drivers/net/ethernet/mellanox/mlx5/core/en_tx.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 5105288..cd8f85a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -116,7 +116,7 @@ static inline u16 mlx5e_get_inline_hdr_size(struct mlx5e_sq *sq, * headers and occur before the data gather. * Therefore these headers must be copied into the WQE */ -#define MLX5E_MIN_INLINE (ETH_HLEN + 2/*vlan tag*/) +#define MLX5E_MIN_INLINE ETH_HLEN if (bf && (skb_headlen(skb) <= sq->max_inline)) return skb_headlen(skb); @@ -128,7 +128,7 @@ static inline void mlx5e_insert_vlan(void *start, struct sk_buff *skb, u16 ihs) { struct vlan_ethhdr *vhdr = (struct vlan_ethhdr *)start; int cpy1_sz = 2 * ETH_ALEN; - int cpy2_sz = ihs - cpy1_sz - VLAN_HLEN; + int cpy2_sz = ihs - cpy1_sz; skb_copy_from_linear_data(skb, vhdr, cpy1_sz); skb_pull_inline(skb, cpy1_sz); @@ -192,6 +192,7 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb) if (skb_vlan_tag_present(skb)) { mlx5e_insert_vlan(eseg->inline_hdr_start, skb, ihs); + ihs += VLAN_HLEN; } else { skb_copy_from_linear_data(skb, eseg->inline_hdr_start, ihs); skb_pull_inline(skb, ihs); -- 2.3.7 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-next V1 6/7] net/mlx5e: Re-enable client vlan TX acceleration
From: Achiad Shochat This reverts commit cd58c714acb9 "net/mlx5e: Disable client vlan TX acceleration". Bring back client vlan insertion offload, the original performance issue was found and fixed in the next patch. Signed-off-by: Achiad Shochat Signed-off-by: Or Gerlitz --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 1 + drivers/net/ethernet/mellanox/mlx5/core/en_tx.c | 23 +-- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 28eaed5..5fc4d2d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -2004,6 +2004,7 @@ static void mlx5e_build_netdev(struct net_device *netdev) netdev->vlan_features |= NETIF_F_LRO; netdev->hw_features = netdev->vlan_features; + netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX; netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX; netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index b73672f..5105288 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -124,6 +124,21 @@ static inline u16 mlx5e_get_inline_hdr_size(struct mlx5e_sq *sq, return MLX5E_MIN_INLINE; } +static inline void mlx5e_insert_vlan(void *start, struct sk_buff *skb, u16 ihs) +{ + struct vlan_ethhdr *vhdr = (struct vlan_ethhdr *)start; + int cpy1_sz = 2 * ETH_ALEN; + int cpy2_sz = ihs - cpy1_sz - VLAN_HLEN; + + skb_copy_from_linear_data(skb, vhdr, cpy1_sz); + skb_pull_inline(skb, cpy1_sz); + vhdr->h_vlan_proto = skb->vlan_proto; + vhdr->h_vlan_TCI = cpu_to_be16(skb_vlan_tag_get(skb)); + skb_copy_from_linear_data(skb, &vhdr->h_vlan_encapsulated_proto, + cpy2_sz); + skb_pull_inline(skb, cpy2_sz); +} + static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb) { struct mlx5_wq_cyc *wq = &sq->wq; @@ -175,8 
+190,12 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb) ETH_ZLEN); } - skb_copy_from_linear_data(skb, eseg->inline_hdr_start, ihs); - skb_pull_inline(skb, ihs); + if (skb_vlan_tag_present(skb)) { + mlx5e_insert_vlan(eseg->inline_hdr_start, skb, ihs); + } else { + skb_copy_from_linear_data(skb, eseg->inline_hdr_start, ihs); + skb_pull_inline(skb, ihs); + } eseg->inline_hdr_sz = cpu_to_be16(ihs); -- 2.3.7 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-next V1 2/7] net/mlx5e: Wait for RX buffers initialization in a more proper manner
From: Achiad Shochat Use jiffies rather than a wait loop with msleep(). The wait loop didn't take into consideration time when the process was not executing. Signed-off-by: Achiad Shochat Signed-off-by: Or Gerlitz --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 9df6f9a..0bab33c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -442,12 +442,12 @@ static void mlx5e_disable_rq(struct mlx5e_rq *rq) static int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq) { + unsigned long exp_time = jiffies + msecs_to_jiffies(2); struct mlx5e_channel *c = rq->channel; struct mlx5e_priv *priv = c->priv; struct mlx5_wq_ll *wq = &rq->wq; - int i; - for (i = 0; i < 1000; i++) { + while (time_before(jiffies, exp_time)) { if (wq->cur_sz >= priv->params.min_rx_wqes) return 0; -- 2.3.7 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-next V1 3/7] net/mlx5_core: Use the real irqn in eq->irqn
From: Achiad Shochat Instead of storing the msix array index in eq->irqn (vecidx), store the real irq number. Signed-off-by: Achiad Shochat Signed-off-by: Saeed Mahameed Signed-off-by: Or Gerlitz --- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 1f01fe8..713ead5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -382,10 +382,10 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, name, pci_name(dev->pdev)); eq->eqn = out.eq_number; - eq->irqn = vecidx; + eq->irqn = priv->msix_arr[vecidx].vector; eq->dev = dev; eq->doorbell = uar->map + MLX5_EQ_DOORBEL_OFFSET; - err = request_irq(priv->msix_arr[vecidx].vector, mlx5_msix_handler, 0, + err = request_irq(eq->irqn, mlx5_msix_handler, 0, priv->irq_info[vecidx].name, eq); if (err) goto err_eq; @@ -421,12 +421,12 @@ int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq) int err; mlx5_debug_eq_remove(dev, eq); - free_irq(dev->priv.msix_arr[eq->irqn].vector, eq); + free_irq(eq->irqn, eq); err = mlx5_cmd_destroy_eq(dev, eq->eqn); if (err) mlx5_core_warn(dev, "failed to destroy a previously created eq: eqn %d\n", eq->eqn); - synchronize_irq(dev->priv.msix_arr[eq->irqn].vector); + synchronize_irq(eq->irqn); mlx5_buf_free(dev, &eq->buf); return err; -- 2.3.7 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH net-next] mpls: Don't accept multipath configuration until the support is complete
Sergei Shtylyov writes: > Hello. >> >> diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c >> index c70d750148b6..893cd2dc3979 100644 >> --- a/net/mpls/af_mpls.c >> +++ b/net/mpls/af_mpls.c >> @@ -1162,6 +1162,8 @@ static int rtm_to_route_config(struct sk_buff *skb, >> struct nlmsghdr *nlh, >> { >> cfg->rc_mp = nla_data(nla); >> cfg->rc_mp_len = nla_len(nla); >> +/* Fail until multipath support is complete */ >> +goto errout; >> break; > > Forgot to delete *break*? Nope. I did that deliberately, because this code is not supposed to stay this way for long. Eric -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH net-next rfc V2 0/2] basic busy polling support for vhost_net
On 10/30/2015 07:58 PM, Jason Wang wrote: > > On 10/29/2015 04:45 PM, Jason Wang wrote: >> Hi all: >> >> This series tries to add basic busy polling for vhost net. The idea is >> simple: at the end of tx processing, busy polling for new tx added >> descriptor and rx receive socket for a while. The maximum number of >> time (in us) could be spent on busy polling was specified through >> module parameter. >> >> Test were done through: >> >> - 50 us as busy loop timeout >> - Netperf 2.6 >> - Two machines with back to back connected mlx4 >> - Guest with 8 vcpus and 1 queue >> >> Result shows very huge improvement on both tx (at most 158%) and rr >> (at most 53%) while rx is as much as in the past. Most cases the cpu >> utilization is also improved: >> > Just notice there's something wrong in the setup. So the numbers are > incorrect here. Will re-run and post correct number here. > > Sorry. Here's the updated testing result: 1) 1 vcpu 1 queue: TCP_RR size/session/+thu%/+normalize% 1/ 1/0%/ -25% 1/50/ +12%/0% 1/ 100/ +12%/ +1% 1/ 200/ +9%/ -1% 64/ 1/ +3%/ -21% 64/50/ +8%/0% 64/ 100/ +7%/0% 64/ 200/ +9%/0% 256/ 1/ +1%/ -25% 256/50/ +7%/ -2% 256/ 100/ +6%/ -2% 256/ 200/ +4%/ -2% 512/ 1/ +2%/ -19% 512/50/ +5%/ -2% 512/ 100/ +3%/ -3% 512/ 200/ +6%/ -2% 1024/ 1/ +2%/ -20% 1024/50/ +3%/ -3% 1024/ 100/ +5%/ -3% 1024/ 200/ +4%/ -2% Guest RX size/session/+thu%/+normalize% 64/ 1/ -4%/ -5% 64/ 4/ -3%/ -10% 64/ 8/ -3%/ -5% 512/ 1/ +15%/ +1% 512/ 4/ -5%/ -5% 512/ 8/ -2%/ -4% 1024/ 1/ -5%/ -16% 1024/ 4/ -2%/ -5% 1024/ 8/ -6%/ -6% 2048/ 1/ +10%/ +5% 2048/ 4/ -8%/ -4% 2048/ 8/ -1%/ -4% 4096/ 1/ -9%/ -11% 4096/ 4/ +1%/ -1% 4096/ 8/ +1%/0% 16384/ 1/ +20%/ +11% 16384/ 4/0%/ -3% 16384/ 8/ +1%/0% 65535/ 1/ +36%/ +13% 65535/ 4/ -10%/ -9% 65535/ 8/ -3%/ -2% Guest TX size/session/+thu%/+normalize% 64/ 1/ -7%/ -16% 64/ 4/ -14%/ -23% 64/ 8/ -9%/ -20% 512/ 1/ -62%/ -56% 512/ 4/ -62%/ -56% 512/ 8/ -61%/ -53% 1024/ 1/ -66%/ -61% 1024/ 4/ -77%/ -73% 1024/ 8/ -73%/ -67% 2048/ 1/ -74%/ -75% 2048/ 4/ 
-77%/ -74% 2048/ 8/ -72%/ -68% 4096/ 1/ -65%/ -68% 4096/ 4/ -66%/ -63% 4096/ 8/ -62%/ -57% 16384/ 1/ -25%/ -28% 16384/ 4/ -28%/ -17% 16384/ 8/ -24%/ -10% 65535/ 1/ -17%/ -14% 65535/ 4/ -22%/ -5% 65535/ 8/ -25%/ -9% - obvious improvement on TCP_RR (at most 12%) - improvement on guest RX - huge decreasing on Guest TX (at most -75%), this is probably because virtio-net driver suffers from buffer bloat by orphaning skb before transmission. The faster vhost it is, the smaller packet it could produced. To reduce the impact on this, turning off gso in guest can result the following result: size/session/+thu%/+normalize% 64/ 1/ +3%/ -11% 64/ 4/ +4%/ -10% 64/ 8/ +4%/ -10% 512/ 1/ +2%/ +5% 512/ 4/0%/ -1% 512/ 8/0%/0% 1024/ 1/ +11%/0% 1024/ 4/0%/ -1% 1024/ 8/ +3%/ +1% 2048/ 1/ +4%/ -1% 2048/ 4/ +8%/ +3% 2048/ 8/0%/ -1% 4096/ 1/ +4%/ -1% 4096/ 4/ +1%/0% 4096/ 8/ +2%/0% 16384/ 1/ +2%/ -2% 16384/ 4/ +3%/ +1% 16384/ 8/0%/ -1% 65535/ 1/ +9%/ +7% 65535/ 4/0%/ -3% 65535/ 8/ -1%/ -1% 2) 8 vcpus 1 queue: TCP_RR size/session/+thu%/+normalize% 1/ 1/ +5%/ -14% 1/50/ +2%/ +1% 1/ 100/0%/ -1% 1/ 200/0%/0% 64/ 1/0%/ -25% 64/50/ +5%/ +5% 64/ 100/0%/0% 64/ 200/0%/ -1% 256/ 1/0%/ -30% 256/50/0%/0% 256/ 100/ -2%/ -2% 256/ 200/0%/0% 512/ 1/ +1%/ -23% 512/50/ +1%/ +1% 512/ 100/ +1%/0% 512/ 200/ +1%/ +1% 1024/ 1/ +1%/ -23% 1024/50/ +5%/ +5% 1024/ 100/0%/ -1% 1024/ 200/0%/0% Guest RX size/session/+thu%/+normalize% 64/ 1/ +1%/ +1% 64/ 4/ -2%/ +1% 64/ 8/ +6%/ +19% 512/ 1/ +5%/ -7% 512/ 4/ -4%/ -4% 512/ 8/0%/0% 1024/ 1/ +1%/ +2% 1024/ 4/ -2%/ -2% 1024/ 8/ -1%/ +7% 2048/ 1/ +8%/ -2% 2048/ 4/0%/ +5% 2048/ 8/ -1%/ +13% 4096/ 1/ -1%/ +2% 4096/ 4/0%/ +6% 4096/ 8/ -2%/ +15% 16384/ 1/ -1%/0% 16384/ 4/ -2%/ -1% 16384/ 8/ -2%/ +2% 65535/ 1/ -2%/0% 65535/ 4/ -3%/ -3% 65535/ 8/ -2%/ +2% Guest TX size/session/+thu%/+normalize% 64/ 1/ +6%/
[PATCH net-next] mpls: Don't accept multipath configuration until the support is complete
Currently the multipath code has a nasty failure mode in that it will fail to notice link down or administrative device down and will black hole packets instead of sending them to their nexthop destination. Half the point of multipath is to gracefully handle forwarding path failures, and as the current code does not handle forwarding failures, the current code is dangerous to use. As mpls multipath has never been exported to userspace, and as the implementation was not complete before the merge window, disable the mpls multipath code by rejecting all multipath configuration requests. This will give us another kernel development cycle to cleanly sort out the issues, without any bad precedents to worry about. Signed-off-by: "Eric W. Biederman" --- net/mpls/af_mpls.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index c70d750148b6..893cd2dc3979 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -1162,6 +1162,8 @@ static int rtm_to_route_config(struct sk_buff *skb, struct nlmsghdr *nlh, { cfg->rc_mp = nla_data(nla); cfg->rc_mp_len = nla_len(nla); + /* Fail until multipath support is complete */ + goto errout; break; } default: -- 2.2.1 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Resource leak in unshare
On Mon, 2015-11-02 at 13:01 -0600, Eric W. Biederman wrote: > Dmitry Vyukov writes: > > > Hello, > > > > I am hitting the following warnings on > > bcee19f424a0d8c26ecf2607b73c690802658b29 (4.3): > > Do you have any trace of the earlier failures? > > This appears to be something caused by an earlier failure (possibly > whatever fails to allocate memory). Having network devices present > but being in the generic cleanup routines is wrong. > > If there is no additional information can you please rerun with the > following change applied? That should at least report which function is > failing, and give us a good clue where to start debugging this. At first, I would say sit is leaking percpu memory. Load sit module, then: while : do ip netns add foo ip netns del foo done Will eat all memory eventually. ipip6_tunnel_init() and ipip6_fb_tunnel_init() are _both_ called for the sit0 device, this looks very wrong. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [GIT] Networking
On 10/28/2015 02:39 AM, Linus Torvalds wrote: I'm sorry, but we don't add idiotic new interfaces like this for idiotic new code like that. As one of the people who encouraged gcc to add this interface, I'll speak up in its favor: Getting overflow checking right in more complicated cases is a PITA. I'll admit that the "subtract from an unsigned integer if it won't go negative" isn't particularly useful, but there are other cases in which it's much more useful. The one I care about the most is for multiplication. Witness the never-ending debates about the proper way to implement things like kmalloc_array. We currently do: static inline void *kmalloc_array(size_t n, size_t size, gfp_t flags) { if (size != 0 && n > SIZE_MAX / size) return NULL; return __kmalloc(n * size, flags); } This is correct, and it's even reasonably efficient if size is a compile-time constant. (On x86, it still might not be quite optimal, since there'll be an extra cmp instruction. Sure, the difference could easily be a cycle or even less.) But if size is not a constant, then, unless the compiler is quite clever, this ends up generating a division, and that sucks. If we were willing to do: size_t total_bytes; #if efficient_overflow_detection_works if (__builtin_mul_overflow(n, size, &total_bytes)) return NULL; #else /* existing check goes here */ total_bytes = n * size; #endif return __kmalloc(n * size, flags); then we get optimal code generation on new compilers and the result isn't even that ugly to look at. For compiler flag settings in which signed overflow can cause subtle disasters, the signed addition overflow helpers can be nice, too. --Andy -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 1/1] commit c6825c0976fa7893692e0e43b09740b419b23c09 upstream.
> On Thu, Oct 29, 2015 at 6:21 PM, Neal P. Murphy >wrote: > > On Thu, 29 Oct 2015 17:01:24 -0700 > > Ani Sinha wrote: > > > >> On Wed, Oct 28, 2015 at 11:40 PM, Neal P. Murphy > >> wrote: > >> > On Wed, 28 Oct 2015 02:36:50 -0400 > >> > "Neal P. Murphy" wrote: > >> > > >> >> On Mon, 26 Oct 2015 21:06:33 +0100 > >> >> Pablo Neira Ayuso wrote: > >> >> > >> >> > Hi, > >> >> > > >> >> > On Mon, Oct 26, 2015 at 11:55:39AM -0700, Ani Sinha wrote: > >> >> > > netfilter: nf_conntrack: fix RCU race in nf_conntrack_find_get > >> >> > > >> >> > Please, no need to Cc everyone here. Please, submit your Netfilter > >> >> > patches to netfilter-de...@vger.kernel.org. > >> >> > > >> >> > Moreover, it would be great if the subject includes something > >> >> > descriptive on what you need, for this I'd suggest: > >> >> > > >> >> > [PATCH -stable 3.4,backport] netfilter: nf_conntrack: fix RCU race in > >> >> > nf_conntrack_find_get > >> >> > > >> >> > I'm including Neal P. Murphy, he said he would help testing these > >> >> > backports, getting a Tested-by: tag usually speeds up things too. > >> >> > >> > > >> > I've probably done about as much seat-of-the-pants testing as I can. All > >> > opening/closing the same destination IP/port. > >> > > >> > Host: Debian Jessie, 8-core Vishera 8350 at 4.4 GHz, 16GiB RAM at (I > >> > think) 2100MHz. > >> > > >> > Traffic generator 1: 6-CPU KVM running 64-bit Smoothwall Express 3.1 > >> > (linux 3.4.109 without these patches), with 8GiB RAM and 9GiB swap. > >> > Packets sent across PURPLE (to bypass NAT and firewall). > >> > > >> > Traffic generator 2: 32-bit KVM running Smoothwall Express 3.1 (linux > >> > 3.4.110 with these patches), 3GiB RAM and minimal swap. > >> > > >> > In the first set of tests, generator 1's traffic passed through > >> > Generator 2 as a NATting firewall, to the host's web server. In the > >> > second set of tests, generator 2's traffic went through NAT to the > >> > host's web server. 
> >> > > >> > The load tests: > >> > - 2500 processes using 2500 addresses and random src ports > >> > - 2500 processes using 2500 addresses and the same src port > >> > - 2500 processes using the same src address and port > >> > > >> > I also tested using stock NF timeouts and using 1 second timeouts. > >> > > >> > Bandwidth used got as high as 16Mb/s for some tests. > >> > > >> > Conntracks got up to 200 000 or so or bounced between 1 and 2, depending > >> > on the test and the timeouts. > >> > > >> > I did not reproduce the problem these patches solve. But more > >> > importantly, I saw no problems at all. Each time I terminated a test, > >> > RAM usage returned to about that of post-boot; so there were no apparent > >> > memory leaks. No kernel messages and no netfilter messages appeared > >> > during the tests. > >> > > >> > If I have time, I suppose I could run another set of tests: 2500 source > >> > processes using 2500 addresses times 200 ports to connect to 2500 > >> > addresses times 200 ports on a destination system. Each process opens > >> > 200 sockets, then closes them. And repeats ad infinitum. But I might > >> > have to be clever since I can't run 500 000 processes; but I could run > >> > 20 VMs; that would get it down to about 12 000 processes per VM. And I > >> > might have to figure out how to allow allow processes on the destination > >> > system to open hundreds or thousands of sockets. > >> > >> Should I resend the patch with a Tested-by: tag? > > > > ... Oh, wait. Not yet. The dawn just broke over ol' Marblehead here. I only > > tested TCP; I need to hammer UDP, too. > > > > Can I set the timeouts to zero? Or is one as low as I can go? > > Any progress with testing ? I applied the 'hammer' through a firewall with the patch. I used TCP, UDP and ICMP. I don't know if the patch fixes the problem. But I'm reasonably sure that it did not break normal operations. 
To test a different problem I fixed (a memory leak in my 64-bit counter patch for xt_ACCOUNT), I tested 60,000 addresses (most of a /16) through the firewall. Again, no troubles. I only observed two odd things which are likely completely unrelated to your patch. When I started the TCP test, then added the UDP test, only TCP would come through. If I stopped and restarted the TCP test, only UDP would come through. I suspect this is due to buffering. It's just a behaviour I haven't encountered since I started using Linux many years ago (around '98). The second, when I started the test, the firewall would lose contact with the upstream F/W's apcupsd daemon; again, this is likely due to the nature of the test: it likely floods input and output queues. I'd say you can probably resend with Tested-by. Neal -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at
[PATCH -stable 3.4,backport] commit c6825c0976fa7893692e0e43b09740b419b23c09 upstream.
netfilter: nf_conntrack: don't release a conntrack with non-zero refcnt With this patch, the conntrack refcount is initially set to zero and it is bumped once it is added to any of the list, so we fulfill Eric's golden rule which is that all released objects always have a refcount that equals zero. Andrey Vagin reports that nf_conntrack_free can't be called for a conntrack with non-zero ref-counter, because it can race with nf_conntrack_find_get(). A conntrack slab is created with SLAB_DESTROY_BY_RCU. Non-zero ref-counter says that this conntrack is used. So when we release a conntrack with non-zero counter, we break this assumption. CPU1CPU2 nf_conntrack_find() nf_ct_put() destroy_conntrack() ... init_conntrack __nf_conntrack_alloc (set use = 1) atomic_inc_not_zero(>use) (use = 2) if (!l4proto->new(ct, skb, dataoff, timeouts)) nf_conntrack_free(ct); (use = 2 !!!) ... __nf_conntrack_alloc (set use = 1) if (!nf_ct_key_equal(h, tuple, zone)) nf_ct_put(ct); (use = 0) destroy_conntrack() /* continue to work with CT */ After applying the path "[PATCH] netfilter: nf_conntrack: fix RCU race in nf_conntrack_find_get" another bug was triggered in destroy_conntrack(): <4>[67096.759334] [ cut here ] <2>[67096.759353] kernel BUG at net/netfilter/nf_conntrack_core.c:211! ... <4>[67096.759837] Pid: 498649, comm: atdd veid: 666 Tainted: G C ---2.6.32-042stab084.18 #1 042stab084_18 /DQ45CB <4>[67096.759932] RIP: 0010:[] [] destroy_conntrack+0x15c/0x190 [nf_conntrack] <4>[67096.760255] Call Trace: <4>[67096.760255] [] nf_conntrack_destroy+0x17/0x30 <4>[67096.760255] [] nf_conntrack_find_get+0x85/0x130 [nf_conntrack] <4>[67096.760255] [] nf_conntrack_in+0x352/0xb60 [nf_conntrack] <4>[67096.760255] [] ipv4_conntrack_local+0x51/0x60 [nf_conntrack_ipv4] <4>[67096.760255] [] nf_iterate+0x69/0xb0 <4>[67096.760255] [] ? dst_output+0x0/0x20 <4>[67096.760255] [] nf_hook_slow+0x74/0x110 <4>[67096.760255] [] ? 
dst_output+0x0/0x20 <4>[67096.760255] [] raw_sendmsg+0x775/0x910 <4>[67096.760255] [] ? flush_tlb_others_ipi+0x128/0x130 <4>[67096.760255] [] ? apic_timer_interrupt+0xe/0x20 <4>[67096.760255] [] ? apic_timer_interrupt+0xe/0x20 <4>[67096.760255] [] inet_sendmsg+0x4a/0xb0 <4>[67096.760255] [] ? sock_sendmsg+0x13/0x140 <4>[67096.760255] [] sock_sendmsg+0x117/0x140 <4>[67096.760255] [] ? native_smp_send_reschedule+0x49/0x60 <4>[67096.760255] [] ? _spin_unlock_bh+0x1b/0x20 <4>[67096.760255] [] ? autoremove_wake_function+0x0/0x40 <4>[67096.760255] [] ? do_ip_setsockopt+0x90/0xd80 <4>[67096.760255] [] ? apic_timer_interrupt+0xe/0x20 <4>[67096.760255] [] ? apic_timer_interrupt+0xe/0x20 <4>[67096.760255] [] sys_sendto+0x139/0x190 <4>[67096.760255] [] ? audit_syscall_entry+0x1d7/0x200 <4>[67096.760255] [] ? __audit_syscall_exit+0x265/0x290 <4>[67096.760255] [] compat_sys_socketcall+0x13f/0x210 <4>[67096.760255] [] ia32_sysret+0x0/0x5 I have reused the original title for the RFC patch that Andrey posted and most of the original patch description. Signed-off-by: Ani SinhaTested-by: "Neal P. 
Murphy" --- net/netfilter/nf_conntrack_core.c | 18 +- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 9a171b2..9a46908 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -441,7 +441,9 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct) goto out; add_timer(>timeout); - nf_conntrack_get(>ct_general); + smp_wmb(); + /* The caller holds a reference to this object */ + atomic_set(>ct_general.use, 2); __nf_conntrack_hash_insert(ct, hash, repl_hash); NF_CT_STAT_INC(net, insert); spin_unlock_bh(_conntrack_lock); @@ -732,11 +734,10 @@ __nf_conntrack_alloc(struct net *net, u16 zone, nf_ct_zone->id = zone; } #endif - /* -* changes to lookup keys must be done before setting refcnt to 1 + /* Because we use RCU lookups, we set ct_general.use to zero before +* this is inserted in any list. */ - smp_wmb(); - atomic_set(>ct_general.use, 1); + atomic_set(>ct_general.use, 0); return ct; #ifdef CONFIG_NF_CONNTRACK_ZONES @@ -759,6 +760,10 @@ EXPORT_SYMBOL_GPL(nf_conntrack_alloc); void nf_conntrack_free(struct nf_conn *ct) { struct net *net = nf_ct_net(ct); + /*
Re: [net-next v4 2/8] dpaa_eth: add support for DPAA Ethernet
On Mon, 2015-11-02 at 19:31 +0200, Madalin Bucur wrote: > This introduces the Freescale Data Path Acceleration Architecture > (DPAA) Ethernet driver (dpaa_eth) that builds upon the DPAA QMan, > BMan, PAMU and FMan drivers to deliver Ethernet connectivity on > the Freescale DPAA QorIQ platforms. [] > diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c > b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c [] > +static void _dpa_rx_error(struct net_device *net_dev, > + const struct dpa_priv_s *priv, > + struct dpa_percpu_priv_s *percpu_priv, > + const struct qm_fd *fd, > + u32 fqid) > +{ > + /* limit common, possibly innocuous Rx FIFO Overflow errors' > + * interference with zero-loss convergence benchmark results. > + */ > + if (likely(fd->status & FM_FD_ERR_PHYSICAL)) > + pr_warn_once("non-zero error counters in fman statistics > (sysfs)\n"); > + else > + if (net_ratelimit()) > + netif_err(priv, hw, net_dev, "Err FD status = 0x%08x\n", > + fd->status & FM_FD_STAT_RX_ERRORS); It's a bit of a pity the logging message code is a mix of pr_, dev_, netdev_ and netif_ Perhaps netif__ratelimited macros should be added. Something like: --- include/linux/netdevice.h | 54 +++ 1 file changed, 54 insertions(+) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 210d11a..555471d 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4025,6 +4025,60 @@ do { \ }) #endif +#define netif_level_ratelimited(level, priv, type, dev, fmt, args...) \ +do { \ + if (netif_msg_##type(priv) && net_ratelimit()) \ + netdev_##level(dev, fmt, ##args); \ +} while (0) + +#define netif_emerg_ratelimited(priv, type, dev, fmt, args...) \ + netif_level_ratelimited(emerg, priv, type, dev, fmt, ##args) +#define netif_alert_ratelimited(priv, type, dev, fmt, args...) \ + netif_level_ratelimited(alert, priv, type, dev, fmt, ##args) +#define netif_crit_ratelimited(priv, type, dev, fmt, args...) 
\ + netif_level_ratelimited(crit, priv, type, dev, fmt, ##args) +#define netif_err_ratelimited(priv, type, dev, fmt, args...) \ + netif_level_ratelimited(err, priv, type, dev, fmt, ##args) +#define netif_warn_ratelimited(priv, type, dev, fmt, args...) \ + netif_level_ratelimited(warn, priv, type, dev, fmt, ##args) +#define netif_notice_ratelimited(priv, type, dev, fmt, args...) \ + netif_level_ratelimited(notice, priv, type, dev, fmt, ##args) +#define netif_info_ratelimited(priv, type, dev, fmt, args...) \ + netif_level_ratelimited(info, priv, type, dev, fmt, ##args) + +#if defined(CONFIG_DYNAMIC_DEBUG) +/* descriptor check is first to prevent flooding with "callbacks suppressed" */ +#define netif_dbg_ratelimited(priv, type, dev, fmt, args...) \ +do { \ + DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, fmt); \ + if (unlikely(descriptor.flags & _DPRINTK_FLAGS_PRINT) &&\ + netif_msg_##type(priv) && net_ratelimit()) \ + __dynamic_netdev_dbg(&descriptor, dev, fmt, ##args);\ +} while (0) +#elif defined(DEBUG) +#define netif_dbg_ratelimited(priv, type, dev, fmt, args...) \ +do { \ + if (netif_msg_##type(priv) && net_ratelimit()) \ + netif_printk(priv, type, KERN_DEBUG, dev, fmt, ##args); \ +} while (0) +#else +#define netif_dbg_ratelimited(priv, type, dev, fmt, args...) \ +do { \ + if (0) \ + netif_printk(priv, type, KERN_DEBUG, dev, fmt, ##args); \ +} while (0) +#endif + +#if defined(VERBOSE_DEBUG) +#define netif_vdbg_ratelimited netif_dbg_ratelimited +#else +#define netif_vdbg(priv, type, dev, fmt, args...) \ +do { \ + if (0) \ + netif_printk(priv, type, KERN_DEBUG, dev, fmt, ##args); \ +} while (0) +#endif + /* * The list of packet types we will receive (as opposed to discard) * and the routines to invoke. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [net-next v4 1/8] devres: add devm_alloc_percpu()
On Mon, 2015-11-02 at 19:31 +0200, Madalin Bucur wrote: > Introduce managed counterparts for alloc_percpu() and free_percpu(). > Add devm_alloc_percpu() and devm_free_percpu() into the managed > interfaces list. trivia, could be fixed later > +/** > + * __devm_alloc_percpu - Resource-managed alloc_percpu > + * @dev: Device to allocate per-cpu memory for > + * @size: Size of per-cpu memory to allocate > + * @align: Alignement of per-cpu memory to allocate French spelling? alignment -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC][PATCH] net: arinc429: Add ARINC-429 stack
On 11/02/2015 08:41 PM, Aleksander Morgado wrote: > On Mon, Nov 2, 2015 at 12:14 PM, Oliver Hartkopp wrote: >> >> What about defining some overlay data structure to map ARINC-429 frames into >> CAN frames? >> >> E.g. we could write the ARINC 32 bit data completely into data[0..3] and >> additionally copy the 8 bit label information (or should it better be 10 bit >> including the Source/Destination Identifiers?) additionally into the can_id. > > Note that the only bits which are always treated as non-data are the 8 > label bits (well, and the parity bit #31). The 2 SDI bits (#8, #9) may > be used as data bits when a high resolution is needed, like Lat/Long > encoded in binary words 310 and 311. I wouldn't make any assumption on > what's on those 2 bits; i.e. they're not always "source/destination". > You definitely know these details better than me. That's why I'm asking. Would hosting the 32 bit in the struct can_frame.data and just the 8 bit label in struct can_frame.can_id offer the functionality you need? Besides the arinc429_frame struct struct arinc429_frame { __u8 label; /* 8 bit label */ __u8 data[3]; /* Up-to 23 bits are valid. */ }; everything else roughly looks like a copy from PF_CAN with renaming. So when we can fit the arinc frames into CAN frames and re-use the existing CAN infrastructure - we are almost done. Regards, Oliver -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] vhost: move is_le setup to the backend
From: Greg Kurz Date: Fri, 30 Oct 2015 12:42:35 +0100 > The vq->is_le field is used to fix endianness when accessing the vring via > the cpu_to_vhost16() and vhost16_to_cpu() helpers in the following cases: > > 1) host is big endian and device is modern virtio > > 2) host has cross-endian support and device is legacy virtio with a different >endianness than the host > > Both cases rely on the VHOST_SET_FEATURES ioctl, but 2) also needs the > VHOST_SET_VRING_ENDIAN ioctl to be called by userspace. Since vq->is_le > is only needed when the backend is active, it was decided to set it at > backend start. > > This is currently done in vhost_init_used()->vhost_init_is_le() but it > obfuscates the core vhost code. This patch moves the is_le setup to a > dedicated function that is called from the backend code. > > Note vhost_net is the only backend that can pass vq->private_data == NULL to > vhost_init_used(), hence the "if (sock)" branch. > > No behaviour change. > > Signed-off-by: Greg Kurz Michael, I'm assuming that you will be the one taking this. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v2 net-next] enic: assign affinity hint to interrupts
From: Govindarajulu Varadarajan <_gov...@gmx.com> Date: Fri, 30 Oct 2015 16:52:51 +0530 > The affinity hint is used by the user space daemon, irqbalancer, to > indicate a preferred CPU mask for irqs. This patch sets the irq affinity > hint to local numa core first, when exhausted we try non-local numa cores. > > Also set tx xps cpus mask based on affinity hint. > > v2: remove the global affinity policy. > > Signed-off-by: Govindarajulu Varadarajan <_gov...@gmx.com> Applied. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC][PATCH] net: arinc429: Add ARINC-429 stack
Hi, > I was thinking about this and I mostly agree with you. Obviously, copying the > code this way was dumb. On the other hand, ARINC and CAN are two different > sort > of busses, so I'd propose something slightly different here to avoid confusion > and prevent the future extensions (or protocols) from adding unrelated cruft > into the CAN stack. Another major difference between CAN and ARINC429 is that ARINC is simplex. It does not need loopback and echo. For example HOLT IC chip HI-3593 has two receivers and single transmitter, which should be instantiated as separate devices, as each channel could be connected to different network. It would be nice if new ARINC framework will provide means to create RX or TX only network device and have -rx- or -tx- as part of device name. Label space in ARINC is much smaller than in CAN, is it really needed to have hash and masks? May be simple bitmap for 256 bits will fit better. At least it could be directly provided to mentioned HOLT chip to do filtering in hardware. -- Best regards, Andrey -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC][PATCH] net: arinc429: Add ARINC-429 stack
On Monday, November 02, 2015 at 09:15:21 PM, Vostrikov Andrey wrote: > Hi, Hi, > > I was thinking about this and I mostly agree with you. Obviously, copying > > the code this way was dumb. On the other hand, ARINC and CAN are two > > different sort of busses, so I'd propose something slightly different > > here to avoid confusion and prevent the future extensions (or protocols) > > from adding unrelated cruft into the CAN stack. > > Another major difference between CAN and ARINC429 is that ARINC is > simplex. It does not need loopback and echo. For example HOLT IC > chip HI-3593 has two receivers and single transmitter, which > should be instantiated as separate devices, as each channel could be > connected to different network. So this would effectively be three devices, correct ? I think you can just register a regular ARINC device for each channel and be done with it. Loopback and echo can be configurable. > It would be nice if new ARINC framework will provide means to create > RX or TX only network device and have -rx- or -tx- as part of device > name. I'd say you can fail the TX if you're trying to send via an RX-only channel. The naming can probably be also tweaked, but I don't see much value in that, especially since you can rename those interfaces by using udev rules. Checking if the interface supports RX/TX should be done by other means, not the name. > Label space in ARINC is much smaller than in CAN, is it really needed > to have hash and masks? May be simple bitmap for 256 bits will fit > better. At least it could be directly provided to mentioned HOLT chip > to do filtering in hardware. CAN does the can_id filtering this way and I find it familiar and convenient, so I don't see a reason not to re-use it. 
If the hardware has some special support for the frame filtering, it's the driver that should convert the filter specification into form which the hardware understands -- this sort of configuration is done only once at the beginning of operation, so some small overhead of the conversion of the filter setting should be acceptable, we're talking about generating 256 entries for the hardware from ID/mask tuple, no big deal here. Best regards, Marek Vasut -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v2 net-next] net: make skb_set_owner_w() more robust
From: Eric Dumazet Date: Mon, 02 Nov 2015 12:09:25 -0800 > On Mon, 2015-11-02 at 20:05 +0000, Haiyang Zhang wrote: > >> Thanks for the fix! >> For some driver, like ours, this condition may not be "unlikely". >> So could you remove the "unlikely"? > > No, I won't remove the unlikely. > > Look, your main issue is about reallocating skbs, because of excessive > dev->needed_headroom. > > An unlikely() mismatch is 1000 times less expensive, why would you > care ? > > If you really care, fix your driver to not abuse skb->head to store 220 > bytes of private data. +1 And I've been saying this from the beginning. This driver must place its private per-packet data in another location if it wants optimal behavior inside of the Linux networking stack. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [net-next PATCH v2] RDS: convert bind hash table to re-sizable hashtable
From: Santosh Shilimkar Date: Fri, 30 Oct 2015 08:49:10 -0700 > To further improve the RDS connection scalability on massive systems > where number of sockets grows into tens of thousands of sockets, there > is a need of larger bind hashtable. Pre-allocated 8K or 16K table is > not very flexible in terms of memory utilisation. The rhashtable > infrastructure gives us the flexibility to grow the hashtable based > on use and also comes up with inbuilt efficient bucket(chain) handling. > > Reviewed-by: David Miller > Signed-off-by: Santosh Shilimkar > Signed-off-by: Santosh Shilimkar > --- > v2: Dropped empty new line from rds_add_bound() (David Miller) Applied. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH -stable 3.4,backport] commit c6825c0976fa7893692e0e43b09740b419b23c09 upstream.
netfilter: nf_conntrack: fix RCU race in nf_conntrack_find_get Lets look at destroy_conntrack: hlist_nulls_del_rcu(>tuplehash[IP_CT_DIR_ORIGINAL].hnnode); ... nf_conntrack_free(ct) kmem_cache_free(net->ct.nf_conntrack_cachep, ct); net->ct.nf_conntrack_cachep is created with SLAB_DESTROY_BY_RCU. The hash is protected by rcu, so readers look up conntracks without locks. A conntrack is removed from the hash, but in this moment a few readers still can use the conntrack. Then this conntrack is released and another thread creates conntrack with the same address and the equal tuple. After this a reader starts to validate the conntrack: * It's not dying, because a new conntrack was created * nf_ct_tuple_equal() returns true. But this conntrack is not initialized yet, so it can not be used by two threads concurrently. In this case BUG_ON may be triggered from nf_nat_setup_info(). Florian Westphal suggested to check the confirm bit too. I think it's right. task 1 task 2 task 3 nf_conntrack_find_get nf_conntrack_find destroy_conntrack hlist_nulls_del_rcu nf_conntrack_free kmem_cache_free __nf_conntrack_alloc kmem_cache_alloc memset(>tuplehash[IP_CT_DIR_MAX], if (nf_ct_is_dying(ct)) if (!nf_ct_tuple_equal() I'm not sure, that I have ever seen this race condition in a real life. Currently we are investigating a bug, which is reproduced on a few nodes. In our case one conntrack is initialized from a few tasks concurrently, we don't have any other explanation for this. <2>[46267.083061] kernel BUG at net/ipv4/netfilter/nf_nat_core.c:322! ... <4>[46267.083951] RIP: 0010:[] [] nf_nat_setup_info+0x564/0x590 [nf_nat] ... <4>[46267.085549] Call Trace: <4>[46267.085622] [] alloc_null_binding+0x5b/0xa0 [iptable_nat] <4>[46267.085697] [] nf_nat_rule_find+0x5c/0x80 [iptable_nat] <4>[46267.085770] [] nf_nat_fn+0x111/0x260 [iptable_nat] <4>[46267.085843] [] nf_nat_out+0x48/0xd0 [iptable_nat] <4>[46267.085919] [] nf_iterate+0x69/0xb0 <4>[46267.085991] [] ? 
ip_finish_output+0x0/0x2f0 <4>[46267.086063] [] nf_hook_slow+0x74/0x110 <4>[46267.086133] [] ? ip_finish_output+0x0/0x2f0 <4>[46267.086207] [] ? dst_output+0x0/0x20 <4>[46267.086277] [] ip_output+0xa4/0xc0 <4>[46267.086346] [] raw_sendmsg+0x8b4/0x910 <4>[46267.086419] [] inet_sendmsg+0x4a/0xb0 <4>[46267.086491] [] ? sock_update_classid+0x3a/0x50 <4>[46267.086562] [] sock_sendmsg+0x117/0x140 <4>[46267.086638] [] ? _spin_unlock_bh+0x1b/0x20 <4>[46267.086712] [] ? autoremove_wake_function+0x0/0x40 <4>[46267.086785] [] ? do_ip_setsockopt+0x90/0xd80 <4>[46267.086858] [] ? call_function_interrupt+0xe/0x20 <4>[46267.086936] [] ? ub_slab_ptr+0x20/0x90 <4>[46267.087006] [] ? ub_slab_ptr+0x20/0x90 <4>[46267.087081] [] ? kmem_cache_alloc+0xd8/0x1e0 <4>[46267.087151] [] sys_sendto+0x139/0x190 <4>[46267.087229] [] ? sock_setsockopt+0x16d/0x6f0 <4>[46267.087303] [] ? audit_syscall_entry+0x1d7/0x200 <4>[46267.087378] [] ? __audit_syscall_exit+0x265/0x290 <4>[46267.087454] [] ? compat_sys_setsockopt+0x75/0x210 <4>[46267.087531] [] compat_sys_socketcall+0x13f/0x210 <4>[46267.087607] [] ia32_sysret+0x0/0x5 <4>[46267.087676] Code: 91 20 e2 01 75 29 48 89 de 4c 89 f7 e8 56 fa ff ff 85 c0 0f 84 68 fc ff ff 0f b6 4d c6 41 8b 45 00 e9 4d fb ff ff e8 7c 19 e9 e0 <0f> 0b eb fe f6 05 17 91 20 e2 80 74 ce 80 3d 5f 2e 00 00 00 74 <1>[46267.088023] RIP [] nf_nat_setup_info+0x564/0x590 Signed-off-by: Ani SinhaTested-by: Neal P. 
Murphy --- net/netfilter/nf_conntrack_core.c | 21 + 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 9a46908..fd0f7a3 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -309,6 +309,21 @@ static void death_by_timeout(unsigned long ul_conntrack) nf_ct_put(ct); } +static inline bool +nf_ct_key_equal(struct nf_conntrack_tuple_hash *h, + const struct nf_conntrack_tuple *tuple, + u16 zone) +{ + struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); + + /* A conntrack can be recreated with the equal tuple, +* so we need to check that the conntrack is confirmed +*/ + return nf_ct_tuple_equal(tuple, >tuple) && + nf_ct_zone(ct) == zone && + nf_ct_is_confirmed(ct); +} + /* * Warning : * - Caller must take a reference on returned object @@ -330,8 +345,7 @@ nf_conntrack_find(struct net *net, u16 zone, local_bh_disable(); begin: hlist_nulls_for_each_entry_rcu(h, n, >ct.hash[bucket], hnnode) { - if
Re: Resource leak in unshare
Dmitry Vyukov writes: > Hello, > > I am hitting the following warnings on > bcee19f424a0d8c26ecf2607b73c690802658b29 (4.3): Do you have any trace of the earlier failures? This appears to be something caused by an earlier failure (possibly whatever fails to allocate memory). Having network devices present but being in the generic cleanup routines is wrong. If there is no additional information can you please rerun with the following change applied? That should at least report which function is failing, and give us a good clue where to start debugging this. Eric diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 2c2eb1b629b1..125c94af22b8 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -292,6 +292,7 @@ out: return error; out_undo: + WARN(1, "net ops->init %pF returned with %d\n", ops->init, error); /* Walk through the list backwards calling the exit functions * for the pernet modules whose init functions did not fail. */ -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC][PATCH] net: arinc429: Add ARINC-429 stack
On Monday, November 02, 2015 at 12:14:27 PM, Oliver Hartkopp wrote: > On 02.11.2015 10:47, Marc Kleine-Budde wrote: > > On 11/02/2015 12:16 AM, Marek Vasut wrote: > >> The ARINC-429 is a technical standard, which describes, among others, > >> a data bus used by airplanes. The standard contains much more, since > >> it is based off the ISO/OSI model, but this patch implements just the > >> data bus protocol. > >> > >> This stack is derived from the SocketCAN implementation, already present > >> in the kernel and thus behaves in a very similar fashion. Thus far, we > >> support sending RAW ARINC-429 datagrams, configuration of the RX and TX > >> clock speed and filtering. > >> > >> The ARINC-429 datagram is four-byte long. The first byte is always the > >> LABEL, the function of remaining three bytes can vary, so we handle it > >> as an opaque PAYLOAD. The userspace tools can send these datagrams via > >> a standard socket. > >> > >> A LABEL-based filtering can be configured on each socket separately in > >> a way comparable to CAN -- user uses setsockopt() to push a list of > >> label,mask tuples into the kernel and the kernel will deliver a datagram > >> to the socket if ( & mask) == (label & mask), otherwise > >> the datagram is not delivered. > > > > What's difference compared to CAN besides a different MTU? The CAN stack > > is already capable to handle CAN and CAN-FD frames. Would it make sense > > to integrate the ARINC-429 into the existing CAN stack? > > That was my first impression too. Hi! > What about defining some overlay data structure to map ARINC-429 frames > into CAN frames? I agree about the code reuse, it was stupid to do such a blatant copy of the code all right. I don't think it's such a great idea to outright place ARINC support into the CAN stack though. They're two different busses after all. Please see below. > E.g. 
we could write the ARINC 32 bit data completely into data[0..3] and > additionally copy the 8 bit label information (or should it better be 10 > bit including the Source/Destination Identifiers?) additionally into the > can_id. > > From what I can see the filtering by label is similar to filtering by > can_id. And you would be able to use the can-gw functionality too. This is correct. > The only real difference is the bitrate configuration of the ARINC > interface. There might be additional ARINC-specific configuration bits involved, but thus far, that's correct. > I wonder if a similar approach would fit here as we discussed with the > University of Prague for a LIN implementation using the PF_CAN > infrastructure: OT: Hey, there is no "University of Prague", there are two universities in Prague to boot -- Charles University and Czech Technical University -- you mean the later ;-) > http://rtime.felk.cvut.cz/can/lin-bus/ > > It could probably boil down to a 'CAN interface' that is named arinc0 which > implements the serial driver like in slcan.c or sllin.c ... I was thinking about this and I mostly agree with you. Obviously, copying the code this way was dumb. On the other hand, ARINC and CAN are two different sort of busses, so I'd propose something slightly different here to avoid confusion and prevent the future extensions (or protocols) from adding unrelated cruft into the CAN stack. I would propose we (read: me) create some sort of "common" core, which would contain the following: - drivers/net/: big part of the device interface here is common big part of the virtual interface here is common -> CAN or ARINC can just add their own specific callbacks and be done with it - net/: there's a lot of common parts as well, like the filtering can be unified such that it can be used by both. A big part of the socket handling is also similar. This would also let the slcan or sllin or whatever stuff they made at CVUT just plug into this "common" core part. 
Now I wonder if we should introduce AF_ARINC or stick to AF_CAN for both. I'd be much happier to keep those two separate, again, to avoid confusion. What do you think please ? Best regards, Marek Vasut -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v2 net-next] net: make skb_set_owner_w() more robust
On Mon, 2015-11-02 at 20:05 +0000, Haiyang Zhang wrote: > Thanks for the fix! > For some driver, like ours, this condition may not be "unlikely". > So could you remove the "unlikely"? No, I won't remove the unlikely. Look, your main issue is about reallocating skbs, because of excessive dev->needed_headroom. An unlikely() mismatch is 1000 times less expensive, why would you care ? If you really care, fix your driver to not abuse skb->head to store 220 bytes of private data. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH net-next 0/5] dp83640 driver fixes
From: Stefan Sørensen Date: Fri, 30 Oct 2015 13:13:59 +0100 > This series fixes a number of minor bugs in the dp83640 driver. Looks like Richard wants changes to patch #1. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: pull-request: can 2015-10-30
From: Marc Kleine-Budde Date: Fri, 30 Oct 2015 14:39:58 +0100 > this is a pull request for the upcoming v4.3 release. > > Marek Vasut provides a patch to use the correct attrlen in the nla_put() in > the > can_fill_info() function. Pulled, but this missed the v4.3 release so you'll need to push this explicitly out to -stable. Thanks. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 0/2] net: encx24j600: Fix SPI driver module autoload
From: Javier Martinez Canillas Date: Fri, 30 Oct 2015 13:49:16 +0100 > Recently I've been trying to fix module autoloading for all SPI drivers and > found that the encx24j600 driver does not fill module alias information due > missing a MODULE_DEVICE_TABLE() so module autload won't work and the driver > Kconfig symbol is tristate which means the driver can be built as a module. > > But also the SPI id table is not correctly defined so this series fixes both > issues. Series applied to net-next, thanks. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v2] net: rds: changing the return type from int to void
From: Saurabh Sengar Date: Fri, 30 Oct 2015 19:46:44 +0530 > as result of function rds_iw_flush_mr_pool is nowhere checked, > changing its return type from int to void. > also removing the unused variable rc as there is nothing to return > > Signed-off-by: Saurabh Sengar > --- > v2 : modify patch description, as per the comments from Sergei Shtylyov Applied, thanks. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] bpf: convert hashtab lock to raw lock
From: Yang Shi Date: Fri, 30 Oct 2015 15:16:26 -0700 > When running bpf samples on rt kernel, it reports the below warning: ... > Convert hashtab lock to raw lock to avoid such warning. > > Signed-off-by: Yang Shi Applied to net-next, thanks. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [net-next v4 3/8] dpaa_eth: add support for S/G frames
On Mon, 2015-11-02 at 19:31 +0200, Madalin Bucur wrote: > Add support for Scater/Gather (S/G) frames. The FMan can place > the frame content into multiple buffers and provide a S/G Table > (SGT) into one first buffer with references to the others. trivia: scatter > diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth_common.c > b/drivers/net/ethernet/freescale/dpaa/dpaa_eth_common.c [] > @@ -1177,10 +1177,42 @@ void dpaa_eth_init_ports(struct mac_device *mac_dev, > port_fqs->rx_defq, _layout[RX]); > } > > +void dpa_release_sgt(struct qm_sg_entry *sgt) > +{ > + struct dpa_bp *dpa_bp; > + struct bm_buffer bmb[DPA_BUFF_RELEASE_MAX]; Where is "struct bm_buffer" defined? -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH net-next v2] ipv4: use l4 hash for locally generated multipath flows
From: Paolo Abeni Date: Thu, 29 Oct 2015 22:20:40 +0100 > This patch changes how the multipath hash is computed for locally > generated flows: now the hash comprises l4 information. > > This allows better utilization of the available paths when the existing > flows have the same source IP and the same destination IP: with l3 hash, > even when multiple connections are in place simultaneously, a single path > will be used, while with l4 hash we can use all the available paths. > > v2 changes: > - use get_hash_from_flowi4() instead of implementing just another l4 hash > function > > Signed-off-by: Paolo Abeni Applied, thanks. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v3 net] i40e: Look up MAC address in Open Firmware or IDPROM
On (11/02/15 17:26), Nelson, Shannon wrote: > > I assume you mean .1q > > Yes, this is what I had in mind. I don't think we're quite there yet, even without vlans. If I turn on/off tcpdump, there's something about the way that the link is bounced that leaves the device down while tcpdump is running. Then after I exit tcpdump, it bounces things a few more times again, packets flow for a brief interval, and then there's silence. Seems like there is a workq that results in i40e_service_task->i40e_sync_vsi_filters that periodically resets things. Doing 'ip link set eth0 promisc on' keeps things nice and steady. How is this all supposed to work if I change the macaddr from /sbin/ip using i40e_set_mac() and then jiggle the promisc (either just the flag, or with tcpdump)? (I can't tell because I don't have an x86 machine with i40e handy) To frame the question differently, where all should I be invoking the new i40e_macaddr_init() function from? --Sowmini -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
RE: [PATCH v2 net-next] net: make skb_set_owner_w() more robust
> -Original Message- > From: Eric Dumazet [mailto:eric.duma...@gmail.com] > Sent: Sunday, November 1, 2015 6:37 PM > To: David Miller> Cc: Haiyang Zhang ; eduma...@google.com; > netdev@vger.kernel.org; KY Srinivasan > Subject: [PATCH v2 net-next] net: make skb_set_owner_w() more robust > > From: Eric Dumazet > > skb_set_owner_w() is called from various places that assume > skb->sk always point to a full blown socket (as it changes > sk->sk_wmem_alloc) > > We'd like to attach skb to request sockets, and in the future > to timewait sockets as well. For these kind of pseudo sockets, > we need to take a traditional refcount and use sock_edemux() > as the destructor. > > It is now time to un-inline skb_set_owner_w(), being too big. > > Fixes: ca6fb0651883 ("tcp: attach SYNACK messages to request sockets > instead of listener") > Signed-off-by: Eric Dumazet > Bisected-by: Haiyang Zhang > --- > v2: sock_edemux() must be guarded by CONFIG_INET > > include/net/sock.h| 17 ++--- > net/core/sock.c | 22 ++ > net/ipv4/tcp_output.c |4 +--- > 3 files changed, 25 insertions(+), 18 deletions(-) > > diff --git a/include/net/sock.h b/include/net/sock.h > index aeed5c95f3ca..f570e75e3da9 100644 > --- a/include/net/sock.h > +++ b/include/net/sock.h > @@ -1951,6 +1951,8 @@ static inline void skb_set_hash_from_sk(struct > sk_buff *skb, struct sock *sk) > } > } > > +void skb_set_owner_w(struct sk_buff *skb, struct sock *sk); > + > /* > * Queue a received datagram if it will fit. Stream and sequenced > * protocols can't normally use this as they need to fit buffers in > @@ -1959,21 +1961,6 @@ static inline void skb_set_hash_from_sk(struct > sk_buff *skb, struct sock *sk) > * Inlined as it's very short and called for pretty much every > * packet ever received. 
> */ > - > -static inline void skb_set_owner_w(struct sk_buff *skb, struct sock *sk) > -{ > - skb_orphan(skb); > - skb->sk = sk; > - skb->destructor = sock_wfree; > - skb_set_hash_from_sk(skb, sk); > - /* > - * We used to take a refcount on sk, but following operation > - * is enough to guarantee sk_free() wont free this sock until > - * all in-flight packets are completed > - */ > - atomic_add(skb->truesize, >sk_wmem_alloc); > -} > - > static inline void skb_set_owner_r(struct sk_buff *skb, struct sock *sk) > { > skb_orphan(skb); > diff --git a/net/core/sock.c b/net/core/sock.c > index 0ef30aa90132..7529eb9463be 100644 > --- a/net/core/sock.c > +++ b/net/core/sock.c > @@ -1656,6 +1656,28 @@ void sock_wfree(struct sk_buff *skb) > } > EXPORT_SYMBOL(sock_wfree); > > +void skb_set_owner_w(struct sk_buff *skb, struct sock *sk) > +{ > + skb_orphan(skb); > + skb->sk = sk; > +#ifdef CONFIG_INET > + if (unlikely(!sk_fullsock(sk))) { Thanks for the fix! For some driver, like ours, this condition may not be "unlikely". So could you remove the "unlikely"? Thanks, - Haiyang
Re: [PATCH net-next] tcp/dccp: fix ireq->pktopts race
From: Eric Dumazet Date: Fri, 30 Oct 2015 09:46:12 -0700 > From: Eric Dumazet > > IPv6 request sockets store a pointer to skb containing the SYN packet > to be able to transfer it to full blown socket when 3WHS is done > (ireq->pktopts -> np->pktoptions) > > As explained in commit 5e0724d027f0 ("tcp/dccp: fix hashdance race for > passive sessions"), we must transfer the skb only if we won the > hashdance race, if multiple cpus receive the 'ack' packet completing > 3WHS at the same time. > > Fixes: e994b2f0fb92 ("tcp: do not lock listener to process SYN packets") > Fixes: 079096f103fa ("tcp/dccp: install syn_recv requests into ehash table") > Signed-off-by: Eric Dumazet Applied. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC][PATCH] net: arinc429: Add ARINC-429 stack
On Mon, Nov 2, 2015 at 12:14 PM, Oliver Hartkopp wrote: > > What about defining some overlay data structure to map ARINC-429 frames into > CAN frames? > > E.g. we could write the ARINC 32 bit data completely into data[0..3] and > additionally copy the 8 bit label information (or should it better be 10 bit > including the Source/Destination Identifiers?) additionally into the can_id. Note that the only bits which are always treated as non-data are the 8 label bits (well, and the parity bit #31). The 2 SDI bits (#8, #9) may be used as data bits when a high resolution is needed, like Lat/Long encoded in binary words 310 and 311. I wouldn't make any assumption on what's on those 2 bits; i.e. they're not always "source/destination". -- Aleksander https://aleksander.es -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH net-next] mpls: Don't accept multipath configuration until the support is complete
Hello. On 11/02/2015 10:29 PM, Eric W. Biederman wrote: Currently the multipath code has a nasty failure mode in that it will fail to notice link down or administrative device down and will instead black hole packets instead of sending them to their nexthop destination. Half the point of multipath is to gracefully handle forwarding path failures and as the current code does not handle forwarding failures the current code is dangerous to use. As mpls multipath has never been exported to userspace and as the implementation was not complete before the merge window disable the mpls multipath code by rejecting all multipath configuration requests. This will give us another kernel development cycle to cleanly sort out the issues, without any bad precedents to worry about. Signed-off-by: "Eric W. Biederman"--- net/mpls/af_mpls.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index c70d750148b6..893cd2dc3979 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -1162,6 +1162,8 @@ static int rtm_to_route_config(struct sk_buff *skb, struct nlmsghdr *nlh, { cfg->rc_mp = nla_data(nla); cfg->rc_mp_len = nla_len(nla); + /* Fail until multipath support is complete */ + goto errout; break; Forgot to delete *break*? } default: MBR, Sergei -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH net-next 0/2] bridge: vlan: failure path and comment fixes
From: Nikolay Aleksandrov Date: Fri, 30 Oct 2015 17:46:18 +0100 > This is a set from Ido which takes care of one failure path error in > nbp_vlan_init (patch 1) and a few comment errors (patch 2). > I must admit I didn't expect the port init continues after a vlan init > failure but should've checked to make sure. Thanks to Ido for catching > these! Series applied, thanks. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [net-next v4 6/8] dpaa_eth: add ethtool statistics
On Mon, 2015-11-02 at 19:31 +0200, Madalin Bucur wrote: > Add a series of counters to be exported through ethtool: > - add detailed counters for reception errors; > - add detailed counters for QMan enqueue reject events; > - count the number of fragmented skbs received from the stack; > - count all frames received on the Tx confirmation path; > - add congestion group statistics; > - count the number of interrupts for each CPU. [] > diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c > b/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c [] > +static void dpa_get_strings(struct net_device *net_dev, u32 stringset, u8 > *data) > +{ > + unsigned int i, j, num_cpus, size; > + char string_cpu[ETH_GSTRING_LEN]; > + u8 *strings; > + > + strings = data; > + num_cpus = num_online_cpus(); > + size = DPA_STATS_GLOBAL_LEN * ETH_GSTRING_LEN; > + > + for (i = 0; i < DPA_STATS_PERCPU_LEN; i++) { > + for (j = 0; j < num_cpus; j++) { > + snprintf(string_cpu, ETH_GSTRING_LEN, "%s [CPU %d]", > + dpa_stats_percpu[i], j); > + memcpy(strings, string_cpu, ETH_GSTRING_LEN); > + strings += ETH_GSTRING_LEN; > + } > + snprintf(string_cpu, ETH_GSTRING_LEN, "%s [TOTAL]", > + dpa_stats_percpu[i]); > + memcpy(strings, string_cpu, ETH_GSTRING_LEN); > + strings += ETH_GSTRING_LEN; > + } > + memcpy(strings, dpa_stats_global, size); > +} This leaks uninitialized stack via a memcpy of uninitialized string_cpu bytes into user-space. Using char string_cpu[ETH_GSTRING_LEN] = {}; or a memset before each snprintf would fix it. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v7 02/10] ss: created formatters for json and hr
> Your perception is incorrect. > I am still maintaining iproute2. Phil is just providing lots of feedback > and new patches. No offense intended! I know, Phil has already clarified things in the other responses. Maybe you overlooked those accidentally. > The size of the change makes it harder to digest, and I do think > adding JSON support is a good idea. Just concerned about the long > term maintainance overhead. Plus I want the other utilities to have > JSON output as well. Therefore this change is going to take longer > to adopt and hopefull we can figure out a good way to do this kind > of output. I see! Well, do you conceive a decentralized approach or a centralized one for all the utilities? Decentralized would be as things are now – I mean what my patch series tries to amount to. > Almost want to go to C++ or something. Would that be viable or is that a wishful line of thought? signature.asc Description: OpenPGP digital signature
RE: [PATCH][v2] net: phy: fix a bug in get_phy_c45_ids
> -Original Message- > From: David Miller [mailto:da...@davemloft.net] > Sent: Tuesday, November 03, 2015 12:02 PM > To: shh@gmail.com > Cc: netdev@vger.kernel.org; f.faine...@gmail.com; Xie Shaohui-B21989 > Subject: Re: [PATCH][v2] net: phy: fix a bug in get_phy_c45_ids > > From:> Date: Mon, 2 Nov 2015 18:48:33 +0800 > > > @@ -205,6 +205,37 @@ struct phy_device *phy_device_create(struct > > mii_bus *bus, int addr, int phy_id, } > > EXPORT_SYMBOL(phy_device_create); > > > > + /* get_phy_c45_devs_in_pkg - reads a MMD's devices in package > registers. > > + * @bus: the target MII bus > > Please remove the leading space on the first line of this new comment. OK. Fixed in V3. Thank you! --Shaohui -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH net] net: avoid NULL deref in inet_ctl_sock_destroy()
On Mon, 2015-11-02 at 22:46 -0500, David Miller wrote: > From: Eric Dumazet> Date: Mon, 02 Nov 2015 07:50:07 -0800 > > > From: Eric Dumazet > > > > Under low memory conditions, tcp_sk_init() and icmp_sk_init() > > can both iterate on all possible cpus and call inet_ctl_sock_destroy(), > > with eventual NULL pointer. > > > > Signed-off-by: Eric Dumazet > > Reported-by: Dmitry Vyukov > > Applied. Thanks David. Bug origin was in linux-4.2 : commit 26abe14379f8e2fa3fd1bcf97c9a7ad9364886fe Author: Eric W. Biederman Date: Fri May 8 21:10:31 2015 -0500 net: Modify sk_alloc to not reference count the netns of kernel sockets. Now that sk_alloc knows when a kernel socket is being allocated modify it to not reference count the network namespace of kernel sockets. Keep track of if a socket needs reference counting by adding a flag to struct sock called sk_net_refcnt. Update all of the callers of sock_create_kern to stop using sk_change_net and sk_release_kernel as those hacks are no longer needed, to avoid reference counting a kernel socket. Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-next v2] mpls: support for dead routes
From: Roopa PrabhuAdds support for RTNH_F_DEAD and RTNH_F_LINKDOWN flags on mpls routes due to link events. Also adds code to ignore dead routes during route selection Signed-off-by: Roopa Prabhu --- RFC to v1: Addressed a few comments from Eric and Robert: - remove support for weighted nexthops - Use rt_nhn_alive in the rt structure to keep count of alive routes. What i have not done is: sort nexthops on link events. I am not comfortable recreating or sorting nexthops on every carrier change. This leaves scope for optimizing in the future v1 to v2: Fix dead nexthop checks as suggested by dave net/mpls/af_mpls.c | 191 net/mpls/internal.h | 3 + 2 files changed, 166 insertions(+), 28 deletions(-) diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index c70d750..5e88118 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -96,22 +96,15 @@ bool mpls_pkt_too_big(const struct sk_buff *skb, unsigned int mtu) } EXPORT_SYMBOL_GPL(mpls_pkt_too_big); -static struct mpls_nh *mpls_select_multipath(struct mpls_route *rt, -struct sk_buff *skb, bool bos) +static u32 mpls_multipath_hash(struct mpls_route *rt, + struct sk_buff *skb, bool bos) { struct mpls_entry_decoded dec; struct mpls_shim_hdr *hdr; bool eli_seen = false; int label_index; - int nh_index = 0; u32 hash = 0; - /* No need to look further into packet if there's only -* one path -*/ - if (rt->rt_nhn == 1) - goto out; - for (label_index = 0; label_index < MAX_MP_SELECT_LABELS && !bos; label_index++) { if (!pskb_may_pull(skb, sizeof(*hdr) * label_index)) @@ -165,9 +158,37 @@ static struct mpls_nh *mpls_select_multipath(struct mpls_route *rt, } } - nh_index = hash % rt->rt_nhn; + return hash; +} + +static struct mpls_nh *mpls_select_multipath(struct mpls_route *rt, +struct sk_buff *skb, bool bos) +{ + u32 hash = 0; + int nh_index; + int n = 0; + + /* No need to look further into packet if there's only +* one path +*/ + if (rt->rt_nhn == 1) + goto out; + + if (rt->rt_nhn_alive <= 0) + return NULL; + + hash = 
mpls_multipath_hash(rt, skb, bos); + nh_index = hash % rt->rt_nhn_alive; + for_nexthops(rt) { + if (nh->nh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN)) + continue; + if (n == nh_index) + return nh; + n++; + } endfor_nexthops(rt); + out: - return >rt_nh[nh_index]; + return >rt_nh[0]; } static bool mpls_egress(struct mpls_route *rt, struct sk_buff *skb, @@ -365,6 +386,7 @@ static struct mpls_route *mpls_rt_alloc(int num_nh, u8 max_alen) GFP_KERNEL); if (rt) { rt->rt_nhn = num_nh; + rt->rt_nhn_alive = num_nh; rt->rt_max_alen = max_alen_aligned; } @@ -536,6 +558,15 @@ static int mpls_nh_assign_dev(struct net *net, struct mpls_route *rt, RCU_INIT_POINTER(nh->nh_dev, dev); + if (!netif_carrier_ok(dev)) + nh->nh_flags |= RTNH_F_LINKDOWN; + + if (!(dev->flags & IFF_UP)) + nh->nh_flags |= RTNH_F_DEAD; + + if (nh->nh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN)) + rt->rt_nhn_alive--; + return 0; errout: @@ -577,7 +608,7 @@ errout: } static int mpls_nh_build(struct net *net, struct mpls_route *rt, -struct mpls_nh *nh, int oif, +struct mpls_nh *nh, int oif, int hops, struct nlattr *via, struct nlattr *newdst) { int err = -ENOMEM; @@ -666,7 +697,7 @@ static int mpls_nh_build_multi(struct mpls_route_config *cfg, /* neither weighted multipath nor any flags * are supported */ - if (rtnh->rtnh_hops || rtnh->rtnh_flags) + if (rtnh->rtnh_flags || rtnh->rtnh_flags) goto errout; attrlen = rtnh_attrlen(rtnh); @@ -681,8 +712,8 @@ static int mpls_nh_build_multi(struct mpls_route_config *cfg, goto errout; err = mpls_nh_build(cfg->rc_nlinfo.nl_net, rt, nh, - rtnh->rtnh_ifindex, nla_via, - nla_newdst); + rtnh->rtnh_ifindex, rtnh->rtnh_hops, + nla_via, nla_newdst); if (err) goto errout; @@ -875,34 +906,100 @@ free: return
Re: [PATCH net-next] bridge: vlan: Use rcu_dereference instead of rtnl_dereference
Sun, Nov 01, 2015 at 05:31:45PM CET, ido...@mellanox.com wrote: >br_should_learn() is protected by RCU and not by RTNL, so use correct >flavor of nbp_vlan_group(). > >Fixes: 907b1e6e83ed ("bridge: vlan: use proper rcu for the vlgrp >member") >Signed-off-by: Ido Schimmel>Acked-by: Nikolay Aleksandrov Acked-by: Jiri Pirko -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[v7, 6/6] fsl/fman: Add FMan MAC driver
From: Igal LibermanThis patch adds the Ethernet MAC driver supporting the three different types of MACs: dTSEC, tGEC and mEMAC. Signed-off-by: Igal Liberman --- drivers/net/ethernet/freescale/fman/Makefile |3 +- drivers/net/ethernet/freescale/fman/mac.c| 980 ++ drivers/net/ethernet/freescale/fman/mac.h| 97 +++ 3 files changed, 1079 insertions(+), 1 deletion(-) create mode 100644 drivers/net/ethernet/freescale/fman/mac.c create mode 100644 drivers/net/ethernet/freescale/fman/mac.h diff --git a/drivers/net/ethernet/freescale/fman/Makefile b/drivers/net/ethernet/freescale/fman/Makefile index 2eb0b9b..51fd2e6 100644 --- a/drivers/net/ethernet/freescale/fman/Makefile +++ b/drivers/net/ethernet/freescale/fman/Makefile @@ -1,6 +1,7 @@ subdir-ccflags-y += -I$(srctree)/drivers/net/ethernet/freescale/fman -obj-y += fsl_fman.o fsl_fman_mac.o +obj-y += fsl_fman.o fsl_fman_mac.o fsl_mac.o fsl_fman-objs := fman_muram.o fman.o fman_sp.o fman_port.o fsl_fman_mac-objs := fman_dtsec.o fman_memac.o fman_tgec.o +fsl_mac-objs += mac.o diff --git a/drivers/net/ethernet/freescale/fman/mac.c b/drivers/net/ethernet/freescale/fman/mac.c new file mode 100644 index 000..9dd66bc --- /dev/null +++ b/drivers/net/ethernet/freescale/fman/mac.c @@ -0,0 +1,980 @@ +/* Copyright 2008-2015 Freescale Semiconductor, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * * Neither the name of Freescale Semiconductor nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * + * ALTERNATIVELY, this software may be distributed under the terms of the + * GNU General Public License ("GPL") as published by the Free Software + * Foundation, either version 2 of that License or (at your option) any + * later version. + * + * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "mac.h" +#include "fman_mac.h" +#include "fman_dtsec.h" +#include "fman_tgec.h" +#include "fman_memac.h" + +#define MAC_DESCRIPTION "FSL FMan MAC API based driver" + +MODULE_LICENSE("Dual BSD/GPL"); + +MODULE_AUTHOR("Emil Medve "); + +MODULE_DESCRIPTION(MAC_DESCRIPTION); + +struct mac_priv_s { + struct device *dev; + void __iomem*vaddr; + u8 cell_index; + phy_interface_t phy_if; + struct fman *fman; + struct device_node *phy_node; + /* List of multicast addresses */ + struct list_headmc_addr_list; + struct platform_device *eth_dev; + struct fixed_phy_status *fixed_link; + u16 speed; + u16 max_speed; + + int (*enable)(struct fman_mac *mac_dev, enum comm_mode mode); + int (*disable)(struct fman_mac *mac_dev, enum comm_mode mode); +}; + +struct mac_address { + u8 addr[ETH_ALEN]; + struct list_head list; +}; + +static void mac_exception(void *_mac_dev, enum fman_mac_exceptions ex) +{ + struct mac_device *mac_dev; + struct mac_priv_s *priv; + + mac_dev = (struct mac_device *)_mac_dev; + priv = mac_dev->priv; + + if (ex == FM_MAC_EX_10G_RX_FIFO_OVFL) { + /* don't flag RX FIFO after the first */ + mac_dev->set_exception(mac_dev->fman_mac, +
[v7, 1/6] fsl/fman: Add FMan MURAM support
From: Igal LibermanAdd Frame Manager Multi-User RAM support. This internal FMan memory block is used by the FMan hardware modules, the management being made through the generic allocator. The FMan Internal memory, for example, is used for allocating transmit and receive FIFOs. Signed-off-by: Igal Liberman --- drivers/net/ethernet/freescale/Kconfig |1 + drivers/net/ethernet/freescale/Makefile |2 + drivers/net/ethernet/freescale/fman/Kconfig |8 ++ drivers/net/ethernet/freescale/fman/Makefile |5 + drivers/net/ethernet/freescale/fman/fman_muram.c | 159 ++ drivers/net/ethernet/freescale/fman/fman_muram.h | 51 +++ 6 files changed, 226 insertions(+) create mode 100644 drivers/net/ethernet/freescale/fman/Kconfig create mode 100644 drivers/net/ethernet/freescale/fman/Makefile create mode 100644 drivers/net/ethernet/freescale/fman/fman_muram.c create mode 100644 drivers/net/ethernet/freescale/fman/fman_muram.h diff --git a/drivers/net/ethernet/freescale/Kconfig b/drivers/net/ethernet/freescale/Kconfig index ff76d4e..f3f89cc 100644 --- a/drivers/net/ethernet/freescale/Kconfig +++ b/drivers/net/ethernet/freescale/Kconfig @@ -53,6 +53,7 @@ config FEC_MPC52xx_MDIO If compiled as module, it will be called fec_mpc52xx_phy. 
source "drivers/net/ethernet/freescale/fs_enet/Kconfig" +source "drivers/net/ethernet/freescale/fman/Kconfig" config FSL_PQ_MDIO tristate "Freescale PQ MDIO" diff --git a/drivers/net/ethernet/freescale/Makefile b/drivers/net/ethernet/freescale/Makefile index 71debd1..4097c58 100644 --- a/drivers/net/ethernet/freescale/Makefile +++ b/drivers/net/ethernet/freescale/Makefile @@ -17,3 +17,5 @@ gianfar_driver-objs := gianfar.o \ gianfar_ethtool.o obj-$(CONFIG_UCC_GETH) += ucc_geth_driver.o ucc_geth_driver-objs := ucc_geth.o ucc_geth_ethtool.o + +obj-$(CONFIG_FSL_FMAN) += fman/ diff --git a/drivers/net/ethernet/freescale/fman/Kconfig b/drivers/net/ethernet/freescale/fman/Kconfig new file mode 100644 index 000..66b7296 --- /dev/null +++ b/drivers/net/ethernet/freescale/fman/Kconfig @@ -0,0 +1,8 @@ +config FSL_FMAN + bool "FMan support" + depends on FSL_SOC || COMPILE_TEST + select GENERIC_ALLOCATOR + default n + help + Freescale Data-Path Acceleration Architecture Frame Manager + (FMan) support diff --git a/drivers/net/ethernet/freescale/fman/Makefile b/drivers/net/ethernet/freescale/fman/Makefile new file mode 100644 index 000..fc2e194 --- /dev/null +++ b/drivers/net/ethernet/freescale/fman/Makefile @@ -0,0 +1,5 @@ +subdir-ccflags-y += -I$(srctree)/drivers/net/ethernet/freescale/fman + +obj-y += fsl_fman.o + +fsl_fman-objs := fman_muram.o diff --git a/drivers/net/ethernet/freescale/fman/fman_muram.c b/drivers/net/ethernet/freescale/fman/fman_muram.c new file mode 100644 index 000..35d4a50 --- /dev/null +++ b/drivers/net/ethernet/freescale/fman/fman_muram.c @@ -0,0 +1,159 @@ +/* + * Copyright 2008-2015 Freescale Semiconductor Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Freescale Semiconductor nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * + * ALTERNATIVELY, this software may be distributed under the terms of the + * GNU General Public License ("GPL") as published by the Free Software + * Foundation, either version 2 of that License or (at your option) any + * later version. + * + * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "fman_muram.h" + +#include +#include +#include
[v7, 4/6] fsl/fman: Add FMan SP support
From: Igal LibermanThe Storage Profiles contain parameters that are used by the FMan for frame reception and transmission. Signed-off-by: Igal Liberman --- drivers/net/ethernet/freescale/fman/Makefile |2 +- drivers/net/ethernet/freescale/fman/fman_sp.c | 167 + drivers/net/ethernet/freescale/fman/fman_sp.h | 103 +++ 3 files changed, 271 insertions(+), 1 deletion(-) create mode 100644 drivers/net/ethernet/freescale/fman/fman_sp.c create mode 100644 drivers/net/ethernet/freescale/fman/fman_sp.h diff --git a/drivers/net/ethernet/freescale/fman/Makefile b/drivers/net/ethernet/freescale/fman/Makefile index 43360d70..5141532 100644 --- a/drivers/net/ethernet/freescale/fman/Makefile +++ b/drivers/net/ethernet/freescale/fman/Makefile @@ -2,5 +2,5 @@ subdir-ccflags-y += -I$(srctree)/drivers/net/ethernet/freescale/fman obj-y += fsl_fman.o fsl_fman_mac.o -fsl_fman-objs := fman_muram.o fman.o +fsl_fman-objs := fman_muram.o fman.o fman_sp.o fsl_fman_mac-objs := fman_dtsec.o fman_memac.o fman_tgec.o diff --git a/drivers/net/ethernet/freescale/fman/fman_sp.c b/drivers/net/ethernet/freescale/fman/fman_sp.c new file mode 100644 index 000..f36c622 --- /dev/null +++ b/drivers/net/ethernet/freescale/fman/fman_sp.c @@ -0,0 +1,167 @@ +/* + * Copyright 2008 - 2015 Freescale Semiconductor Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * * Neither the name of Freescale Semiconductor nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * + * ALTERNATIVELY, this software may be distributed under the terms of the + * GNU General Public License ("GPL") as published by the Free Software + * Foundation, either version 2 of that License or (at your option) any + * later version. + * + * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "fman_sp.h" +#include "fman.h" + +void fman_sp_set_buf_pools_in_asc_order_of_buf_sizes(struct fman_ext_pools +*fm_ext_pools, +u8 *ordered_array, +u16 *sizes_array) +{ + u16 buf_size = 0; + int i = 0, j = 0, k = 0; + + /* First we copy the external buffers pools information +* to an ordered local array +*/ + for (i = 0; i < fm_ext_pools->num_of_pools_used; i++) { + /* get pool size */ + buf_size = fm_ext_pools->ext_buf_pool[i].size; + + /* keep sizes in an array according to poolId +* for direct access +*/ + sizes_array[fm_ext_pools->ext_buf_pool[i].id] = buf_size; + + /* save poolId in an ordered array according to size */ + for (j = 0; j <= i; j++) { + /* this is the next free place in the array */ + if (j == i) + ordered_array[i] = + fm_ext_pools->ext_buf_pool[i].id; + else { + /* find the right place for this poolId */ + if (buf_size < sizes_array[ordered_array[j]]) { + /* move the pool_ids one place ahead +* to make room for this poolId +*/ + for (k = i; k > j; k--) + ordered_array[k] = +
[net-next v4 0/8] dpaa_eth: Add the Freescale DPAA Ethernet driver
This patch series adds the Ethernet driver for the Freescale QorIQ Data Path Acceleration Architecture (DPAA). This version includes changes following the feedback received on previous versions from Eric Dumazet, Bob Cochran, Joe Perches, Paul Bolle, Joakim Tjernlund, Scott Wood, David Miller - thank you. Together with the driver a managed version of alloc_percpu is provided that simplifies the release of per-CPU memory. The Freescale DPAA architecture consists in a series of hardware blocks that support the Ethernet connectivity. The Ethernet driver depends upon the following drivers that are currently in the Linux kernel or in review (the underlying drivers are not inter-dependent): - Peripheral Access Memory Unit (PAMU) drivers/iommu/fsl_* - Frame Manager (FMan) drivers/net/ethernet/freescale/fman - Queue Manager (QMan), Buffer Manager (BMan) drivers/soc/fsl/qbman dpaa_eth interfaces mapping to FMan MACs: dpaa_eth /eth0\ ... /ethN\ driver| | | | - --- - -Ports / Tx Rx \.../ Tx Rx \ FMan| | | | -MACs | MAC0 | | MACN | / dtsec0 \ ... 
/ dtsecN \ (or tgec) / \ / \(or memac) - -- --- -- - FMan, FMan Port, FMan SP, FMan MURAM drivers - FMan HW blocks: MURAM, MACs, Ports, SP - dpaa_eth relation to QMan, FMan: dpaa_eth /eth0\ driver/ \ - -^- -^- -^- ---- QMan driver / \ / \ / \ \ / | BMan| |Rx | |Rx | |Tx | |Tx | | driver | - |Dfl| |Err| |Cnf| |FQs| | | QMan HW|FQ | |FQ | |FQ | | | | | / \ / \ / \ \ / | | - --- --- --- -v-- |FMan QMI | | | FMan HW FMan BMI | BMan HW | --- where the acronyms used above (and in the code) are: DPAA = Data Path Acceleration Architecture FMan = DPAA Frame Manager QMan = DPAA Queue Manager BMan = DPAA Buffers Manager QMI = QMan interface in FMan BMI = BMan interface in FMan FMan SP = FMan Storage Profiles MURAM = Multi-user RAM in FMan FQ = QMan Frame Queue Rx Dfl FQ = default reception FQ Rx Err FQ = Rx error frames FQ Tx Cnf FQ = Tx confirmation FQ Tx FQs = transmission frame queues dtsec = datapath three speed Ethernet controller (10/100/1000 Mbps) tgec = ten gigabit Ethernet controller (10 Gbps) memac = multirate Ethernet MAC (10/100/1000/1) The latest FMan driver patches were submitted by Igal Liberman: https://patchwork.ozlabs.org/project/netdev/list/?submitter=64715=*=[v7, The latest Q/BMan drivers were submitted by Roy Pledge: https://patchwork.ozlabs.org/project/linuxppc-dev/list/?submitter=66331=* Changes from v3: - removed bogus delay and comment in .ndo_stop implementation - addressed minor issues reported by David Miller Changes from v2: - removed debugfs, moved exports to ethtool statistics - removed congestion groups Kconfig params Changes from v1: - bpool level Kconfig options removed - print format using pr_fmt, cleaned up prints - __hot/__cold removed - gratuitous unlikely() removed - code style aligned, consistent spacing for declarations - comment formatting The complete patch set based on the latest net-next/master kernel can be found in the public git at: http://git.freescale.com/git/cgit.cgi/ppc/upstream/linux.git under the tag 
ldup_public_git_20151102: http://git.freescale.com/git/cgit.cgi/ppc/upstream/linux.git/log/?h=ldup_public_git_20151102 There is one u-boot patch that one needs to make sure it's applied to align u-boot to the latest device tree binding document specification used by the FMan driver. Please make sure your u-boot includes this patch: commit 97a8d010e029111e5711a45264a726bedbeb24c4 Author: Igal LibermanDate: Tue Aug 18 14:47:05 2015 +0300 net/fman: Support both new and legacy FMan Compatibles The patch was included in u-boot in v2015.10-rc3. Madalin Bucur (8): devres: add devm_alloc_percpu() dpaa_eth: add support for DPAA Ethernet dpaa_eth: add support for S/G frames dpaa_eth: add driver's Tx queue selection dpaa_eth: add ethtool functionality dpaa_eth: add ethtool statistics dpaa_eth: add sysfs exports dpaa_eth: add trace points Documentation/driver-model/devres.txt |4 + drivers/base/devres.c | 64 + drivers/net/ethernet/freescale/Kconfig |2 +
[net-next v4 3/8] dpaa_eth: add support for S/G frames
Add support for Scatter/Gather (S/G) frames. The FMan can place the frame content into multiple buffers and provide an S/G Table (SGT) into one first buffer with references to the others. Signed-off-by: Madalin Bucur --- drivers/net/ethernet/freescale/dpaa/dpaa_eth.c | 6 + drivers/net/ethernet/freescale/dpaa/dpaa_eth.h | 2 +- .../net/ethernet/freescale/dpaa/dpaa_eth_common.c | 50 ++- .../net/ethernet/freescale/dpaa/dpaa_eth_common.h | 2 + drivers/net/ethernet/freescale/dpaa/dpaa_eth_sg.c | 335 +++-- 5 files changed, 374 insertions(+), 21 deletions(-) diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c index 8381616..31d55b4 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c @@ -463,6 +463,12 @@ static int dpa_private_netdev_init(struct net_device *net_dev) net_dev->hw_features |= (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_LLTX); + /* Advertise S/G and HIGHDMA support for private interfaces */ + net_dev->hw_features |= NETIF_F_SG | NETIF_F_HIGHDMA; + /* Recent kernels enable GSO automatically, if +* we declare NETIF_F_SG. For conformity, we'll +* still declare GSO explicitly. 
+*/ net_dev->features |= NETIF_F_GSO; return dpa_netdev_init(net_dev, mac_addr, tx_timeout); diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.h b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.h index 1cc8682..1ba6617 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.h +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.h @@ -347,7 +347,7 @@ static inline void clear_fd(struct qm_fd *fd) } static inline int _dpa_tx_fq_to_id(const struct dpa_priv_s *priv, - struct qman_fq *tx_fq) + struct qman_fq *tx_fq) { int i; diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth_common.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth_common.c index 963be4d8..b36cbca 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth_common.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth_common.c @@ -1177,10 +1177,42 @@ void dpaa_eth_init_ports(struct mac_device *mac_dev, port_fqs->rx_defq, _layout[RX]); } +void dpa_release_sgt(struct qm_sg_entry *sgt) +{ + struct dpa_bp *dpa_bp; + struct bm_buffer bmb[DPA_BUFF_RELEASE_MAX]; + u8 i = 0, j; + + memset(bmb, 0, sizeof(bmb)); + + do { + dpa_bp = dpa_bpid2pool(sgt[i].bpid); + DPA_ERR_ON(!dpa_bp); + + j = 0; + do { + DPA_ERR_ON(sgt[i].extension); + + bmb[j].hi = sgt[i].addr_hi; + bmb[j].lo = be32_to_cpu(sgt[i].addr_lo); + + j++; i++; + } while (j < ARRAY_SIZE(bmb) && + !sgt[i - 1].final && + sgt[i - 1].bpid == sgt[i].bpid); + + while (bman_release(dpa_bp->pool, bmb, j, 0)) + cpu_relax(); + } while (!sgt[i - 1].final); +} + void dpa_fd_release(const struct net_device *net_dev, const struct qm_fd *fd) { + struct qm_sg_entry *sgt; struct dpa_bp *dpa_bp; struct bm_buffer bmb; + dma_addr_t addr; + void *vaddr; memset(, 0, sizeof(bmb)); bm_buffer_set64(, fd->addr); @@ -1188,7 +1220,23 @@ void dpa_fd_release(const struct net_device *net_dev, const struct qm_fd *fd) dpa_bp = dpa_bpid2pool(fd->bpid); DPA_ERR_ON(!dpa_bp); - DPA_ERR_ON(fd->format == qm_fd_sg); + if (fd->format == qm_fd_sg) { + vaddr = phys_to_virt(fd->addr); + sgt = 
vaddr + dpa_fd_offset(fd); + + dma_unmap_single(dpa_bp->dev, qm_fd_addr(fd), dpa_bp->size, +DMA_BIDIRECTIONAL); + + dpa_release_sgt(sgt); + + addr = dma_map_single(dpa_bp->dev, vaddr, dpa_bp->size, + DMA_BIDIRECTIONAL); + if (dma_mapping_error(dpa_bp->dev, addr)) { + dev_err(dpa_bp->dev, "DMA mapping failed"); + return; + } + bm_buffer_set64(, addr); + } while (bman_release(dpa_bp->pool, , 1, 0)) cpu_relax(); diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth_common.h b/drivers/net/ethernet/freescale/dpaa/dpaa_eth_common.h index 68843c0..9df8f14 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth_common.h +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth_common.h @@ -37,6 +37,7 @@ #include "dpaa_eth.h" +#define DPA_SGT_MAX_ENTRIES 16 /* maximum number of entries in SG Table */ #define DPA_BUFF_RELEASE_MAX 8 /* maximum number of buffers released at
[net-next v4 8/8] dpaa_eth: add trace points
Add trace points on the hot processing path. Signed-off-by: Ruxandra Ioana Radulescu--- drivers/net/ethernet/freescale/dpaa/Makefile | 1 + drivers/net/ethernet/freescale/dpaa/dpaa_eth.c | 12 ++ drivers/net/ethernet/freescale/dpaa/dpaa_eth.h | 4 + .../net/ethernet/freescale/dpaa/dpaa_eth_trace.h | 141 + 4 files changed, 158 insertions(+) create mode 100644 drivers/net/ethernet/freescale/dpaa/dpaa_eth_trace.h diff --git a/drivers/net/ethernet/freescale/dpaa/Makefile b/drivers/net/ethernet/freescale/dpaa/Makefile index 141ade4..15ed1c4 100644 --- a/drivers/net/ethernet/freescale/dpaa/Makefile +++ b/drivers/net/ethernet/freescale/dpaa/Makefile @@ -9,3 +9,4 @@ ccflags-y += -I$(FMAN) obj-$(CONFIG_FSL_DPAA_ETH) += fsl_dpa.o fsl_dpa-objs += dpaa_eth.o dpaa_eth_sg.o dpaa_eth_common.o dpaa_ethtool.o dpaa_eth_sysfs.o +CFLAGS_dpaa_eth.o := -I$(src) diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c index 3cd03f5..b939d9d 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c @@ -57,6 +57,12 @@ #include "dpaa_eth.h" #include "dpaa_eth_common.h" +/* CREATE_TRACE_POINTS only needs to be defined once. 
Other dpa files + * using trace events only need to #include + */ +#define CREATE_TRACE_POINTS +#include "dpaa_eth_trace.h" + /* Valid checksum indication */ #define DPA_CSUM_VALID 0x @@ -229,6 +235,9 @@ priv_rx_default_dqrr(struct qman_portal *portal, priv = netdev_priv(net_dev); dpa_bp = priv->dpa_bp; + /* Trace the Rx fd */ + trace_dpa_rx_fd(net_dev, fq, >fd); + /* IRQ handler, non-migratable; safe to use raw_cpu_ptr here */ percpu_priv = raw_cpu_ptr(priv->percpu_priv); count_ptr = raw_cpu_ptr(dpa_bp->percpu_count); @@ -285,6 +294,9 @@ priv_tx_conf_default_dqrr(struct qman_portal *portal, net_dev = ((struct dpa_fq *)fq)->net_dev; priv = netdev_priv(net_dev); + /* Trace the fd */ + trace_dpa_tx_conf_fd(net_dev, fq, >fd); + /* Non-migratable context, safe to use raw_cpu_ptr */ percpu_priv = raw_cpu_ptr(priv->percpu_priv); diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.h b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.h index cdc7595..7dee8de 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.h +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.h @@ -36,6 +36,7 @@ #include "fman.h" #include "mac.h" +#include "dpaa_eth_trace.h" extern int dpa_rx_extra_headroom; extern int dpa_max_frm; @@ -407,6 +408,9 @@ static inline int dpa_xmit(struct dpa_priv_s *priv, if (fd->bpid == 0xff) fd->cmd |= qman_fq_fqid(priv->conf_fqs[queue]); + /* Trace this Tx fd */ + trace_dpa_tx_fd(priv->net_dev, egress_fq, fd); + for (i = 0; i < 10; i++) { err = qman_enqueue(egress_fq, fd, 0); if (err != -EBUSY) diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth_trace.h b/drivers/net/ethernet/freescale/dpaa/dpaa_eth_trace.h new file mode 100644 index 000..3b67477 --- /dev/null +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth_trace.h @@ -0,0 +1,141 @@ +/* Copyright 2013-2015 Freescale Semiconductor Inc. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Freescale Semiconductor nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * + * ALTERNATIVELY, this software may be distributed under the terms of the + * GNU General Public License ("GPL") as published by the Free Software + * Foundation, either version 2 of that License or (at your option) any + * later version. + * + * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING
Re: [PATCH] bpf: convert hashtab lock to raw lock
On 11/2/2015 12:59 AM, Thomas Gleixner wrote: On Sun, 1 Nov 2015, Alexei Starovoitov wrote: On Sat, Oct 31, 2015 at 09:47:36AM -0400, Steven Rostedt wrote: On Fri, 30 Oct 2015 17:03:58 -0700 Alexei Starovoitov wrote: On Fri, Oct 30, 2015 at 03:16:26PM -0700, Yang Shi wrote: When running bpf samples on rt kernel, it reports the below warning: BUG: sleeping function called from invalid context at kernel/locking/rtmutex.c:917 in_atomic(): 1, irqs_disabled(): 128, pid: 477, name: ping Preemption disabled at:[] kprobe_perf_func+0x30/0x228 ... diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 83c209d..972b76b 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -17,7 +17,7 @@ struct bpf_htab { struct bpf_map map; struct hlist_head *buckets; - spinlock_t lock; + raw_spinlock_t lock; How do we address such things in general? I bet there are tons of places around the kernel that call spin_lock from atomic. I'd hate to lose the benefits of lockdep of non-raw spin_lock just to make rt happy. You won't lose any benefits of lockdep. Lockdep still checks raw_spin_lock(). The only difference between raw_spin_lock and spin_lock is that in -rt spin_lock turns into an rt_mutex() and raw_spin_lock stays a spin lock. I see. The patch makes sense then. Would be good to document this peculiarity of spin_lock. I'm working on a document. Thanks Steven and Thomas for your elaboration and comment. Yang Thanks, tglx -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] bpf: convert hashtab lock to raw lock
On Mon, 02 Nov 2015 09:12:29 -0800 "Shi, Yang" wrote: > Yes, it is common practice for converting sleepable spin lock to raw > spin lock in -rt to avoid scheduling in atomic context bug. Note, in a lot of cases we don't just convert spin_locks to raw because of atomic context. There are times we need to change the design where the lock is not taken in atomic context (switching preempt_disable() to a local_lock() for example). But bpf is much like ftrace and kprobes where they can be taken almost anywhere, and they do indeed need to be raw. -- Steve -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] bpf: convert hashtab lock to raw lock
On 11/2/2015 9:24 AM, Steven Rostedt wrote: On Mon, 02 Nov 2015 09:12:29 -0800 "Shi, Yang" wrote: Yes, it is common practice for converting sleepable spin lock to raw spin lock in -rt to avoid scheduling in atomic context bug. Note, in a lot of cases we don't just convert spin_locks to raw because of atomic context. There are times we need to change the design where the lock is not taken in atomic context (switching preempt_disable() to a local_lock() for example). Yes, definitely. Understood. Thanks, Yang But bpf is much like ftrace and kprobes where they can be taken almost anywhere, and they do indeed need to be raw. -- Steve -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[v7, 2/6] fsl/fman: Add FMan support
From: Igal Liberman Add the Data Path Acceleration Architecture Frame Manager Driver. The FMan embeds a series of hardware blocks that implement a group of Ethernet interfaces. This patch adds the FMan configuration, initialization and runtime control routines. The FMan driver supports several hardware versions differentiated by things like: - Different types of MACs - Number of MACs and ports - Available resources - Different hardware errata Signed-off-by: Igal Liberman --- drivers/net/ethernet/freescale/fman/Makefile |2 +- drivers/net/ethernet/freescale/fman/fman.c | 2876 ++ drivers/net/ethernet/freescale/fman/fman.h | 325 +++ 3 files changed, 3202 insertions(+), 1 deletion(-) create mode 100644 drivers/net/ethernet/freescale/fman/fman.c create mode 100644 drivers/net/ethernet/freescale/fman/fman.h diff --git a/drivers/net/ethernet/freescale/fman/Makefile b/drivers/net/ethernet/freescale/fman/Makefile index fc2e194..fb5a7f0 100644 --- a/drivers/net/ethernet/freescale/fman/Makefile +++ b/drivers/net/ethernet/freescale/fman/Makefile @@ -2,4 +2,4 @@ subdir-ccflags-y += -I$(srctree)/drivers/net/ethernet/freescale/fman obj-y += fsl_fman.o -fsl_fman-objs := fman_muram.o +fsl_fman-objs := fman_muram.o fman.o diff --git a/drivers/net/ethernet/freescale/fman/fman.c b/drivers/net/ethernet/freescale/fman/fman.c new file mode 100644 index 000..f97a52b --- /dev/null +++ b/drivers/net/ethernet/freescale/fman/fman.c @@ -0,0 +1,2876 @@ +/* + * Copyright 2008-2015 Freescale Semiconductor Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Freescale Semiconductor nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * + * ALTERNATIVELY, this software may be distributed under the terms of the + * GNU General Public License ("GPL") as published by the Free Software + * Foundation, either version 2 of that License or (at your option) any + * later version. + * + * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include "fman.h" +#include "fman_muram.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* General defines */ +#define FMAN_LIODN_TBL 64 /* size of LIODN table */ +#define MAX_NUM_OF_MACS10 +#define FM_NUM_OF_FMAN_CTRL_EVENT_REGS 4 +#define BASE_RX_PORTID 0x08 +#define BASE_TX_PORTID 0x28 + +/* Modules registers offsets */ +#define BMI_OFFSET 0x0008 +#define QMI_OFFSET 0x00080400 +#define DMA_OFFSET 0x000C2000 +#define FPM_OFFSET 0x000C3000 +#define IMEM_OFFSET0x000C4000 +#define CGP_OFFSET 0x000DB000 + +/* Exceptions bit map */ +#define EX_DMA_BUS_ERROR 0x8000 +#define EX_DMA_READ_ECC0x4000 +#define EX_DMA_SYSTEM_WRITE_ECC0x2000 +#define EX_DMA_FM_WRITE_ECC0x1000 +#define EX_FPM_STALL_ON_TASKS 0x0800 +#define EX_FPM_SINGLE_ECC 0x0400 +#define EX_FPM_DOUBLE_ECC 0x0200 +#define EX_QMI_SINGLE_ECC 0x0100 +#define EX_QMI_DEQ_FROM_UNKNOWN_PORTID 0x0080 +#define EX_QMI_DOUBLE_ECC 0x0040 +#define EX_BMI_LIST_RAM_ECC0x0020 +#define EX_BMI_STORAGE_PROFILE_ECC 0x0010 +#define EX_BMI_STATISTICS_RAM_ECC 0x0008 +#define EX_IRAM_ECC0x0004 +#define EX_MURAM_ECC
[v7, 5/6] fsl/fman: Add FMan Port Support
From: Igal Liberman Add the Data Path Acceleration Architecture Frame Manager Port Driver. The FMan driver uses a module called "Port" to represent the physical TX and RX ports. Each FMan version has a different number of physical ports. This patch adds the FMan Port configuration, initialization and runtime control routines for both TX and RX. Signed-off-by: Igal Liberman --- drivers/net/ethernet/freescale/fman/Makefile|2 +- drivers/net/ethernet/freescale/fman/fman_port.c | 1779 +++ drivers/net/ethernet/freescale/fman/fman_port.h | 151 ++ 3 files changed, 1931 insertions(+), 1 deletion(-) create mode 100644 drivers/net/ethernet/freescale/fman/fman_port.c create mode 100644 drivers/net/ethernet/freescale/fman/fman_port.h diff --git a/drivers/net/ethernet/freescale/fman/Makefile b/drivers/net/ethernet/freescale/fman/Makefile index 5141532..2eb0b9b 100644 --- a/drivers/net/ethernet/freescale/fman/Makefile +++ b/drivers/net/ethernet/freescale/fman/Makefile @@ -2,5 +2,5 @@ subdir-ccflags-y += -I$(srctree)/drivers/net/ethernet/freescale/fman obj-y += fsl_fman.o fsl_fman_mac.o -fsl_fman-objs := fman_muram.o fman.o fman_sp.o +fsl_fman-objs := fman_muram.o fman.o fman_sp.o fman_port.o fsl_fman_mac-objs := fman_dtsec.o fman_memac.o fman_tgec.o diff --git a/drivers/net/ethernet/freescale/fman/fman_port.c b/drivers/net/ethernet/freescale/fman/fman_port.c new file mode 100644 index 000..56ecf2b --- /dev/null +++ b/drivers/net/ethernet/freescale/fman/fman_port.c @@ -0,0 +1,1779 @@ +/* + * Copyright 2008 - 2015 Freescale Semiconductor Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Freescale Semiconductor nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * + * ALTERNATIVELY, this software may be distributed under the terms of the + * GNU General Public License ("GPL") as published by the Free Software + * Foundation, either version 2 of that License or (at your option) any + * later version. + * + * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include "fman_port.h" +#include "fman.h" +#include "fman_sp.h" + +#include +#include +#include +#include +#include +#include + +/* Queue ID */ +#define DFLT_FQ_ID 0x00FF + +/* General defines */ +#define PORT_BMI_FIFO_UNITS0x100 + +#define MAX_PORT_FIFO_SIZE(bmi_max_fifo_size) \ + min((u32)bmi_max_fifo_size, (u32)1024 * FMAN_BMI_FIFO_UNITS) + +#define PORT_CG_MAP_NUM8 +#define PORT_PRS_RESULT_WORDS_NUM 8 +#define PORT_IC_OFFSET_UNITS 0x10 + +#define MIN_EXT_BUF_SIZE 64 + +#define BMI_PORT_REGS_OFFSET 0 +#define QMI_PORT_REGS_OFFSET 0x400 + +/* Default values */ +#define DFLT_PORT_BUFFER_PREFIX_CONTEXT_DATA_ALIGN \ + DFLT_FM_SP_BUFFER_PREFIX_CONTEXT_DATA_ALIGN + +#define DFLT_PORT_CUT_BYTES_FROM_END 4 + +#define DFLT_PORT_ERRORS_TO_DISCARDFM_PORT_FRM_ERR_CLS_DISCARD +#define DFLT_PORT_MAX_FRAME_LENGTH 9600 + +#define DFLT_PORT_RX_FIFO_PRI_ELEVATION_LEV(bmi_max_fifo_size) \ + MAX_PORT_FIFO_SIZE(bmi_max_fifo_size) + +#define DFLT_PORT_RX_FIFO_THRESHOLD(major, bmi_max_fifo_size) \ + (major == 6 ? \ + MAX_PORT_FIFO_SIZE(bmi_max_fifo_size) : \ + (MAX_PORT_FIFO_SIZE(bmi_max_fifo_size) * 3 / 4))\ + +#define DFLT_PORT_EXTRA_NUM_OF_FIFO_BUFS 0 + +/* QMI defines */ +#define
[PATCH net-next] net: fix percpu memory leaks
From: Eric DumazetThis patch fixes following problems : 1) percpu_counter_init() can return an error, therefore init_frag_mem_limit() must propagate this error so that inet_frags_init_net() can do the same up to its callers. 2) If ip[46]_frags_ns_ctl_register() fail, we must unwind properly and free the percpu_counter. Without this fix, we leave freed object in percpu_counters global list (if CONFIG_HOTPLUG_CPU) leading to crashes. This bug was detected by KASAN and syzkaller tool (http://github.com/google/syzkaller) Fixes: 6d7b857d541e ("net: use lib/percpu_counter API for fragmentation mem accounting") Signed-off-by: Eric Dumazet Reported-by: Dmitry Vyukov Cc: Hannes Frederic Sowa Cc: Jesper Dangaard Brouer --- include/net/inet_frag.h | 15 +-- net/ieee802154/6lowpan/reassembly.c | 11 --- net/ipv4/inet_fragment.c|6 -- net/ipv4/ip_fragment.c | 12 +--- net/ipv6/netfilter/nf_conntrack_reasm.c | 12 +--- net/ipv6/reassembly.c | 12 +--- 6 files changed, 44 insertions(+), 24 deletions(-) diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 53eead2da743..ac42bbb37b2d 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -108,7 +108,15 @@ struct inet_frags { int inet_frags_init(struct inet_frags *); void inet_frags_fini(struct inet_frags *); -void inet_frags_init_net(struct netns_frags *nf); +static inline int inet_frags_init_net(struct netns_frags *nf) +{ + return percpu_counter_init(>mem, 0, GFP_KERNEL); +} +static inline void inet_frags_uninit_net(struct netns_frags *nf) +{ + percpu_counter_destroy(>mem); +} + void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f); void inet_frag_kill(struct inet_frag_queue *q, struct inet_frags *f); @@ -154,11 +162,6 @@ static inline void add_frag_mem_limit(struct netns_frags *nf, int i) __percpu_counter_add(>mem, i, frag_percpu_counter_batch); } -static inline void init_frag_mem_limit(struct netns_frags *nf) -{ - percpu_counter_init(>mem, 0, GFP_KERNEL); -} - static inline unsigned int 
sum_frag_mem_limit(struct netns_frags *nf) { unsigned int res; diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c index 12e8cf4bda9f..6b437e8760d3 100644 --- a/net/ieee802154/6lowpan/reassembly.c +++ b/net/ieee802154/6lowpan/reassembly.c @@ -580,14 +580,19 @@ static int __net_init lowpan_frags_init_net(struct net *net) { struct netns_ieee802154_lowpan *ieee802154_lowpan = net_ieee802154_lowpan(net); + int res; ieee802154_lowpan->frags.high_thresh = IPV6_FRAG_HIGH_THRESH; ieee802154_lowpan->frags.low_thresh = IPV6_FRAG_LOW_THRESH; ieee802154_lowpan->frags.timeout = IPV6_FRAG_TIMEOUT; - inet_frags_init_net(_lowpan->frags); - - return lowpan_frags_ns_sysctl_register(net); + res = inet_frags_init_net(_lowpan->frags); + if (res) + return res; + res = lowpan_frags_ns_sysctl_register(net); + if (res) + inet_frags_uninit_net(_lowpan->frags); + return res; } static void __net_exit lowpan_frags_exit_net(struct net *net) diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index d0a7c0319e3d..fe144dae7372 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -209,12 +209,6 @@ int inet_frags_init(struct inet_frags *f) } EXPORT_SYMBOL(inet_frags_init); -void inet_frags_init_net(struct netns_frags *nf) -{ - init_frag_mem_limit(nf); -} -EXPORT_SYMBOL(inet_frags_init_net); - void inet_frags_fini(struct inet_frags *f) { cancel_work_sync(>frags_work); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 5482745d5d68..1fe55ae81781 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -839,6 +839,8 @@ static void __init ip4_frags_ctl_register(void) static int __net_init ipv4_frags_init_net(struct net *net) { + int res; + /* Fragment cache limits. 
* * The fragment memory accounting code, (tries to) account for @@ -862,9 +864,13 @@ static int __net_init ipv4_frags_init_net(struct net *net) */ net->ipv4.frags.timeout = IP_FRAG_TIME; - inet_frags_init_net(>ipv4.frags); - - return ip4_frags_ns_ctl_register(net); + res = inet_frags_init_net(>ipv4.frags); + if (res) + return res; + res = ip4_frags_ns_ctl_register(net); + if (res) + inet_frags_uninit_net(>ipv4.frags); + return res; } static void __net_exit ipv4_frags_exit_net(struct net *net) diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 99610547fccc..d5efeb87350e 100644 ---
Re: [PATCH net-next] af_unix: optimize unix_writable by inlining
On Mon, 2015-11-02 at 12:01 -0500, Aaron Conole wrote: > unix_writable() originally was inlined, but was changed as part of > commit 1586a5877db9 ("af_unix: do not report POLLOUT on > listeners"). Re-enable the inline flag. > > Signed-off-by: Aaron Conole > Cc: Eric Dumazet > --- We leave this to the compiler nowadays. If you take a look at disassembly, you'll see your patch has no effect at all. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: kernel panic in 4.2.3, rb_erase in sch_fq
On Mon, 2015-11-02 at 17:58 +0200, Denys Fedoryshchenko wrote: > On 2015-11-02 17:24, Eric Dumazet wrote: > > On Mon, 2015-11-02 at 16:11 +0200, Denys Fedoryshchenko wrote: > >> Hi! > >> > >> Actually seems i was getting this panic for a while (once per week) on > >> loaded pppoe server, but just now was able to get full panic message. > >> After checking commit logs on sch_fq.c i didnt seen any fixes, so > >> probably upgrading to newer kernel wont help? > > > > I do not think we support sch_fq as a HTB leaf. > > > > If you want both HTB and sch_fq, you need to setup a bonding device. > > > > HTB on bond0 > > > > sch_fq on the slaves > > > > Sure, the kernel should not crash, but HTB+sch_fq on same net device is > > certainly not something that will work anyway. > Strange, because except ppp, on static devices it works really very well > in such scheme. It is the only solution that can throttle incoming > bandwidth, when bandwidth is very overbooked - reliably, for my use > cases, such as 256k+ flows/2.5Gbps and several different classes of > traffic, so using DRR will end up in just not enough classes. > > On latest kernels i had to patch tc to provide parameter for orphan mask > in fq, to increase number for flows for transit traffic. > None of other qdiscs able to solve this problem, incoming bandwidth > simply flowing 10-20% more than set, but fq is doing magic. > The only device that was working with similar efficiency for such cases > - proprietary PacketShaper, but is modifying tcp window size, and can't > be called transparent, and also has stability issues over 1Gbps. Ah, I was thinking you needed more like 10Gb traffic ;) with HTB on bonding, we can use MQ+FQ on the slaves in order to use many cpus to serve local traffic. But yes, if you use HTB+FQ for forwarding, I guess the bonding setup is not really needed. 
-- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[net-next v4 4/8] dpaa_eth: add driver's Tx queue selection
Allow the selection of the transmission queue based on the CPU id. Signed-off-by: Madalin Bucur--- drivers/net/ethernet/freescale/dpaa/Kconfig | 10 ++ drivers/net/ethernet/freescale/dpaa/dpaa_eth.c| 3 +++ drivers/net/ethernet/freescale/dpaa/dpaa_eth.h| 6 ++ drivers/net/ethernet/freescale/dpaa/dpaa_eth_common.c | 8 drivers/net/ethernet/freescale/dpaa/dpaa_eth_common.h | 4 5 files changed, 31 insertions(+) diff --git a/drivers/net/ethernet/freescale/dpaa/Kconfig b/drivers/net/ethernet/freescale/dpaa/Kconfig index 022d5aa..2577aac 100644 --- a/drivers/net/ethernet/freescale/dpaa/Kconfig +++ b/drivers/net/ethernet/freescale/dpaa/Kconfig @@ -11,6 +11,16 @@ menuconfig FSL_DPAA_ETH if FSL_DPAA_ETH +config FSL_DPAA_ETH_USE_NDO_SELECT_QUEUE + bool "Use driver's Tx queue selection mechanism" + default y + ---help--- + The DPAA Ethernet driver defines a ndo_select_queue() callback for optimal selection + of the egress FQ. That will override the XPS support for this netdevice. + If for whatever reason you want to be in control of the egress FQ-to-CPU selection and mapping, + or simply don't want to use the driver's ndo_select_queue() callback, then unselect this + and use the standard XPS support instead. 
+ config FSL_DPAA_ETH_FRIENDLY_IF_NAME bool "Use fmX-macY names for the DPAA interfaces" default y diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c index 31d55b4..894f1a7 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c @@ -390,6 +390,9 @@ static const struct net_device_ops dpa_private_ops = { .ndo_get_stats64 = dpa_get_stats64, .ndo_set_mac_address = dpa_set_mac_address, .ndo_validate_addr = eth_validate_addr, +#ifdef CONFIG_FSL_DPAA_ETH_USE_NDO_SELECT_QUEUE + .ndo_select_queue = dpa_select_queue, +#endif .ndo_change_mtu = dpa_change_mtu, .ndo_set_rx_mode = dpa_set_rx_mode, .ndo_init = dpa_ndo_init, diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.h b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.h index 1ba6617..87577cf 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.h +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.h @@ -420,9 +420,15 @@ static inline void _dpa_assign_wq(struct dpa_fq *fq) } } +#ifdef CONFIG_FSL_DPAA_ETH_USE_NDO_SELECT_QUEUE +/* Use in lieu of skb_get_queue_mapping() */ +#define dpa_get_queue_mapping(skb) \ + raw_smp_processor_id() +#else /* Use the queue selected by XPS */ #define dpa_get_queue_mapping(skb) \ skb_get_queue_mapping(skb) +#endif static inline void _dpa_bp_free_pf(void *addr) { diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth_common.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth_common.c index b36cbca..89f3b1f 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth_common.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth_common.c @@ -593,6 +593,14 @@ bool dpa_bpid2pool_use(int bpid) return false; } +#ifdef CONFIG_FSL_DPAA_ETH_USE_NDO_SELECT_QUEUE +u16 dpa_select_queue(struct net_device *net_dev, struct sk_buff *skb, +void *accel_priv, select_queue_fallback_t fallback) +{ + return dpa_get_queue_mapping(skb); +} +#endif + struct dpa_fq *dpa_fq_alloc(struct device *dev, const 
struct fqid_cell *fqids, struct list_head *list, diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth_common.h b/drivers/net/ethernet/freescale/dpaa/dpaa_eth_common.h index 9df8f14..2e9471d 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth_common.h +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth_common.h @@ -70,6 +70,10 @@ struct dpa_bp *dpa_bpid2pool(int bpid); void dpa_bpid2pool_map(int bpid, struct dpa_bp *dpa_bp); bool dpa_bpid2pool_use(int bpid); void dpa_bp_drain(struct dpa_bp *bp); +#ifdef CONFIG_FSL_DPAA_ETH_USE_NDO_SELECT_QUEUE +u16 dpa_select_queue(struct net_device *net_dev, struct sk_buff *skb, +void *accel_priv, select_queue_fallback_t fallback); +#endif struct dpa_fq *dpa_fq_alloc(struct device *dev, const struct fqid_cell *fqids, struct list_head *list, -- 1.7.11.7 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[v7, 3/6] fsl/fman: Add FMan MAC support
From: Igal LibermanAdd the Data Path Acceleration Architecture Frame Manger MAC support. This patch adds The FMan MAC configuration, initialization and runtime control routines. This patch contains support for these types of MACs: - dTSEC: Three speed Ethernet controller (10/100/1000 Mbps) - tGEC: 10G Ethernet controller (10 Gbps) - mEMAC: Multi-rate Ethernet MAC (10/100/1000/1 Mbps) Different FMan revisions have different type and number of MACs. Signed-off-by: Igal Liberman --- drivers/net/ethernet/freescale/fman/Makefile |3 +- .../net/ethernet/freescale/fman/crc_mac_addr_ext.h | 314 drivers/net/ethernet/freescale/fman/fman_dtsec.c | 1609 drivers/net/ethernet/freescale/fman/fman_dtsec.h | 59 + drivers/net/ethernet/freescale/fman/fman_mac.h | 276 drivers/net/ethernet/freescale/fman/fman_memac.c | 1307 drivers/net/ethernet/freescale/fman/fman_memac.h | 60 + drivers/net/ethernet/freescale/fman/fman_tgec.c| 798 ++ drivers/net/ethernet/freescale/fman/fman_tgec.h| 55 + 9 files changed, 4480 insertions(+), 1 deletion(-) create mode 100644 drivers/net/ethernet/freescale/fman/crc_mac_addr_ext.h create mode 100644 drivers/net/ethernet/freescale/fman/fman_dtsec.c create mode 100644 drivers/net/ethernet/freescale/fman/fman_dtsec.h create mode 100644 drivers/net/ethernet/freescale/fman/fman_mac.h create mode 100644 drivers/net/ethernet/freescale/fman/fman_memac.c create mode 100644 drivers/net/ethernet/freescale/fman/fman_memac.h create mode 100644 drivers/net/ethernet/freescale/fman/fman_tgec.c create mode 100644 drivers/net/ethernet/freescale/fman/fman_tgec.h diff --git a/drivers/net/ethernet/freescale/fman/Makefile b/drivers/net/ethernet/freescale/fman/Makefile index fb5a7f0..43360d70 100644 --- a/drivers/net/ethernet/freescale/fman/Makefile +++ b/drivers/net/ethernet/freescale/fman/Makefile @@ -1,5 +1,6 @@ subdir-ccflags-y += -I$(srctree)/drivers/net/ethernet/freescale/fman -obj-y += fsl_fman.o +obj-y += fsl_fman.o fsl_fman_mac.o fsl_fman-objs := fman_muram.o fman.o 
+fsl_fman_mac-objs := fman_dtsec.o fman_memac.o fman_tgec.o diff --git a/drivers/net/ethernet/freescale/fman/crc_mac_addr_ext.h b/drivers/net/ethernet/freescale/fman/crc_mac_addr_ext.h new file mode 100644 index 000..92f2e87 --- /dev/null +++ b/drivers/net/ethernet/freescale/fman/crc_mac_addr_ext.h @@ -0,0 +1,314 @@ +/* + * Copyright 2008-2015 Freescale Semiconductor Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Freescale Semiconductor nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * + * ALTERNATIVELY, this software may be distributed under the terms of the + * GNU General Public License ("GPL") as published by the Free Software + * Foundation, either version 2 of that License or (at your option) any + * later version. + * + * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* Define a macro that calculate the crc value of an Ethernet MAC address + * (48 bitd address) + */ + +#ifndef __crc_mac_addr_ext_h +#define __crc_mac_addr_ext_h + +#include + +static u32 crc_table[256] = { + 0x, + 0x77073096, + 0xee0e612c, + 0x990951ba, + 0x076dc419, + 0x706af48f, + 0xe963a535, + 0x9e6495a3, + 0x0edb8832, + 0x79dcb8a4, + 0xe0d5e91e, + 0x97d2d988, + 0x09b64c2b, + 0x7eb17cbd, + 0xe7b82d07, + 0x90bf1d91, + 0x1db71064, + 0x6ab020f2, + 0xf3b97148, +
Re: [PATCH net] net: avoid NULL deref in inet_ctl_sock_destroy()
On Mon, Nov 2, 2015, at 16:50, Eric Dumazet wrote: > From: Eric Dumazet > > Under low memory conditions, tcp_sk_init() and icmp_sk_init() > can both iterate on all possible cpus and call inet_ctl_sock_destroy(), > with eventual NULL pointer. > > Signed-off-by: Eric Dumazet > Reported-by: Dmitry Vyukov Eric, was this a private report or some of those floating around publicly? Thanks, Hannes -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[net-next v4 1/8] devres: add devm_alloc_percpu()
Introduce managed counterparts for alloc_percpu() and free_percpu(). Add devm_alloc_percpu() and devm_free_percpu() into the managed interfaces list. Signed-off-by: Madalin BucurTested-by: Madalin-Cristian Bucur --- Documentation/driver-model/devres.txt | 4 +++ drivers/base/devres.c | 64 +++ include/linux/device.h| 19 +++ 3 files changed, 87 insertions(+) diff --git a/Documentation/driver-model/devres.txt b/Documentation/driver-model/devres.txt index 831a536..595fd1b 100644 --- a/Documentation/driver-model/devres.txt +++ b/Documentation/driver-model/devres.txt @@ -312,6 +312,10 @@ MEM devm_kvasprintf() devm_kzalloc() +PER-CPU MEM + devm_alloc_percpu() + devm_free_percpu() + PCI pcim_enable_device() : after success, all PCI ops become managed pcim_pin_device(): keep PCI device enabled after release diff --git a/drivers/base/devres.c b/drivers/base/devres.c index 8754646..6c314cc 100644 --- a/drivers/base/devres.c +++ b/drivers/base/devres.c @@ -10,6 +10,7 @@ #include #include #include +#include #include "base.h" @@ -984,3 +985,66 @@ void devm_free_pages(struct device *dev, unsigned long addr) )); } EXPORT_SYMBOL_GPL(devm_free_pages); + +static void devm_percpu_release(struct device *dev, void *pdata) +{ + void __percpu *p; + + p = *(void __percpu **)pdata; + free_percpu(p); +} + +static int devm_percpu_match(struct device *dev, void *data, void *p) +{ + struct devres *devr = container_of(data, struct devres, data); + + return *(void **)devr->data == p; +} + +/** + * __devm_alloc_percpu - Resource-managed alloc_percpu + * @dev: Device to allocate per-cpu memory for + * @size: Size of per-cpu memory to allocate + * @align: Alignement of per-cpu memory to allocate + * + * Managed alloc_percpu. Per-cpu memory allocated with this function is + * automatically freed on driver detach. + * + * RETURNS: + * Pointer to allocated memory on success, NULL on failure. 
+ */ +void __percpu *__devm_alloc_percpu(struct device *dev, size_t size, + size_t align) +{ + void *p; + void __percpu *pcpu; + + pcpu = __alloc_percpu(size, align); + if (!pcpu) + return NULL; + + p = devres_alloc(devm_percpu_release, sizeof(void *), GFP_KERNEL); + if (!p) + return NULL; + + *(void __percpu **)p = pcpu; + + devres_add(dev, p); + + return pcpu; +} +EXPORT_SYMBOL_GPL(__devm_alloc_percpu); + +/** + * devm_free_percpu - Resource-managed free_percpu + * @dev: Device this memory belongs to + * @pdata: Per-cpu memory to free + * + * Free memory allocated with devm_alloc_percpu(). + */ +void devm_free_percpu(struct device *dev, void __percpu *pdata) +{ + WARN_ON(devres_destroy(dev, devm_percpu_release, devm_percpu_match, + (void *)pdata)); +} +EXPORT_SYMBOL_GPL(devm_free_percpu); diff --git a/include/linux/device.h b/include/linux/device.h index 5d7bc63..b563cc5 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -673,6 +673,25 @@ void __iomem *devm_ioremap_resource(struct device *dev, struct resource *res); int devm_add_action(struct device *dev, void (*action)(void *), void *data); void devm_remove_action(struct device *dev, void (*action)(void *), void *data); +/** + * devm_alloc_percpu - Resource-managed alloc_percpu + * @dev: Device to allocate per-cpu memory for + * @type: Type to allocate per-cpu memory for + * + * Managed alloc_percpu. Per-cpu memory allocated with this function is + * automatically freed on driver detach. + * + * RETURNS: + * Pointer to allocated memory on success, NULL on failure. 
+ */ +#define devm_alloc_percpu(dev, type) \ + (typeof(type) __percpu *)__devm_alloc_percpu(dev, sizeof(type), \ +__alignof__(type)) + +void __percpu *__devm_alloc_percpu(struct device *dev, size_t size, + size_t align); +void devm_free_percpu(struct device *dev, void __percpu *pdata); + struct device_dma_parameters { /* * a low level driver may set these to teach IOMMU code about -- 1.7.11.7 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
RE: [PATCH v3 net] i40e: Look up MAC address in Open Firmware or IDPROM
> -Original Message- > From: Sowmini Varadhan [mailto:sowmini.varad...@oracle.com] > Sent: Sunday, November 01, 2015 4:07 PM > > On (11/01/15 21:03), Nelson, Shannon wrote: > > .. In the meantime, be sure to test what happens over a reset, such as > what > > happens when the MTU is changed. This will make sure that the replay > > of mac and vlan filters happens correctly. You'll want to test this > > with and without vlans. > > I assume you mean .1q (aka linux macvlan) as opposed to access/trunk > vlans? Yes, this is what I had in mind. > I will test that tomorrow but I did a quick sanity check on mtu, as well > as turning tso on/off which also restarts the driver (I believe), and > it was "fine", i.e., able to ping offlink hosts. > > --Sowmini Great, thanks. sln -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: kernel panic in 4.2.3, rb_erase in sch_fq
On 2015-11-02 17:24, Eric Dumazet wrote: On Mon, 2015-11-02 at 16:11 +0200, Denys Fedoryshchenko wrote: Hi! Actually seems i was getting this panic for a while (once per week) on loaded pppoe server, but just now was able to get full panic message. After checking commit logs on sch_fq.c i didnt seen any fixes, so probably upgrading to newer kernel wont help? I do not think we support sch_fq as a HTB leaf. If you want both HTB and sch_fq, you need to setup a bonding device. HTB on bond0 sch_fq on the slaves Sure, the kernel should not crash, but HTB+sch_fq on same net device is certainly not something that will work anyway. Strange, because except ppp, on static devices it works really very well in such scheme. It is the only solution that can throttle incoming bandwidth, when bandwidth is very overbooked - reliably, for my use cases, such as 256k+ flows/2.5Gbps and several different classes of traffic, so using DRR will end up in just not enough classes. On latest kernels i had to patch tc to provide parameter for orphan mask in fq, to increase number for flows for transit traffic. None of other qdiscs able to solve this problem, incoming bandwidth simply flowing 10-20% more than set, but fq is doing magic. The only device that was working with similar efficiency for such cases - proprietary PacketShaper, but is modifying tcp window size, and can't be called transparent, and also has stability issues over 1Gbps. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: kernel panic in 4.2.3, rb_erase in sch_fq
On 2015-11-02 18:12, Eric Dumazet wrote: On Mon, 2015-11-02 at 17:58 +0200, Denys Fedoryshchenko wrote: On 2015-11-02 17:24, Eric Dumazet wrote: > On Mon, 2015-11-02 at 16:11 +0200, Denys Fedoryshchenko wrote: >> Hi! >> >> Actually seems i was getting this panic for a while (once per week) on >> loaded pppoe server, but just now was able to get full panic message. >> After checking commit logs on sch_fq.c i didnt seen any fixes, so >> probably upgrading to newer kernel wont help? > > I do not think we support sch_fq as a HTB leaf. > > If you want both HTB and sch_fq, you need to setup a bonding device. > > HTB on bond0 > > sch_fq on the slaves > > Sure, the kernel should not crash, but HTB+sch_fq on same net device is > certainly not something that will work anyway. Strange, because except ppp, on static devices it works really very well in such scheme. It is the only solution that can throttle incoming bandwidth, when bandwidth is very overbooked - reliably, for my use cases, such as 256k+ flows/2.5Gbps and several different classes of traffic, so using DRR will end up in just not enough classes. On latest kernels i had to patch tc to provide parameter for orphan mask in fq, to increase number for flows for transit traffic. None of other qdiscs able to solve this problem, incoming bandwidth simply flowing 10-20% more than set, but fq is doing magic. The only device that was working with similar efficiency for such cases - proprietary PacketShaper, but is modifying tcp window size, and can't be called transparent, and also has stability issues over 1Gbps. Ah, I was thinking you needed more like 10Gb traffic ;) with HTB on bonding, we can use MQ+FQ on the slaves in order to use many cpus to serve local traffic. But yes, if you use HTB+FQ for forwarding, I guess the bonding setup is not really needed. Well, here country is very underdeveloped in matters of technology. 10G interfaces appeared in some ISP only this year. 
On the ppp interfaces where crash happening - it is even less bandwidth. Each user max 1-2Mbps (average usage 128kbps), 4.5k interfaces. But i have some more heavy setups there, around 9k pppoe users terminated on single server, (means 9k interfaces), about 2Gbps traffic passing thru. If i take non-FOSS solution, i will have to pay for software licenses $100k+, which is unbearable for local ISP. fq is not critical in this specific use case, i can use for ppp interfaces fifo or such, but i guess better to report a bug :) -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[net-next v4 7/8] dpaa_eth: add sysfs exports
Export Frame Queue and Buffer Pool IDs through sysfs. Signed-off-by: Madalin Bucur--- drivers/net/ethernet/freescale/dpaa/Makefile | 2 +- drivers/net/ethernet/freescale/dpaa/dpaa_eth.c | 2 + drivers/net/ethernet/freescale/dpaa/dpaa_eth.h | 3 + .../net/ethernet/freescale/dpaa/dpaa_eth_common.c | 2 + .../net/ethernet/freescale/dpaa/dpaa_eth_sysfs.c | 167 + 5 files changed, 175 insertions(+), 1 deletion(-) create mode 100644 drivers/net/ethernet/freescale/dpaa/dpaa_eth_sysfs.c diff --git a/drivers/net/ethernet/freescale/dpaa/Makefile b/drivers/net/ethernet/freescale/dpaa/Makefile index 9b75d52..141ade4 100644 --- a/drivers/net/ethernet/freescale/dpaa/Makefile +++ b/drivers/net/ethernet/freescale/dpaa/Makefile @@ -8,4 +8,4 @@ ccflags-y += -I$(FMAN) obj-$(CONFIG_FSL_DPAA_ETH) += fsl_dpa.o -fsl_dpa-objs += dpaa_eth.o dpaa_eth_sg.o dpaa_eth_common.o dpaa_ethtool.o +fsl_dpa-objs += dpaa_eth.o dpaa_eth_sg.o dpaa_eth_common.o dpaa_ethtool.o dpaa_eth_sysfs.o diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c index 0b3332a..3cd03f5 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c @@ -756,6 +756,8 @@ dpaa_eth_priv_probe(struct platform_device *pdev) if (err < 0) goto netdev_init_failed; + dpaa_eth_sysfs_init(_dev->dev); + pr_info("Probed interface %s\n", net_dev->name); return 0; diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.h b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.h index ccaadd9..cdc7595 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.h +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.h @@ -371,6 +371,9 @@ static inline u16 dpa_get_headroom(struct dpa_buffer_layout_s *bl) return bl->data_align ? 
ALIGN(headroom, bl->data_align) : headroom; } +void dpaa_eth_sysfs_remove(struct device *dev); +void dpaa_eth_sysfs_init(struct device *dev); + void dpa_private_napi_del(struct net_device *net_dev); static inline void clear_fd(struct qm_fd *fd) diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth_common.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth_common.c index 4947cb9..2cf4565 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth_common.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth_common.c @@ -299,6 +299,8 @@ int dpa_remove(struct platform_device *pdev) priv = netdev_priv(net_dev); + dpaa_eth_sysfs_remove(dev); + dev_set_drvdata(dev, NULL); unregister_netdev(net_dev); diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth_sysfs.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth_sysfs.c new file mode 100644 index 000..a6c71b1 --- /dev/null +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth_sysfs.c @@ -0,0 +1,167 @@ +/* Copyright 2008-2015 Freescale Semiconductor Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Freescale Semiconductor nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * + * ALTERNATIVELY, this software may be distributed under the terms of the + * GNU General Public License ("GPL") as published by the Free Software + * Foundation, either version 2 of that License or (at your option) any + * later version. 
+ * + * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include "dpaa_eth.h" +#include "mac.h" + +static ssize_t dpaa_eth_show_addr(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct
[net-next v4 5/8] dpaa_eth: add ethtool functionality
Add support for basic ethtool operations. Signed-off-by: Madalin Bucur--- drivers/net/ethernet/freescale/dpaa/Makefile | 2 +- .../net/ethernet/freescale/dpaa/dpaa_eth_common.c | 2 + .../net/ethernet/freescale/dpaa/dpaa_eth_common.h | 3 + drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c | 230 + 4 files changed, 236 insertions(+), 1 deletion(-) create mode 100644 drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c diff --git a/drivers/net/ethernet/freescale/dpaa/Makefile b/drivers/net/ethernet/freescale/dpaa/Makefile index 3847ec7..9b75d52 100644 --- a/drivers/net/ethernet/freescale/dpaa/Makefile +++ b/drivers/net/ethernet/freescale/dpaa/Makefile @@ -8,4 +8,4 @@ ccflags-y += -I$(FMAN) obj-$(CONFIG_FSL_DPAA_ETH) += fsl_dpa.o -fsl_dpa-objs += dpaa_eth.o dpaa_eth_sg.o dpaa_eth_common.o +fsl_dpa-objs += dpaa_eth.o dpaa_eth_sg.o dpaa_eth_common.o dpaa_ethtool.o diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth_common.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth_common.c index 89f3b1f..2b95696 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth_common.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth_common.c @@ -102,6 +102,8 @@ int dpa_netdev_init(struct net_device *net_dev, memcpy(net_dev->perm_addr, mac_addr, net_dev->addr_len); memcpy(net_dev->dev_addr, mac_addr, net_dev->addr_len); + net_dev->ethtool_ops = _ethtool_ops; + net_dev->needed_headroom = priv->tx_headroom; net_dev->watchdog_timeo = msecs_to_jiffies(tx_timeout); diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth_common.h b/drivers/net/ethernet/freescale/dpaa/dpaa_eth_common.h index 2e9471d..160a018 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth_common.h +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth_common.h @@ -43,6 +43,9 @@ /* used in napi related functions */ extern u16 qman_portal_max; +/* from dpa_ethtool.c */ +extern const struct ethtool_ops dpa_ethtool_ops; + int dpa_netdev_init(struct net_device *net_dev, const u8 *mac_addr, u16 tx_timeout); diff --git 
a/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c b/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c new file mode 100644 index 000..fa8ba69 --- /dev/null +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c @@ -0,0 +1,230 @@ +/* Copyright 2008-2015 Freescale Semiconductor, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Freescale Semiconductor nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * + * ALTERNATIVELY, this software may be distributed under the terms of the + * GNU General Public License ("GPL") as published by the Free Software + * Foundation, either version 2 of that License or (at your option) any + * later version. + * + * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include + +#include "dpaa_eth.h" +#include "mac.h" +#include "dpaa_eth_common.h" + +static int dpa_get_settings(struct net_device *net_dev, + struct ethtool_cmd *et_cmd) +{ + int err; + struct dpa_priv_s *priv; + + priv = netdev_priv(net_dev); + + if (!priv->mac_dev->phy_dev) { + netdev_dbg(net_dev, "phy device not initialized\n"); + return 0; + } + + err = phy_ethtool_gset(priv->mac_dev->phy_dev, et_cmd); + + return err; +} + +static int dpa_set_settings(struct net_device *net_dev, + struct ethtool_cmd *et_cmd) +{ + int err; + struct dpa_priv_s *priv; + + priv =