Re: [PATCH v5 03/14] PCI: Add pcie_bandwidth_capable() to compute max supported link bandwidth
On 4/2/2018 3:40 AM, Bjorn Helgaas wrote: On Sun, Apr 01, 2018 at 11:38:53PM +0300, Tal Gilboa wrote: On 3/31/2018 12:05 AM, Bjorn Helgaas wrote: From: Tal Gilboa Add pcie_bandwidth_capable() to compute the max link bandwidth supported by a device, based on the max link speed and width, adjusted by the encoding overhead. The maximum bandwidth of the link is computed as: max_link_speed * max_link_width * (1 - encoding_overhead) The encoding overhead is about 20% for 2.5 and 5.0 GT/s links using 8b/10b encoding, and about 1.5% for 8 GT/s or higher speed links using 128b/130b encoding. Signed-off-by: Tal Gilboa [bhelgaas: adjust for pcie_get_speed_cap() and pcie_get_width_cap() signatures, don't export outside drivers/pci] Signed-off-by: Bjorn Helgaas Reviewed-by: Tariq Toukan --- drivers/pci/pci.c | 21 + drivers/pci/pci.h |9 + 2 files changed, 30 insertions(+) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 43075be79388..9ce89e254197 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -5208,6 +5208,27 @@ enum pcie_link_width pcie_get_width_cap(struct pci_dev *dev) return PCIE_LNK_WIDTH_UNKNOWN; } +/** + * pcie_bandwidth_capable - calculates a PCI device's link bandwidth capability + * @dev: PCI device + * @speed: storage for link speed + * @width: storage for link width + * + * Calculate a PCI device's link bandwidth by querying for its link speed + * and width, multiplying them, and applying encoding overhead. 
+ */ +u32 pcie_bandwidth_capable(struct pci_dev *dev, enum pci_bus_speed *speed, + enum pcie_link_width *width) +{ + *speed = pcie_get_speed_cap(dev); + *width = pcie_get_width_cap(dev); + + if (*speed == PCI_SPEED_UNKNOWN || *width == PCIE_LNK_WIDTH_UNKNOWN) + return 0; + + return *width * PCIE_SPEED2MBS_ENC(*speed); +} + /** * pci_select_bars - Make BAR mask from the type of resource * @dev: the PCI device for which BAR mask is made diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 66738f1050c0..2a50172b9803 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -261,8 +261,17 @@ void pci_disable_bridge_window(struct pci_dev *dev); (speed) == PCIE_SPEED_2_5GT ? "2.5 GT/s" : \ "Unknown speed") +/* PCIe speed to Mb/s with encoding overhead: 20% for gen2, ~1.5% for gen3 */ +#define PCIE_SPEED2MBS_ENC(speed) \ Missing gen4. I made it "gen3+". I think that's accurate, isn't it? The spec doesn't seem to actually use "gen3" as a specific term, but sec 4.2.2 says rates of 8 GT/s or higher (which I think includes gen3 and gen4) use 128b/130b encoding. I meant that PCIE_SPEED_16_0GT will return 0 from this macro since it wasn't added. Need to return 15754.
[PATCH v3 2/2] net: usb: asix88179_178a: de-duplicate code
Remove the duplicated code for asix88179_178a bind and reset methods. Signed-off-by: Alexander Kurz --- drivers/net/usb/ax88179_178a.c | 137 ++--- 1 file changed, 31 insertions(+), 106 deletions(-) diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c index a6ef75907ae9..fea4c7b877cc 100644 --- a/drivers/net/usb/ax88179_178a.c +++ b/drivers/net/usb/ax88179_178a.c @@ -1223,7 +1223,7 @@ static int ax88179_led_setting(struct usbnet *dev) return 0; } -static int ax88179_bind(struct usbnet *dev, struct usb_interface *intf) +static int ax88179_bind_or_reset(struct usbnet *dev, bool do_reset) { u8 buf[5]; u16 *tmp16; @@ -1231,12 +1231,11 @@ static int ax88179_bind(struct usbnet *dev, struct usb_interface *intf) struct ax88179_data *ax179_data = (struct ax88179_data *)dev->data; struct ethtool_eee eee_data; - usbnet_get_endpoints(dev, intf); - tmp16 = (u16 *)buf; tmp = (u8 *)buf; - memset(ax179_data, 0, sizeof(*ax179_data)); + if (!do_reset) + memset(ax179_data, 0, sizeof(*ax179_data)); /* Power up ethernet PHY */ *tmp16 = 0; @@ -1249,9 +1248,13 @@ static int ax88179_bind(struct usbnet *dev, struct usb_interface *intf) ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_CLK_SELECT, 1, 1, tmp); msleep(100); + if (do_reset) + ax88179_auto_detach(dev, 0); + ax88179_read_cmd(dev, AX_ACCESS_MAC, AX_NODE_ID, ETH_ALEN, ETH_ALEN, dev->net->dev_addr); - memcpy(dev->net->perm_addr, dev->net->dev_addr, ETH_ALEN); + if (!do_reset) + memcpy(dev->net->perm_addr, dev->net->dev_addr, ETH_ALEN); /* RX bulk configuration */ memcpy(tmp, &AX88179_BULKIN_SIZE[0], 5); @@ -1266,19 +1269,21 @@ static int ax88179_bind(struct usbnet *dev, struct usb_interface *intf) ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_PAUSE_WATERLVL_HIGH, 1, 1, tmp); - dev->net->netdev_ops = &ax88179_netdev_ops; - dev->net->ethtool_ops = &ax88179_ethtool_ops; - dev->net->needed_headroom = 8; - dev->net->max_mtu = 4088; - - /* Initialize MII structure */ - dev->mii.dev = dev->net; - dev->mii.mdio_read = 
ax88179_mdio_read; - dev->mii.mdio_write = ax88179_mdio_write; - dev->mii.phy_id_mask = 0xff; - dev->mii.reg_num_mask = 0xff; - dev->mii.phy_id = 0x03; - dev->mii.supports_gmii = 1; + if (!do_reset) { + dev->net->netdev_ops = &ax88179_netdev_ops; + dev->net->ethtool_ops = &ax88179_ethtool_ops; + dev->net->needed_headroom = 8; + dev->net->max_mtu = 4088; + + /* Initialize MII structure */ + dev->mii.dev = dev->net; + dev->mii.mdio_read = ax88179_mdio_read; + dev->mii.mdio_write = ax88179_mdio_write; + dev->mii.phy_id_mask = 0xff; + dev->mii.reg_num_mask = 0xff; + dev->mii.phy_id = 0x03; + dev->mii.supports_gmii = 1; + } dev->net->features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM; @@ -1330,6 +1335,13 @@ static int ax88179_bind(struct usbnet *dev, struct usb_interface *intf) return 0; } +static int ax88179_bind(struct usbnet *dev, struct usb_interface *intf) +{ + usbnet_get_endpoints(dev, intf); + + return ax88179_bind_or_reset(dev, false); +} + static void ax88179_unbind(struct usbnet *dev, struct usb_interface *intf) { u16 tmp16; @@ -1530,94 +1542,7 @@ static int ax88179_link_reset(struct usbnet *dev) static int ax88179_reset(struct usbnet *dev) { - u8 buf[5]; - u16 *tmp16; - u8 *tmp; - struct ax88179_data *ax179_data = (struct ax88179_data *)dev->data; - struct ethtool_eee eee_data; - - tmp16 = (u16 *)buf; - tmp = (u8 *)buf; - - /* Power up ethernet PHY */ - *tmp16 = 0; - ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_PHYPWR_RSTCTL, 2, 2, tmp16); - - *tmp16 = AX_PHYPWR_RSTCTL_IPRL; - ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_PHYPWR_RSTCTL, 2, 2, tmp16); - msleep(200); - - *tmp = AX_CLK_SELECT_ACS | AX_CLK_SELECT_BCS; - ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_CLK_SELECT, 1, 1, tmp); - msleep(100); - - /* Ethernet PHY Auto Detach*/ - ax88179_auto_detach(dev, 0); - - ax88179_read_cmd(dev, AX_ACCESS_MAC, AX_NODE_ID, ETH_ALEN, ETH_ALEN, -dev->net->dev_addr); - - /* RX bulk configuration */ - memcpy(tmp, &AX88179_BULKIN_SIZE[0], 5); - ax88179_write_cmd(dev, 
AX_ACCESS_MAC, AX_RX_BULKIN_QCTRL, 5, 5, tmp); - - dev->rx_urb_size = 1024 * 20; - - *tmp = 0x34; - ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_PAUSE_WATERLVL_LOW, 1, 1, tmp); - - *tmp = 0x52; -
[PATCH v3 1/2] net: usb: asix88179_178a: set permanent address once only
The permanent address of asix88179_178a devices is read at probe time and should not be overwritten later. Otherwise it may be overwritten unintentionally with a configured address. Signed-off-by: Alexander Kurz --- drivers/net/usb/ax88179_178a.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c index f32261ecd215..a6ef75907ae9 100644 --- a/drivers/net/usb/ax88179_178a.c +++ b/drivers/net/usb/ax88179_178a.c @@ -1556,7 +1556,6 @@ static int ax88179_reset(struct usbnet *dev) ax88179_read_cmd(dev, AX_ACCESS_MAC, AX_NODE_ID, ETH_ALEN, ETH_ALEN, dev->net->dev_addr); - memcpy(dev->net->perm_addr, dev->net->dev_addr, ETH_ALEN); /* RX bulk configuration */ memcpy(tmp, &AX88179_BULKIN_SIZE[0], 5); -- 2.11.0
Re: [PATCH] net: improve ipv4 performances
Hi Anton, Thank you for the patch! Perhaps something to improve: [auto build test WARNING on net/master] [also build test WARNING on v4.16 next-20180329] [if your patch is applied to the wrong git tree, please drop us a note to help improve the system] url: https://github.com/0day-ci/linux/commits/Anton-Gary-Ceph/net-improve-ipv4-performances/20180402-103807 reproduce: # apt-get install sparse make ARCH=x86_64 allmodconfig make C=1 CF=-D__CHECK_ENDIAN__ sparse warnings: (new ones prefixed by >>) >> net/bridge/br_private.h:690:15: sparse: restricted __be16 degrades to integer net/bridge/br_private.h:694:15: sparse: restricted __be16 degrades to integer -- >> net/bridge/br_multicast.c:66:14: sparse: restricted __be16 degrades to >> integer net/bridge/br_multicast.c:69:14: sparse: restricted __be16 degrades to integer net/bridge/br_multicast.c:96:14: sparse: restricted __be16 degrades to integer net/bridge/br_multicast.c:99:14: sparse: restricted __be16 degrades to integer net/bridge/br_multicast.c:171:14: sparse: restricted __be16 degrades to integer net/bridge/br_multicast.c:175:14: sparse: restricted __be16 degrades to integer net/bridge/br_multicast.c:96:14: sparse: restricted __be16 degrades to integer net/bridge/br_multicast.c:99:14: sparse: restricted __be16 degrades to integer net/bridge/br_multicast.c:581:14: sparse: restricted __be16 degrades to integer net/bridge/br_multicast.c:584:14: sparse: restricted __be16 degrades to integer >> net/bridge/br_multicast.c:66:14: sparse: restricted __be16 degrades to >> integer net/bridge/br_multicast.c:69:14: sparse: restricted __be16 degrades to integer net/bridge/br_multicast.c:96:14: sparse: restricted __be16 degrades to integer net/bridge/br_multicast.c:99:14: sparse: restricted __be16 degrades to integer net/bridge/br_multicast.c:96:14: sparse: restricted __be16 degrades to integer net/bridge/br_multicast.c:99:14: sparse: restricted __be16 degrades to integer net/bridge/br_multicast.c:1325:14: sparse: restricted 
__be16 degrades to integer net/bridge/br_multicast.c:1328:14: sparse: restricted __be16 degrades to integer net/bridge/br_multicast.c:1765:14: sparse: restricted __be16 degrades to integer net/bridge/br_multicast.c:1769:14: sparse: restricted __be16 degrades to integer net/bridge/br_multicast.c:1913:14: sparse: restricted __be16 degrades to integer net/bridge/br_multicast.c:1917:14: sparse: restricted __be16 degrades to integer >> net/bridge/br_private.h:690:15: sparse: restricted __be16 degrades to integer net/bridge/br_private.h:694:15: sparse: restricted __be16 degrades to integer net/bridge/br_multicast.c:2497:14: sparse: restricted __be16 degrades to integer net/bridge/br_multicast.c:2532:14: sparse: restricted __be16 degrades to integer -- net/core/filter.c:318:33: sparse: subtraction of functions? Share your drugs net/core/filter.c:321:33: sparse: subtraction of functions? Share your drugs net/core/filter.c:324:33: sparse: subtraction of functions? Share your drugs net/core/filter.c:327:33: sparse: subtraction of functions? Share your drugs net/core/filter.c:330:33: sparse: subtraction of functions? 
Share your drugs net/core/filter.c:1184:39: sparse: incorrect type in argument 1 (different address spaces) @@expected struct sock_filter const *filter @@got struct sockstruct sock_filter const *filter @@ net/core/filter.c:1184:39:expected struct sock_filter const *filter net/core/filter.c:1184:39:got struct sock_filter [noderef] *filter net/core/filter.c:1286:39: sparse: incorrect type in argument 1 (different address spaces) @@expected struct sock_filter const *filter @@got struct sockstruct sock_filter const *filter @@ net/core/filter.c:1286:39:expected struct sock_filter const *filter net/core/filter.c:1286:39:got struct sock_filter [noderef] *filter net/core/filter.c:1547:43: sparse: incorrect type in argument 2 (different base types) @@expected restricted __wsum [usertype] diff @@got unsigned lonrestricted __wsum [usertype] diff @@ net/core/filter.c:1547:43:expected restricted __wsum [usertype] diff net/core/filter.c:1547:43:got unsigned long long [unsigned] [usertype] to net/core/filter.c:1550:36: sparse: incorrect type in argument 2 (different base types) @@expected restricted __be16 [usertype] old @@got unsigned lonrestricted __be16 [usertype] old @@ net/core/filter.c:1550:36:expected restricted __be16 [usertype] old net/core/filter.c:1550:36:got unsigned long long [unsigned] [usertype] from net/core/filter.c:1550:42: sparse: incorrect type in argument 3 (different base types) @@expected restricted __be16 [usertype] new @@got unsigned lonrestricted __be16 [usertype] new @@ net/core/filter.c:1550:42:expected restricted __be1
Re: [PATCH net-next v2 1/2] fs/crashdd: add API to collect hardware dump in second kernel
Fri, Mar 30, 2018 at 08:42:00PM CEST, ebied...@xmission.com wrote: >Rahul Lakkireddy writes: > >> On Friday, March 03/30/18, 2018 at 16:09:07 +0530, Jiri Pirko wrote: >>> Sat, Mar 24, 2018 at 11:56:33AM CET, rahul.lakkire...@chelsio.com wrote: >>> >Add a new module crashdd that exports the /sys/kernel/crashdd/ >>> >directory in second kernel, containing collected hardware/firmware >>> >dumps. >>> > >>> >The sequence of actions done by device drivers to append their device >>> >specific hardware/firmware logs to /sys/kernel/crashdd/ directory are >>> >as follows: >>> > >>> >1. During probe (before hardware is initialized), device drivers >>> >register to the crashdd module (via crashdd_add_dump()), with >>> >callback function, along with buffer size and log name needed for >>> >firmware/hardware log collection. >>> > >>> >2. Crashdd creates a driver's directory under >>> >/sys/kernel/crashdd/. Then, it allocates the buffer with >>> >>> This smells. I need to identify the exact ASIC instance that produced >>> the dump. To identify by driver name does not help me if I have multiple >>> instances of the same driver. This looks wrong to me. This looks like >>> a job for devlink where you have 1 devlink instance per 1 ASIC instance. >>> >>> Please see: >>> http://patchwork.ozlabs.org/project/netdev/list/?series=36524 >>> >>> I believe that the solution in the patchset could be used for >>> your use case too. >>> >> >> The sysfs approach proposed here had been dropped in favour of exporting >> the dumps as ELF notes in /proc/vmcore. >> >> Will be posting the new patches soon. >The concern was actually how you identify which device that came from. >Where you read the identifier changes but sysfs or /proc/vmcore the >change remains valid. Yeah. I still don't see how you link the dump and the device. Rahul, did you look at the patchset I pointed out? Thanks!
Re: [PATCH net-next v2 1/2] fs/crashdd: add API to collect hardware dump in second kernel
Fri, Mar 30, 2018 at 05:11:29PM CEST, and...@lunn.ch wrote: >> Please see: >> http://patchwork.ozlabs.org/project/netdev/list/?series=36524 >> >> I believe that the solution in the patchset could be used for >> your use case too. > >Hi Jiri > >https://lkml.org/lkml/2018/3/20/436 > >How well does this API work for a 2Gbyte snapshot? Ccing Alex who did the tests. > >Andrew
Re: [patch iproute2-next] man: fix devlink object list
Fri, Mar 30, 2018 at 06:43:33PM CEST, dsah...@gmail.com wrote: >On 3/29/18 8:26 AM, Jiri Pirko wrote: >> From: Jiri Pirko >> >> Signed-off-by: Jiri Pirko >> --- >> man/man8/devlink.8 | 10 +- >> 1 file changed, 9 insertions(+), 1 deletion(-) >> > >resource and sb exist on master, so this patch seems more appropriate >for it than -next. It's a documentation fix, so I sent it to -next. I don't mind this to be applied on master though. Stephen?
possible deadlock in skb_queue_tail
Hello, syzbot hit the following crash on net-next commit 06b19fe9a6df7aaa423cd8404ebe5ac9ec4b2960 (Sun Apr 1 03:37:33 2018 +) Merge branch 'chelsio-inline-tls' syzbot dashboard link: https://syzkaller.appspot.com/bug?extid=6b495100f17ca8554ab9 Unfortunately, I don't have any reproducer for this crash yet. Raw console output: https://syzkaller.appspot.com/x/log.txt?id=6218830443446272 Kernel config: https://syzkaller.appspot.com/x/.config?id=3327544840960562528 compiler: gcc (GCC) 7.1.1 20170620 IMPORTANT: if you fix the bug, please add the following tag to the commit: Reported-by: syzbot+6b495100f17ca8554...@syzkaller.appspotmail.com It will help syzbot understand when the bug is fixed. See footer for details. If you forward the report, please keep this part and the footer. == WARNING: possible circular locking dependency detected 4.16.0-rc6+ #290 Not tainted -- syz-executor7/20971 is trying to acquire lock: (&af_unix_sk_receive_queue_lock_key){+.+.}, at: [<271ef0d8>] skb_queue_tail+0x26/0x150 net/core/skbuff.c:2899 but task is already holding lock: (&(&u->lock)->rlock/1){+.+.}, at: [<4e725e14>] unix_state_double_lock+0x7b/0xb0 net/unix/af_unix.c:1088 which lock already depends on the new lock. 
the existing dependency chain (in reverse order) is: -> #1 (&(&u->lock)->rlock/1){+.+.}: _raw_spin_lock_nested+0x28/0x40 kernel/locking/spinlock.c:354 sk_diag_dump_icons net/unix/diag.c:82 [inline] sk_diag_fill.isra.4+0xa52/0xfe0 net/unix/diag.c:144 sk_diag_dump net/unix/diag.c:178 [inline] unix_diag_dump+0x400/0x4f0 net/unix/diag.c:206 netlink_dump+0x492/0xcf0 net/netlink/af_netlink.c:2221 __netlink_dump_start+0x4ec/0x710 net/netlink/af_netlink.c:2318 netlink_dump_start include/linux/netlink.h:214 [inline] unix_diag_handler_dump+0x3e7/0x750 net/unix/diag.c:307 __sock_diag_cmd net/core/sock_diag.c:230 [inline] sock_diag_rcv_msg+0x204/0x360 net/core/sock_diag.c:261 netlink_rcv_skb+0x14b/0x380 net/netlink/af_netlink.c:2443 sock_diag_rcv+0x2a/0x40 net/core/sock_diag.c:272 netlink_unicast_kernel net/netlink/af_netlink.c:1307 [inline] netlink_unicast+0x4c4/0x6b0 net/netlink/af_netlink.c:1333 netlink_sendmsg+0xa4a/0xe80 net/netlink/af_netlink.c:1896 sock_sendmsg_nosec net/socket.c:629 [inline] sock_sendmsg+0xca/0x110 net/socket.c:639 sock_write_iter+0x31a/0x5d0 net/socket.c:908 call_write_iter include/linux/fs.h:1782 [inline] new_sync_write fs/read_write.c:469 [inline] __vfs_write+0x684/0x970 fs/read_write.c:482 vfs_write+0x189/0x510 fs/read_write.c:544 SYSC_write fs/read_write.c:589 [inline] SyS_write+0xef/0x220 fs/read_write.c:581 do_syscall_64+0x281/0x940 arch/x86/entry/common.c:287 entry_SYSCALL_64_after_hwframe+0x42/0xb7 -> #0 (&af_unix_sk_receive_queue_lock_key){+.+.}: lock_acquire+0x1d5/0x580 kernel/locking/lockdep.c:3920 __raw_spin_lock_irqsave include/linux/spinlock_api_smp.h:110 [inline] _raw_spin_lock_irqsave+0x96/0xc0 kernel/locking/spinlock.c:152 skb_queue_tail+0x26/0x150 net/core/skbuff.c:2899 unix_dgram_sendmsg+0xa30/0x1610 net/unix/af_unix.c:1807 sock_sendmsg_nosec net/socket.c:629 [inline] sock_sendmsg+0xca/0x110 net/socket.c:639 ___sys_sendmsg+0x320/0x8b0 net/socket.c:2047 __sys_sendmmsg+0x1ee/0x620 net/socket.c:2137 SYSC_sendmmsg net/socket.c:2168 
[inline] SyS_sendmmsg+0x35/0x60 net/socket.c:2163 do_syscall_64+0x281/0x940 arch/x86/entry/common.c:287 entry_SYSCALL_64_after_hwframe+0x42/0xb7 other info that might help us debug this: Possible unsafe locking scenario: CPU0CPU1 lock(&(&u->lock)->rlock/1); lock(&af_unix_sk_receive_queue_lock_key); lock(&(&u->lock)->rlock/1); lock(&af_unix_sk_receive_queue_lock_key); *** DEADLOCK *** 1 lock held by syz-executor7/20971: #0: (&(&u->lock)->rlock/1){+.+.}, at: [<4e725e14>] unix_state_double_lock+0x7b/0xb0 net/unix/af_unix.c:1088 stack backtrace: CPU: 0 PID: 20971 Comm: syz-executor7 Not tainted 4.16.0-rc6+ #290 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:17 [inline] dump_stack+0x194/0x24d lib/dump_stack.c:53 print_circular_bug.isra.38+0x2cd/0x2dc kernel/locking/lockdep.c:1223 check_prev_add kernel/locking/lockdep.c:1863 [inline] check_prevs_add kernel/locking/lockdep.c:1976 [inline] validate_chain kernel/locking/lockdep.c:2417 [inline] __lock_acquire+0x30a8/0x3e00 kernel/locking/lockdep.c:3431 lock_acquire+0x1d5/0x580 kernel/locking/lockdep.c:3920 __raw_spin_lock_irqsave include/linux/spinloc
KASAN: use-after-free Read in ccid2_hc_tx_packet_recv
Hello, syzbot hit the following crash on upstream commit 0adb32858b0bddf4ada5f364a84ed60b196dbcda (Sun Apr 1 21:20:27 2018 +) Linux 4.16 syzbot dashboard link: https://syzkaller.appspot.com/bug?extid=554ccde221001ab5479a Unfortunately, I don't have any reproducer for this crash yet. Raw console output: https://syzkaller.appspot.com/x/log.txt?id=5822430194958336 Kernel config: https://syzkaller.appspot.com/x/.config?id=-2374466361298166459 compiler: gcc (GCC) 7.1.1 20170620 user-space arch: i386 IMPORTANT: if you fix the bug, please add the following tag to the commit: Reported-by: syzbot+554ccde221001ab54...@syzkaller.appspotmail.com It will help syzbot understand when the bug is fixed. See footer for details. If you forward the report, please keep this part and the footer. R10: R11: R12: R13: R14: R15: dccp_parse_options: DCCP(7d56a000): Option 32 (len=7) error=9 == dccp_check_seqno: Step 6 failed for RESET packet, (LSWL(279336972291068) <= P.seqno(279336972291066) <= S.SWH(279336972291142)) and (P.ackno exists or LAWL(234137106534459) <= P.ackno(234137106534459) <= S.AWH(234137106534460), sending SYNC... 
BUG: KASAN: use-after-free in ccid2_hc_tx_packet_recv+0x234a/0x2440 net/dccp/ccids/ccid2.c:598 Read of size 1 at addr 8801bb7a4a82 by task syz-executor1/1660 CPU: 1 PID: 1660 Comm: syz-executor1 Not tainted 4.16.0+ #285 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:17 [inline] dump_stack+0x194/0x24d lib/dump_stack.c:53 print_address_description+0x73/0x250 mm/kasan/report.c:256 kasan_report_error mm/kasan/report.c:354 [inline] kasan_report+0x23c/0x360 mm/kasan/report.c:412 __asan_report_load1_noabort+0x14/0x20 mm/kasan/report.c:430 ccid2_hc_tx_packet_recv+0x234a/0x2440 net/dccp/ccids/ccid2.c:598 ccid_hc_tx_packet_recv net/dccp/ccid.h:192 [inline] dccp_deliver_input_to_ccids+0x1d0/0x250 net/dccp/input.c:186 dccp_rcv_established+0x88/0xb0 net/dccp/input.c:378 dccp_v4_do_rcv+0x135/0x160 net/dccp/ipv4.c:653 sk_backlog_rcv include/net/sock.h:908 [inline] __release_sock+0x124/0x360 net/core/sock.c:2271 release_sock+0xa4/0x2a0 net/core/sock.c:2786 dccp_sendmsg+0x528/0xe60 net/dccp/proto.c:820 inet_sendmsg+0x11f/0x5e0 net/ipv4/af_inet.c:764 sock_sendmsg_nosec net/socket.c:630 [inline] sock_sendmsg+0xca/0x110 net/socket.c:640 ___sys_sendmsg+0x320/0x8b0 net/socket.c:2046 __sys_sendmmsg+0x31b/0x620 net/socket.c:2129 C_SYSC_sendmmsg net/compat.c:745 [inline] compat_SyS_sendmmsg+0x32/0x40 net/compat.c:742 do_syscall_32_irqs_on arch/x86/entry/common.c:330 [inline] do_fast_syscall_32+0x3ec/0xf9f arch/x86/entry/common.c:392 entry_SYSENTER_compat+0x70/0x7f arch/x86/entry/entry_64_compat.S:139 RIP: 0023:0xf7f6dc99 RSP: 002b:f5f690ac EFLAGS: 0282 ORIG_RAX: 0159 RAX: ffda RBX: 0013 RCX: 2000b880 RDX: 0122 RSI: RDI: RBP: R08: R09: R10: R11: R12: R13: R14: R15: Allocated by task 1660: save_stack+0x43/0xd0 mm/kasan/kasan.c:447 set_track mm/kasan/kasan.c:459 [inline] kasan_kmalloc+0xad/0xe0 mm/kasan/kasan.c:552 __do_kmalloc_node mm/slab.c:3670 [inline] __kmalloc_node_track_caller+0x47/0x70 mm/slab.c:3684 
__kmalloc_reserve.isra.39+0x41/0xd0 net/core/skbuff.c:137 __alloc_skb+0x13b/0x780 net/core/skbuff.c:205 alloc_skb include/linux/skbuff.h:983 [inline] dccp_send_ack+0xb6/0x350 net/dccp/output.c:580 ccid2_hc_rx_packet_recv+0x10d/0x180 net/dccp/ccids/ccid2.c:766 ccid_hc_rx_packet_recv net/dccp/ccid.h:185 [inline] dccp_deliver_input_to_ccids+0xd9/0x250 net/dccp/input.c:180 dccp_rcv_established+0x88/0xb0 net/dccp/input.c:378 dccp_v4_do_rcv+0x135/0x160 net/dccp/ipv4.c:653 sk_backlog_rcv include/net/sock.h:908 [inline] __sk_receive_skb+0x33e/0xc10 net/core/sock.c:513 dccp_v4_rcv+0xf5f/0x1c80 net/dccp/ipv4.c:874 ip_local_deliver_finish+0x2f1/0xc50 net/ipv4/ip_input.c:216 NF_HOOK include/linux/netfilter.h:288 [inline] ip_local_deliver+0x1ce/0x6e0 net/ipv4/ip_input.c:257 dst_input include/net/dst.h:449 [inline] ip_rcv_finish+0xa36/0x2040 net/ipv4/ip_input.c:397 NF_HOOK include/linux/netfilter.h:288 [inline] ip_rcv+0xb76/0x1820 net/ipv4/ip_input.c:493 __netif_receive_skb_core+0x1a41/0x3460 net/core/dev.c:4562 __netif_receive_skb+0x2c/0x1b0 net/core/dev.c:4627 process_backlog+0x203/0x740 net/core/dev.c:5307 napi_poll net/core/dev.c:5705 [inline] net_rx_action+0x792/0x1910 net/core/dev.c:5771 __do_softirq+0x2d7/0xb85 kernel/softirq.c:285 Freed by task 1660: save_stack+0x43/0xd0 mm/kasan/kasan.c:447 set_track mm/kasan/kasan.c:459 [inline] __k
Re: [PATCH v3 2/2] net: usb: asix88179_178a: de-duplicate code
On Mon, 02 Apr 2018 08:43:49 +0100, Alexander Kurz wrote: Alexander, > > Remove the duplicated code for asix88179_178a bind and reset methods. > > Signed-off-by: Alexander Kurz > --- > drivers/net/usb/ax88179_178a.c | 137 > ++--- > 1 file changed, 31 insertions(+), 106 deletions(-) What has changed between this patch and the previous one? Having a bit of a change-log would certainly help. Also, I would have appreciated a reply to the questions I had on v2 before you posted a third version. > > diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c > index a6ef75907ae9..fea4c7b877cc 100644 > --- a/drivers/net/usb/ax88179_178a.c > +++ b/drivers/net/usb/ax88179_178a.c > @@ -1223,7 +1223,7 @@ static int ax88179_led_setting(struct usbnet *dev) > return 0; > } > > -static int ax88179_bind(struct usbnet *dev, struct usb_interface *intf) > +static int ax88179_bind_or_reset(struct usbnet *dev, bool do_reset) > { > u8 buf[5]; > u16 *tmp16; > @@ -1231,12 +1231,11 @@ static int ax88179_bind(struct usbnet *dev, struct > usb_interface *intf) > struct ax88179_data *ax179_data = (struct ax88179_data *)dev->data; > struct ethtool_eee eee_data; > > - usbnet_get_endpoints(dev, intf); > - > tmp16 = (u16 *)buf; > tmp = (u8 *)buf; > > - memset(ax179_data, 0, sizeof(*ax179_data)); > + if (!do_reset) > + memset(ax179_data, 0, sizeof(*ax179_data)); > > /* Power up ethernet PHY */ > *tmp16 = 0; > @@ -1249,9 +1248,13 @@ static int ax88179_bind(struct usbnet *dev, struct > usb_interface *intf) > ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_CLK_SELECT, 1, 1, tmp); > msleep(100); > > + if (do_reset) > + ax88179_auto_detach(dev, 0); > + > ax88179_read_cmd(dev, AX_ACCESS_MAC, AX_NODE_ID, ETH_ALEN, >ETH_ALEN, dev->net->dev_addr); > - memcpy(dev->net->perm_addr, dev->net->dev_addr, ETH_ALEN); > + if (!do_reset) > + memcpy(dev->net->perm_addr, dev->net->dev_addr, ETH_ALEN); > > /* RX bulk configuration */ > memcpy(tmp, &AX88179_BULKIN_SIZE[0], 5); > @@ -1266,19 +1269,21 @@ static 
int ax88179_bind(struct usbnet *dev, struct > usb_interface *intf) > ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_PAUSE_WATERLVL_HIGH, > 1, 1, tmp); > > - dev->net->netdev_ops = &ax88179_netdev_ops; > - dev->net->ethtool_ops = &ax88179_ethtool_ops; > - dev->net->needed_headroom = 8; > - dev->net->max_mtu = 4088; > - > - /* Initialize MII structure */ > - dev->mii.dev = dev->net; > - dev->mii.mdio_read = ax88179_mdio_read; > - dev->mii.mdio_write = ax88179_mdio_write; > - dev->mii.phy_id_mask = 0xff; > - dev->mii.reg_num_mask = 0xff; > - dev->mii.phy_id = 0x03; > - dev->mii.supports_gmii = 1; > + if (!do_reset) { > + dev->net->netdev_ops = &ax88179_netdev_ops; > + dev->net->ethtool_ops = &ax88179_ethtool_ops; > + dev->net->needed_headroom = 8; > + dev->net->max_mtu = 4088; > + > + /* Initialize MII structure */ > + dev->mii.dev = dev->net; > + dev->mii.mdio_read = ax88179_mdio_read; > + dev->mii.mdio_write = ax88179_mdio_write; > + dev->mii.phy_id_mask = 0xff; > + dev->mii.reg_num_mask = 0xff; > + dev->mii.phy_id = 0x03; > + dev->mii.supports_gmii = 1; > + } > > dev->net->features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | > NETIF_F_RXCSUM; > @@ -1330,6 +1335,13 @@ static int ax88179_bind(struct usbnet *dev, struct > usb_interface *intf) > return 0; > } > > +static int ax88179_bind(struct usbnet *dev, struct usb_interface *intf) > +{ > + usbnet_get_endpoints(dev, intf); > + > + return ax88179_bind_or_reset(dev, false); > +} > + > static void ax88179_unbind(struct usbnet *dev, struct usb_interface *intf) > { > u16 tmp16; > @@ -1530,94 +1542,7 @@ static int ax88179_link_reset(struct usbnet *dev) > > static int ax88179_reset(struct usbnet *dev) > { > - u8 buf[5]; > - u16 *tmp16; > - u8 *tmp; > - struct ax88179_data *ax179_data = (struct ax88179_data *)dev->data; > - struct ethtool_eee eee_data; > - > - tmp16 = (u16 *)buf; > - tmp = (u8 *)buf; > - > - /* Power up ethernet PHY */ > - *tmp16 = 0; > - ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_PHYPWR_RSTCTL, 2, 2, tmp16); > - > - 
*tmp16 = AX_PHYPWR_RSTCTL_IPRL; > - ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_PHYPWR_RSTCTL, 2, 2, tmp16); > - msleep(200); > - > - *tmp = AX_CLK_SELECT_ACS | AX_CLK_SELECT_BCS; > - ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_CLK_SELECT, 1, 1, tmp); > - msleep(100); > - > - /* Ethernet PHY Auto Detach*/ > - ax88179_auto_detach(dev, 0); > - > - ax88179_read_cmd(dev, AX_ACCESS_MAC, AX_NODE_ID, ETH_A
Re: [PATCH V5 net-next 06/14] net/tls: Add generic NIC offload infrastructure
Hi Kirill, On 3/28/2018 6:11 PM, Kirill Tkhai wrote: On 28.03.2018 02:56, Saeed Mahameed wrote: From: Ilya Lesokhin This patch adds a generic infrastructure to offload TLS crypto to a network device. It enables the kernel TLS socket to skip encryption and authentication operations on the transmit side of the data path. Leaving those computationally expensive operations to the NIC. The NIC offload infrastructure builds TLS records and pushes them to the TCP layer just like the SW KTLS implementation and using the same API. TCP segmentation is mostly unaffected. Currently the only exception is that we prevent mixed SKBs where only part of the payload requires offload. In the future we are likely to add a similar restriction following a change cipher spec record. The notable differences between SW KTLS and NIC offloaded TLS implementations are as follows: 1. The offloaded implementation builds "plaintext TLS record", those records contain plaintext instead of ciphertext and place holder bytes instead of authentication tags. 2. The offloaded implementation maintains a mapping from TCP sequence number to TLS records. Thus given a TCP SKB sent from a NIC offloaded TLS socket, we can use the tls NIC offload infrastructure to obtain enough context to encrypt the payload of the SKB. A TLS record is released when the last byte of the record is ack'ed, this is done through the new icsk_clean_acked callback. The infrastructure should be extendable to support various NIC offload implementations. However it is currently written with the implementation below in mind: The NIC assumes that packets from each offloaded stream are sent as plaintext and in-order. It keeps track of the TLS records in the TCP stream. When a packet marked for offload is transmitted, the NIC encrypts the payload in-place and puts authentication tags in the relevant place holders. The responsibility for handling out-of-order packets (i.e. TCP retransmission, qdisc drops) falls on the netdev driver. 
The netdev driver keeps track of the expected TCP SN from the NIC's perspective. If the next packet to transmit matches the expected TCP SN, the driver advances the expected TCP SN, and transmits the packet with TLS offload indication. If the next packet to transmit does not match the expected TCP SN. The driver calls the TLS layer to obtain the TLS record that includes the TCP of the packet for transmission. Using this TLS record, the driver posts a work entry on the transmit queue to reconstruct the NIC TLS state required for the offload of the out-of-order packet. It updates the expected TCP SN accordingly and transmits the now in-order packet. The same queue is used for packet transmission and TLS context reconstruction to avoid the need for flushing the transmit queue before issuing the context reconstruction request. Signed-off-by: Ilya Lesokhin Signed-off-by: Boris Pismenny Signed-off-by: Aviad Yehezkel Signed-off-by: Saeed Mahameed --- include/net/tls.h | 120 +-- net/tls/Kconfig | 10 + net/tls/Makefile | 2 + net/tls/tls_device.c | 759 ++ net/tls/tls_device_fallback.c | 454 + net/tls/tls_main.c| 120 --- net/tls/tls_sw.c | 132 7 files changed, 1476 insertions(+), 121 deletions(-) create mode 100644 net/tls/tls_device.c create mode 100644 net/tls/tls_device_fallback.c diff --git a/include/net/tls.h b/include/net/tls.h index 437a746300bf..0a8529e9ec21 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -57,21 +57,10 @@ #define TLS_AAD_SPACE_SIZE 13 -struct tls_sw_context { +struct tls_sw_context_tx { What the reason splitting this into tx + rx does not go in separate patch? Added a separate patch for this in V6. 
struct crypto_aead *aead_send; - struct crypto_aead *aead_recv; struct crypto_wait async_wait; - /* Receive context */ - struct strparser strp; - void (*saved_data_ready)(struct sock *sk); - unsigned int (*sk_poll)(struct file *file, struct socket *sock, - struct poll_table_struct *wait); - struct sk_buff *recv_pkt; - u8 control; - bool decrypted; - - /* Sending context */ char aad_space[TLS_AAD_SPACE_SIZE]; unsigned int sg_plaintext_size; @@ -88,6 +77,50 @@ struct tls_sw_context { struct scatterlist sg_aead_out[2]; }; +struct tls_sw_context_rx { + struct crypto_aead *aead_recv; + struct crypto_wait async_wait; + + struct strparser strp; + void (*saved_data_ready)(struct sock *sk); + unsigned int (*sk_poll)(struct file *file, struct socket *sock, + struct poll_table_struct *wait); + struct sk_buff *recv_pkt; + u8 control; + bool decrypted; +}; + +struct tls_record_info { + struct list_head list; + u32 end_seq; +
Re: [PATCH v2 0/7] net: thunderx: implement DMAC filtering support
On Sat, Mar 31, 2018 at 10:07:30PM -0400, David Miller wrote: > From: Vadim Lomovtsev > Date: Fri, 30 Mar 2018 04:59:46 -0700 > > > From: Vadim Lomovtsev > > > > By default CN88XX BGX accepts all incoming multicast and broadcast > > packets and filtering is disabled. The nic driver doesn't provide > > an ability to change such behaviour. > > > > This series is to implement DMAC filtering management for CN88XX > > nic driver allowing user to enable/disable filtering and configure > > specific MAC addresses to filter traffic. > > > > Changes from v1: > > build issues: > > - update code in order to address compiler warnings; > > checkpatch.pl reported issues: > > - update code in order to fit 80 symbols length; > > - update commit descriptions in order to fit 80 symbols length; > > Series applied. Thank you. WBR, Vadim
RE: [PATCH] net: bond: skip vlan header when do layer 3+4 hash policy
> -Original Message- > From: Nikolay Aleksandrov [mailto:niko...@cumulusnetworks.com] > Sent: Saturday, March 31, 2018 5:23 PM > To: liujian (CE) ; da...@davemloft.net; > j.vosbu...@gmail.com; vfal...@gmail.com; a...@greyhouse.net > Cc: netdev@vger.kernel.org; weiyongjun (A) > Subject: Re: [PATCH] net: bond: skip vlan header when do layer 3+4 hash policy > > On 31/03/18 12:14, liujia...@huawei.com wrote: > > From: liujian > > > > When the hash policy is BOND_XMIT_POLICY_LAYER34 mode and skb > protocol > > is 802.1q VLAN, the policy will be degenerated to LAYER2 mode; Now, > > change it to get the next layer protocol to ensure that it worked in > > BOND_XMIT_POLICY_LAYER34 mode. > > > > Signed-off-by: liujian > > --- > > drivers/net/bonding/bond_main.c | 11 --- > > 1 file changed, 8 insertions(+), 3 deletions(-) > > > Nak > Use BOND_XMIT_POLICY_ENCAP34 (encap3+4), that was one of the main > reasons it was added. Got it, thank you~
Re: [PATCH net-next v2 1/2] fs/crashdd: add API to collect hardware dump in second kernel
> >> The sysfs approach proposed here had been dropped in favour exporting > >> the dumps as ELF notes in /proc/vmcore. > >> > >> Will be posting the new patches soon. > > > >The concern was actually how you identify which device that came from. > >Where you read the identifier changes but sysfs or /proc/vmcore the > >change remains valid. > > Yeah. I still don't see how you link the dump and the device. Hi Jiri You can see in the third version the core code accept a free form name. The driver builds a name using the driver name and the adaptor name. What i think would be good is to try to have one API to the driver that can be used for both crash dumps and devlink snapshots. These are used at different times, but have basically the same purpose, get state from the device. Andrew
Re: [PATCH net-next v2 1/2] fs/crashdd: add API to collect hardware dump in second kernel
On Monday, April 04/02/18, 2018 at 14:41:43 +0530, Jiri Pirko wrote: > Fri, Mar 30, 2018 at 08:42:00PM CEST, ebied...@xmission.com wrote: > >Rahul Lakkireddy writes: > > > >> On Friday, March 03/30/18, 2018 at 16:09:07 +0530, Jiri Pirko wrote: > >>> Sat, Mar 24, 2018 at 11:56:33AM CET, rahul.lakkire...@chelsio.com wrote: > >>> >Add a new module crashdd that exports the /sys/kernel/crashdd/ > >>> >directory in second kernel, containing collected hardware/firmware > >>> >dumps. > >>> > > >>> >The sequence of actions done by device drivers to append their device > >>> >specific hardware/firmware logs to /sys/kernel/crashdd/ directory are > >>> >as follows: > >>> > > >>> >1. During probe (before hardware is initialized), device drivers > >>> >register to the crashdd module (via crashdd_add_dump()), with > >>> >callback function, along with buffer size and log name needed for > >>> >firmware/hardware log collection. > >>> > > >>> >2. Crashdd creates a driver's directory under > >>> >/sys/kernel/crashdd/. Then, it allocates the buffer with > >>> > >>> This smells. I need to identify the exact ASIC instance that produced > >>> the dump. To identify by driver name does not help me if I have multiple > >>> instances of the same driver. This looks wrong to me. This looks like > >>> a job for devlink where you have 1 devlink instance per 1 ASIC instance. > >>> > >>> Please see: > >>> http://patchwork.ozlabs.org/project/netdev/list/?series=36524 > >>> > >>> I bevieve that the solution in the patchset could be used for > >>> your usecase too. > >>> > >>> > >> > >> The sysfs approach proposed here had been dropped in favour exporting > >> the dumps as ELF notes in /proc/vmcore. > >> > >> Will be posting the new patches soon. > > > >The concern was actually how you identify which device that came from. > >Where you read the identifier changes but sysfs or /proc/vmcore the > >change remains valid. > > Yeah. I still don't see how you link the dump and the device. 
In our case, the dump and the device are being identified by the driver’s name followed by its corresponding pci bus id. I’ve posted an example in my v3 series: https://www.spinics.net/lists/netdev/msg493781.html Here’s an extract from the link above: # readelf -n /proc/vmcore Displaying notes found at file offset 0x1000 with length 0x04003288: Owner Data size Description VMCOREDD_cxgb4_:02:00.4 0x02000fd8 Unknown note type:(0x0700) VMCOREDD_cxgb4_:04:00.4 0x02000fd8 Unknown note type:(0x0700) CORE 0x0150 NT_PRSTATUS (prstatus structure) CORE 0x0150 NT_PRSTATUS (prstatus structure) CORE 0x0150 NT_PRSTATUS (prstatus structure) CORE 0x0150 NT_PRSTATUS (prstatus structure) CORE 0x0150 NT_PRSTATUS (prstatus structure) CORE 0x0150 NT_PRSTATUS (prstatus structure) CORE 0x0150 NT_PRSTATUS (prstatus structure) CORE 0x0150 NT_PRSTATUS (prstatus structure) VMCOREINFO 0x074f Unknown note type: (0x) Here, for my two devices, the dump’s names are VMCOREDD_cxgb4_:02:00.4 and VMCOREDD_cxgb4_:04:00.4. It’s really up to the callers to write their own unique name for the dump. The name is appended to “VMCOREDD_” string. > Rahul, did you look at the patchset I pointed out? For devlink, I think the dump name would be identified by bus_type/device_name; i.e. “pci/:02:00.4” for my example. Is my understanding correct? Thanks, Rahul
Re: [PATCH net-next] net: ipv6/gre: Add GRO support
On Sun, Apr 1, 2018 at 7:35 PM, Eric Dumazet wrote: > > > On 04/01/2018 06:17 AM, Tariq Toukan wrote: >> From: Eran Ben Elisha >> >> Add GRO capability for IPv6 GRE tunnel and ip6erspan tap, via gro_cells >> infrastructure. >> >> Performance testing: 55% higher bandwidth. >> Measuring bandwidth of 1 thread IPv4 TCP traffic over IPv6 GRE tunnel >> while GRO on the physical interface is disabled. >> CPU: Intel Xeon E312xx (Sandy Bridge) >> NIC: Mellanox Technologies MT27700 Family [ConnectX-4] >> Before (GRO not working in tunnel) : 2.47 Gbits/sec >> After (GRO working in tunnel) : 3.85 Gbits/sec >> >> Signed-off-by: Eran Ben Elisha >> Signed-off-by: Tariq Toukan >> CC: Eric Dumazet >> --- > > > Seems good, but why isn't this handled directly in GRO native layer ? ip6_tunnel and ip6_gre do not share initialization flow functions (unlike ipv4). Changing the ipv6 init infrastructure should not be part of this patch. we prefer to keep this one minimal, simple and safe. >
Re: RFC on writel and writel_relaxed
On 3/29/2018 9:40 PM, Benjamin Herrenschmidt wrote: > On Thu, 2018-03-29 at 09:56 -0400, Sinan Kaya wrote: >> On 3/28/2018 11:55 AM, David Miller wrote: >>> From: Benjamin Herrenschmidt >>> Date: Thu, 29 Mar 2018 02:13:16 +1100 >>> Let's fix all archs, it's way easier than fixing all drivers. Half of the archs are unused or dead anyway. >>> >>> Agreed. >>> >> >> I pinged most of the maintainers yesterday. >> Which arches do we care about these days? >> I have not been paying attention any other architecture besides arm64. > > Thanks for going through that exercise ! > > Once sparc, s390, microblaze and mips reply, I think we'll have a good > coverage, maybe riscv is to put in that lot too. I posted the following two patches for supporting microblaze and unicore32. [PATCH v2 1/2] io: prevent compiler reordering on the default writeX() implementation [PATCH v2 2/2] io: prevent compiler reordering on the default readX() implementation The rest of the arches except mips and alpha seem OK. I sent a question email on Friday to mips and alpha mailing lists. I'll follow up with an actual patch today. -- Sinan Kaya Qualcomm Datacenter Technologies, Inc. as an affiliate of Qualcomm Technologies, Inc. Qualcomm Technologies, Inc. is a member of the Code Aurora Forum, a Linux Foundation Collaborative Project.
RE: [PATCH v3 2/4] bus: fsl-mc: add restool userspace support
> > > I'm still not convinced either way (high-level or low-level > > interface), but I think this needs to be discussed with the networking > > maintainers. Given the examples on the github page you linked to, the > > high-level user space commands based on these ioctls > > > >ls-addni # adds a network interface > >ls-addmux # adds a dpdmux > >ls-addsw # adds an l2switch > >ls-listmac # lists MACs and their connections > >ls-listni # lists network interfaces and their connections > > > > and I see that you also support the switchdev interface in > > drivers/staging/fsl-dpaa2, which I think does some of the same things, > > presumably by implementing the switchdev API using fsl_mc_command > > low-level interfaces in the kernel. > > Hi Arnd > > I agree that switchdev and devlink should be the correct way to handle this. > The > low level plumbing of the hardware should all be hidden. There should not be > any user space commands needed other than the usual network configuration > tools and devlink. > Hi, The commands listed above are for creating/destroying DPAA2 objects in Management Complex and not for runtime configuration where standard userspace tools are used. Restool is responsible for creating objects in Management complex and this process can be seen as the equivalent of hotplugging a peripheral rather than configuring it, thus there is no standard userspace tool to handle that. * The Management Complex is configured to create a specific set of DPAA2 objects dynamically through Restool (by sending create commands) or statically, at boot time, through a configuration file (Data Path Layout file) * The objects are then probed and configured by the corresponding drivers * The objects are controlled at runtime by the user via standard tools (e.g. ethtool for network interfaces). I hope this gives a better understanding on the DPAA2 hardware and software architecture. 
The fsl-mc bus documentation gives more details on this: https://git.kernel.org/pub/scm/linux/kernel/git/gregkh/staging.git/tree/Documentation/networking/dpaa2/overview.rst?h=staging-next Ioana
[PATCH net-next 2/5] sctp: Handle sctp packets with CHECKSUM_PARTIAL
With SCTP checksum offload available in virtio, it is now possible for virtio to receive a sctp packet with CHECKSUM_PARTIAL set (guest-to-guest traffic). SCTP doesn't really have a partial checksum like TCP does because CRC32c can't do partial additive checksumming. It's all or nothing. So an SCTP packet with CHECKSUM_PARTIAL will have checksum set to 0. Let SCTP treat this as a valid checksum if CHECKSUM_PARTIAL is set. Signed-off-by: Vladislav Yasevich --- net/sctp/input.c | 11 ++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/net/sctp/input.c b/net/sctp/input.c index ba8a6e6..055b8ffa 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -80,8 +80,17 @@ static inline int sctp_rcv_checksum(struct net *net, struct sk_buff *skb) { struct sctphdr *sh = sctp_hdr(skb); __le32 cmp = sh->checksum; - __le32 val = sctp_compute_cksum(skb, 0); + __le32 val = 0; + /* In sctp PARTIAL checksum is always 0. This is a case of +* a packet received from guest that supports checksum offload. +* Assume it's correct as there is really no way to verify, +* and we want to avoid computing it unnecessarily. +*/ + if (skb->ip_summed == CHECKSUM_PARTIAL) + return 0; + + val = sctp_compute_cksum(skb, 0); if (val != cmp) { /* CRC failure, dump it. */ __SCTP_INC_STATS(net, SCTP_MIB_CHECKSUMERRORS); -- 2.9.5
[PATCH net-next 0/5] virtio-net: Add SCTP checksum offload support
Now that we have SCTP offload capabilities in the kernel, we can add them to virtio as well. First step is SCTP checksum. We need a new feature in virtio to negotiate this support since SCTP is excluded with the standard checksum and requires a little bit extra. This series proposes VIRTIO_NET_F_SCTP_CSUM feature bit. As the "little bit extra", the kernel uses a new bit in the skb (skb->csum_not_inet) to determine whether to use standard inet checksum or the SCTP CRC32c checksum. This bit has to be communicated between the host and the guest. This bit is carried in the vnet header. Tap and macvtap support is added through an extra feature for the TUNSETOFFLOAD ioctl. Additionally macvtap will now correctly do sctp checksumming if the receive doesn't support SCTP offload. This also turns on sctp offloading for macvlan devices. As for the perf numbers, I am seeing about a 5% increase in vm-to-vm and vm-to-host throughput which is the same as manually disabling sctp checksumming, since this is exactly what we are emulating. Sending outside the host, the increase is about 2.5-3%. As for GSO, the way sctp GSO is currently implemented buys us nothing in added support to virtio. To add true GSO, would require a lot of re-work inside of SCTP and would require extensions to the virtio net header to carry extra sctp data. Vladislav Yasevich (5): virtio: Add support for SCTP checksum offloading sctp: Handle sctp packets with CHECKSUM_PARTIAL sctp: Build sctp offload support into the base kernel tun: Add support for SCTP checksum offload macvlan/macvtap: Add support for SCTP checksum offload. 
drivers/net/macvlan.c | 5 +++-- drivers/net/tap.c | 8 +--- drivers/net/tun.c | 5 + drivers/net/virtio_net.c| 10 +++--- include/linux/virtio_net.h | 6 ++ include/net/sctp/sctp.h | 5 - include/uapi/linux/if_tun.h | 1 + include/uapi/linux/virtio_net.h | 2 ++ net/Kconfig | 1 + net/sctp/Kconfig| 1 - net/sctp/Makefile | 3 ++- net/sctp/input.c| 11 ++- net/sctp/offload.c | 4 +++- net/sctp/protocol.c | 3 --- 14 files changed, 45 insertions(+), 20 deletions(-) -- 2.9.5
[PATCH net-next 3/5] sctp: Build sctp offload support into the base kernel
We need to take the sctp offload out of the sctp module and add it to the base kernel to support guests that can support it. This is similar to how IPv6 offloads are done and works around kernels that exclude sctp protocol support. Signed-off-by: Vladislav Yasevich --- include/net/sctp/sctp.h | 5 - net/Kconfig | 1 + net/sctp/Kconfig| 1 - net/sctp/Makefile | 3 ++- net/sctp/offload.c | 4 +++- net/sctp/protocol.c | 3 --- 6 files changed, 6 insertions(+), 11 deletions(-) diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 72c5b8f..625b45f 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -183,11 +183,6 @@ struct sctp_transport *sctp_epaddr_lookup_transport( int __net_init sctp_proc_init(struct net *net); /* - * sctp/offload.c - */ -int sctp_offload_init(void); - -/* * sctp/stream_sched.c */ void sctp_sched_ops_init(void); diff --git a/net/Kconfig b/net/Kconfig index 0428f12..2773f98 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -64,6 +64,7 @@ config INET bool "TCP/IP networking" select CRYPTO select CRYPTO_AES + select LIBCRC32C ---help--- These are the protocols used on the Internet and on most local Ethernets. 
It is highly recommended to say Y here (this will enlarge diff --git a/net/sctp/Kconfig b/net/sctp/Kconfig index c740b18..d07477a 100644 --- a/net/sctp/Kconfig +++ b/net/sctp/Kconfig @@ -9,7 +9,6 @@ menuconfig IP_SCTP select CRYPTO select CRYPTO_HMAC select CRYPTO_SHA1 - select LIBCRC32C ---help--- Stream Control Transmission Protocol diff --git a/net/sctp/Makefile b/net/sctp/Makefile index e845e45..ee206ca 100644 --- a/net/sctp/Makefile +++ b/net/sctp/Makefile @@ -5,6 +5,7 @@ obj-$(CONFIG_IP_SCTP) += sctp.o obj-$(CONFIG_INET_SCTP_DIAG) += sctp_diag.o +obj-$(CONFIG_INET) += offload.o sctp-y := sm_statetable.o sm_statefuns.o sm_sideeffect.o \ protocol.o endpointola.o associola.o \ @@ -12,7 +13,7 @@ sctp-y := sm_statetable.o sm_statefuns.o sm_sideeffect.o \ inqueue.o outqueue.o ulpqueue.o \ tsnmap.o bind_addr.o socket.o primitive.o \ output.o input.o debug.o stream.o auth.o \ - offload.o stream_sched.o stream_sched_prio.o \ + stream_sched.o stream_sched_prio.o \ stream_sched_rr.o stream_interleave.o sctp_diag-y := diag.o diff --git a/net/sctp/offload.c b/net/sctp/offload.c index 123e9f2..c61cbde 100644 --- a/net/sctp/offload.c +++ b/net/sctp/offload.c @@ -107,7 +107,7 @@ static const struct skb_checksum_ops crc32c_csum_ops = { .combine = sctp_csum_combine, }; -int __init sctp_offload_init(void) +static int __init sctp_offload_init(void) { int ret; @@ -127,3 +127,5 @@ int __init sctp_offload_init(void) out: return ret; } + +fs_initcall(sctp_offload_init); diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index a24cde2..46d2b63 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -1479,9 +1479,6 @@ static __init int sctp_init(void) if (status) goto err_v6_add_protocol; - if (sctp_offload_init() < 0) - pr_crit("%s: Cannot add SCTP protocol offload\n", __func__); - out: return status; err_v6_add_protocol: -- 2.9.5
[PATCH net-next 4/5] tun: Add support for SCTP checksum offload
Adds a new tun offload flag to allow for SCTP checksum offload. The flag has to be set by the user and defaults to "no offload". Signed-off-by: Vladislav Yasevich --- drivers/net/tun.c | 5 + 1 file changed, 5 insertions(+) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index a1ba262..263bcbe 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -2719,6 +2719,11 @@ static int set_offload(struct tun_struct *tun, unsigned long arg) arg &= ~TUN_F_UFO; } + if (arg & TUN_F_SCTP_CSUM) { + features |= NETIF_F_SCTP_CRC; + arg &= ~TUN_F_SCTP_CSUM; + } + /* This gives the user a way to test for new features in future by * trying to set them. */ if (arg) -- 2.9.5
[PATCH net-next 5/5] macvlan/macvtap: Add support for SCTP checksum offload.
Since we now have support for software CRC32c offload, turn it on for macvlan and macvtap devices so that guests can take advantage of offload SCTP checksums to the host or host hardware. Signed-off-by: Vladislav Yasevich --- drivers/net/macvlan.c | 5 +++-- drivers/net/tap.c | 8 +--- include/uapi/linux/if_tun.h | 1 + 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 725f4b4..646b730 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -834,7 +834,7 @@ static struct lock_class_key macvlan_netdev_addr_lock_key; #define ALWAYS_ON_OFFLOADS \ (NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_GSO_SOFTWARE | \ -NETIF_F_GSO_ROBUST | NETIF_F_GSO_ENCAP_ALL) +NETIF_F_GSO_ROBUST | NETIF_F_GSO_ENCAP_ALL | NETIF_F_SCTP_CRC) #define ALWAYS_ON_FEATURES (ALWAYS_ON_OFFLOADS | NETIF_F_LLTX) @@ -842,7 +842,8 @@ static struct lock_class_key macvlan_netdev_addr_lock_key; (NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | \ NETIF_F_GSO | NETIF_F_TSO | NETIF_F_LRO | \ NETIF_F_TSO_ECN | NETIF_F_TSO6 | NETIF_F_GRO | NETIF_F_RXCSUM | \ -NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_STAG_FILTER) +NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_STAG_FILTER | \ +NETIF_F_SCTP_CRC) #define MACVLAN_STATE_MASK \ ((1<<__LINK_STATE_NOCARRIER) | (1<<__LINK_STATE_DORMANT)) diff --git a/drivers/net/tap.c b/drivers/net/tap.c index 9b6cb78..2c8512b 100644 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@ -369,8 +369,7 @@ rx_handler_result_t tap_handle_frame(struct sk_buff **pskb) *check, we either support them all or none. 
*/ if (skb->ip_summed == CHECKSUM_PARTIAL && - !(features & NETIF_F_CSUM_MASK) && - skb_checksum_help(skb)) + skb_csum_hwoffload_help(skb, features)) goto drop; if (ptr_ring_produce(&q->ring, skb)) goto drop; @@ -945,6 +944,9 @@ static int set_offload(struct tap_queue *q, unsigned long arg) } } + if (arg & TUN_F_SCTP_CSUM) + feature_mask |= NETIF_F_SCTP_CRC; + /* tun/tap driver inverts the usage for TSO offloads, where * setting the TSO bit means that the userspace wants to * accept TSO frames and turning it off means that user space @@ -1077,7 +1079,7 @@ static long tap_ioctl(struct file *file, unsigned int cmd, case TUNSETOFFLOAD: /* let the user check for future flags */ if (arg & ~(TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | - TUN_F_TSO_ECN | TUN_F_UFO)) + TUN_F_TSO_ECN | TUN_F_UFO | TUN_F_SCTP_CSUM)) return -EINVAL; rtnl_lock(); diff --git a/include/uapi/linux/if_tun.h b/include/uapi/linux/if_tun.h index ee432cd..c3bb282 100644 --- a/include/uapi/linux/if_tun.h +++ b/include/uapi/linux/if_tun.h @@ -86,6 +86,7 @@ #define TUN_F_TSO6 0x04/* I can handle TSO for IPv6 packets */ #define TUN_F_TSO_ECN 0x08/* I can handle TSO with ECN bits. */ #define TUN_F_UFO 0x10/* I can handle UFO packets */ +#define TUN_F_SCTP_CSUM 0x20 /* I can handle SCTP checksum offload */ /* Protocol info prepended to the packets (when IFF_NO_PI is not set) */ #define TUN_PKT_STRIP 0x0001 -- 2.9.5
[PATCH net-next 1/5] virtio: Add support for SCTP checksum offloading
To support SCTP checksum offloading, we need to add a new feature to virtio_net, so we can negotiate support between the hypervisor and the guest. The signalling to the guest that an alternate checksum needs to be used is done via a new flag in the virtio_net_hdr. If the flag is set, the host will know to perform an alternate checksum calculation, which right now is only CRC32c. Signed-off-by: Vladislav Yasevich --- drivers/net/virtio_net.c| 11 --- include/linux/virtio_net.h | 6 ++ include/uapi/linux/virtio_net.h | 2 ++ 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 7b187ec..b601294 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -2724,9 +2724,14 @@ static int virtnet_probe(struct virtio_device *vdev) /* Do we support "hardware" checksums? */ if (virtio_has_feature(vdev, VIRTIO_NET_F_CSUM)) { /* This opens up the world of extra features. */ - dev->hw_features |= NETIF_F_HW_CSUM | NETIF_F_SG; + netdev_features_t sctp = 0; + + if (virtio_has_feature(vdev, VIRTIO_NET_F_SCTP_CSUM)) + sctp |= NETIF_F_SCTP_CRC; + + dev->hw_features |= NETIF_F_HW_CSUM | NETIF_F_SG | sctp; if (csum) - dev->features |= NETIF_F_HW_CSUM | NETIF_F_SG; + dev->features |= NETIF_F_HW_CSUM | NETIF_F_SG | sctp; if (virtio_has_feature(vdev, VIRTIO_NET_F_GSO)) { dev->hw_features |= NETIF_F_TSO @@ -2952,7 +2957,7 @@ static struct virtio_device_id id_table[] = { }; #define VIRTNET_FEATURES \ - VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM, \ + VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM, VIRTIO_NET_F_SCTP_CSUM, \ VIRTIO_NET_F_MAC, \ VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_HOST_TSO6, \ VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6, \ diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index f144216..2e7a64a 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -39,6 +39,9 @@ static inline int virtio_net_hdr_to_skb(struct 
sk_buff *skb, if (!skb_partial_csum_set(skb, start, off)) return -EINVAL; + + if (hdr->flags & VIRTIO_NET_HDR_F_CSUM_NOT_INET) + skb->csum_not_inet = 1; } if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) { @@ -96,6 +99,9 @@ static inline int virtio_net_hdr_from_skb(const struct sk_buff *skb, hdr->flags = VIRTIO_NET_HDR_F_DATA_VALID; } /* else everything is zero */ + if (skb->csum_not_inet) + hdr->flags &= VIRTIO_NET_HDR_F_CSUM_NOT_INET; + return 0; } diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h index 5de6ed3..3f279c8 100644 --- a/include/uapi/linux/virtio_net.h +++ b/include/uapi/linux/virtio_net.h @@ -36,6 +36,7 @@ #define VIRTIO_NET_F_GUEST_CSUM1 /* Guest handles pkts w/ partial csum */ #define VIRTIO_NET_F_CTRL_GUEST_OFFLOADS 2 /* Dynamic offload configuration. */ #define VIRTIO_NET_F_MTU 3 /* Initial MTU advice */ +#define VIRTIO_NET_F_SCTP_CSUM 4 /* SCTP checksum offload support */ #define VIRTIO_NET_F_MAC 5 /* Host has given MAC address. */ #define VIRTIO_NET_F_GUEST_TSO47 /* Guest can handle TSOv4 in. */ #define VIRTIO_NET_F_GUEST_TSO68 /* Guest can handle TSOv6 in. */ @@ -101,6 +102,7 @@ struct virtio_net_config { struct virtio_net_hdr_v1 { #define VIRTIO_NET_HDR_F_NEEDS_CSUM1 /* Use csum_start, csum_offset */ #define VIRTIO_NET_HDR_F_DATA_VALID2 /* Csum is valid */ +#define VIRTIO_NET_HDR_F_CSUM_NOT_INET 4 /* Checksum is not inet */ __u8 flags; #define VIRTIO_NET_HDR_GSO_NONE0 /* Not a GSO frame */ #define VIRTIO_NET_HDR_GSO_TCPV4 1 /* GSO frame, IPv4 TCP (TSO) */ -- 2.9.5
Re: [PATCH v3 2/4] bus: fsl-mc: add restool userspace support
Hi Ioana > The commands listed above are for creating/destroying DPAA2 objects > in Management Complex and not for runtime configuration where > standard userspace tools are used. Please can you explain why this is not just plumbing inside a switchdev driver? The hardware has a number of physical ports. So on probe, i would expect it to create a DPMAC, DPNI, and DPIO for each port, and a linux netdev. From then on, standard tools are all that are needed. The switchdev driver can create a l2 switch object when the user uses the ip link add name br0 type bridge. It can then connect the switch object to the DPNI when the user adds an interface to the switch, etc. Andrew
[PATCH net v5 1/3] ipv6: add a wrapper for ip6_dst_store() with flowi6 checks
Move commonly used pattern of ip6_dst_store() usage to a separate function - ip6_sk_dst_store_flow(), which will check the addresses for equality using the flow information, before saving them. There is no functional changes in this patch. In addition, it will be used in the next patch, in ip6_sk_dst_lookup_flow(). Signed-off-by: Alexey Kodanev --- include/net/ip6_route.h | 3 +++ net/ipv6/datagram.c | 9 + net/ipv6/route.c| 17 + 3 files changed, 21 insertions(+), 8 deletions(-) diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index ac0866b..abec280 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -210,6 +210,9 @@ static inline void ip6_dst_store(struct sock *sk, struct dst_entry *dst, #endif } +void ip6_sk_dst_store_flow(struct sock *sk, struct dst_entry *dst, + const struct flowi6 *fl6); + static inline bool ipv6_unicast_destination(const struct sk_buff *skb) { struct rt6_info *rt = (struct rt6_info *) skb_dst(skb); diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index a9f7eca..8f6a391 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -106,14 +106,7 @@ int ip6_datagram_dst_update(struct sock *sk, bool fix_sk_saddr) } } - ip6_dst_store(sk, dst, - ipv6_addr_equal(&fl6.daddr, &sk->sk_v6_daddr) ? - &sk->sk_v6_daddr : NULL, -#ifdef CONFIG_IPV6_SUBTREES - ipv6_addr_equal(&fl6.saddr, &np->saddr) ? - &np->saddr : -#endif - NULL); + ip6_sk_dst_store_flow(sk, dst, &fl6); out: fl6_sock_release(flowlabel); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index b0d5c64..b14008e 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2153,6 +2153,23 @@ void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu) } EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu); +void ip6_sk_dst_store_flow(struct sock *sk, struct dst_entry *dst, + const struct flowi6 *fl6) +{ +#ifdef CONFIG_IPV6_SUBTREES + struct ipv6_pinfo *np = inet6_sk(sk); +#endif + + ip6_dst_store(sk, dst, + ipv6_addr_equal(&fl6->daddr, &sk->sk_v6_daddr) ? 
+ &sk->sk_v6_daddr : NULL, +#ifdef CONFIG_IPV6_SUBTREES + ipv6_addr_equal(&fl6->saddr, &np->saddr) ? + &np->saddr : +#endif + NULL); +} + /* Handle redirects */ struct ip6rd_flowi { struct flowi6 fl6; -- 1.8.3.1
[PATCH net v5 0/3] ipv6: udp6: set dst cache for a connected sk if current not valid
A new RTF_CACHE route can be created with the socket's dst cache update between the below calls in udpv6_sendmsg(), when datagram sending results in ICMPV6_PKT_TOOBIG error: dst = ip6_sk_dst_lookup_flow(...) ... release_dst: if (dst) { if (connected) { ip6_dst_store(sk, dst) Therefore, the new socket's dst cache reset to the old one on "release_dst:". The first two patches prepare the code to store dst cache with ip6_sk_dst_lookup_flow(): * the first patch adds ip6_sk_dst_store_flow() function with commonly used source and destination addresses checks using the flow information. * the second patch adds new argument to ip6_sk_dst_lookup_flow() and ability to store dst in the socket's cache. Also, the two users of the function are updated without enabling the new behavior: pingv6_sendmsg() and udpv6_sendmsg(). The last patch contains the actual fix that removes sk dst cache update in the end of udpv6_sendmsg(), and allows to do it in ip6_sk_dst_lookup_flow(). v5: * relocate ip6_sk_dst_store_flow() to net/ipv6/route.c and rename ip6_dst_store_flow() to ip6_sk_dst_store_flow() as suggested by Martin v4: * fix the error in the build of ip_dst_store_flow() reported by kbuild test robot due to missing checks for CONFIG_IPV6: add new function to ip6_output.c instead of ip6_route.h * add 'const' to struct flowi6 in ip6_dst_store_flow() * minor commit messages fixes v3: * instead of moving ip6_dst_store() above udp_v6_send_skb(), update socket's dst cache inside ip6_sk_dst_lookup_flow() if the current one is invalid * the issue not reproduced in 4.1, but starting from 4.2. Add one more 'Fixes:' commit that creates new RTF_CACHE route. 
Though, it is also mentioned in the first one Alexey Kodanev (3): ipv6: add a wrapper for ip6_dst_store() with flowi6 checks ipv6: allow to cache dst for a connected sk in ip6_sk_dst_lookup_flow() ipv6: udp6: set dst cache for a connected sk if current not valid include/net/ip6_route.h | 3 +++ include/net/ipv6.h | 3 ++- net/ipv6/datagram.c | 9 + net/ipv6/ip6_output.c | 15 --- net/ipv6/ping.c | 2 +- net/ipv6/route.c| 17 + net/ipv6/udp.c | 21 ++--- 7 files changed, 38 insertions(+), 32 deletions(-) -- 1.8.3.1
[PATCH net v5 3/3] ipv6: udp6: set dst cache for a connected sk if current not valid
A new RTF_CACHE route can be created between ip6_sk_dst_lookup_flow() and ip6_dst_store() calls in udpv6_sendmsg(), when datagram sending results to ICMPV6_PKT_TOOBIG error: udp_v6_send_skb(), for example with vti6 tunnel: vti6_xmit(), get ICMPV6_PKT_TOOBIG error skb_dst_update_pmtu(), can create a RTF_CACHE clone icmpv6_send() ... udpv6_err() ip6_sk_update_pmtu() ip6_update_pmtu(), can create a RTF_CACHE clone ... ip6_datagram_dst_update() ip6_dst_store() And after commit 33c162a980fe ("ipv6: datagram: Update dst cache of a connected datagram sk during pmtu update"), the UDPv6 error handler can update socket's dst cache, but it can happen before the update in the end of udpv6_sendmsg(), preventing getting the new dst cache on the next udpv6_sendmsg() calls. In order to fix it, save dst in a connected socket only if the current socket's dst cache is invalid. The previous patch prepared ip6_sk_dst_lookup_flow() to do that with the new argument, and this patch enables it in udpv6_sendmsg(). 
Fixes: 33c162a980fe ("ipv6: datagram: Update dst cache of a connected datagram sk during pmtu update") Fixes: 45e4fd26683c ("ipv6: Only create RTF_CACHE routes after encountering pmtu exception") Signed-off-by: Alexey Kodanev --- net/ipv6/udp.c | 21 ++--- 1 file changed, 2 insertions(+), 19 deletions(-) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index e49dac4..da13c90 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1289,7 +1289,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel); - dst = ip6_sk_dst_lookup_flow(sk, &fl6, final_p, 0); + dst = ip6_sk_dst_lookup_flow(sk, &fl6, final_p, connected); if (IS_ERR(dst)) { err = PTR_ERR(dst); dst = NULL; @@ -1314,7 +1314,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) err = PTR_ERR(skb); if (!IS_ERR_OR_NULL(skb)) err = udp_v6_send_skb(skb, &fl6); - goto release_dst; + goto out; } lock_sock(sk); @@ -1348,23 +1348,6 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) err = np->recverr ? net_xmit_errno(err) : 0; release_sock(sk); -release_dst: - if (dst) { - if (connected) { - ip6_dst_store(sk, dst, - ipv6_addr_equal(&fl6.daddr, &sk->sk_v6_daddr) ? - &sk->sk_v6_daddr : NULL, -#ifdef CONFIG_IPV6_SUBTREES - ipv6_addr_equal(&fl6.saddr, &np->saddr) ? - &np->saddr : -#endif - NULL); - } else { - dst_release(dst); - } - dst = NULL; - } - out: dst_release(dst); fl6_sock_release(flowlabel); -- 1.8.3.1
[PATCH net v5 2/3] ipv6: allow to cache dst for a connected sk in ip6_sk_dst_lookup_flow()
Add 'connected' argument to ip6_sk_dst_lookup_flow() and update the cache only if ip6_sk_dst_check() returns NULL and a socket is connected. The function is used as before, the new behavior for UDP sockets in udpv6_sendmsg() will be enabled in the next patch. Signed-off-by: Alexey Kodanev --- include/net/ipv6.h| 3 ++- net/ipv6/ip6_output.c | 15 --- net/ipv6/ping.c | 2 +- net/ipv6/udp.c| 2 +- 4 files changed, 16 insertions(+), 6 deletions(-) diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 8606c91..07e94dc 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -977,7 +977,8 @@ int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst, struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6, const struct in6_addr *final_dst); struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, -const struct in6_addr *final_dst); +const struct in6_addr *final_dst, +int connected); struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *orig_dst); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index a8a9195..15724e0 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1105,23 +1105,32 @@ struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6, * @sk: socket which provides the dst cache and route info * @fl6: flow to lookup * @final_dst: final destination address for ipsec lookup + * @connected: whether @sk is connected or not * * This function performs a route lookup on the given flow with the * possibility of using the cached route in the socket if it is valid. * It will take the socket dst lock when operating on the dst cache. * As a result, this function can only be used in process context. * + * In addition, for a connected socket, cache the dst in the socket + * if the current cache is not valid. + * * It returns a valid dst pointer on success, or a pointer encoded * error code. 
*/ struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, -const struct in6_addr *final_dst) +const struct in6_addr *final_dst, +int connected) { struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie); dst = ip6_sk_dst_check(sk, dst, fl6); - if (!dst) - dst = ip6_dst_lookup_flow(sk, fl6, final_dst); + if (dst) + return dst; + + dst = ip6_dst_lookup_flow(sk, fl6, final_dst); + if (connected && !IS_ERR(dst)) + ip6_sk_dst_store_flow(sk, dst_clone(dst), fl6); return dst; } diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index d12c55d..546f4a6 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -121,7 +121,7 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) ipc6.tclass = np->tclass; fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel); - dst = ip6_sk_dst_lookup_flow(sk, &fl6, daddr); + dst = ip6_sk_dst_lookup_flow(sk, &fl6, daddr, 0); if (IS_ERR(dst)) return PTR_ERR(dst); rt = (struct rt6_info *) dst; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 52e3ea0..e49dac4 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1289,7 +1289,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel); - dst = ip6_sk_dst_lookup_flow(sk, &fl6, final_p); + dst = ip6_sk_dst_lookup_flow(sk, &fl6, final_p, 0); if (IS_ERR(dst)) { err = PTR_ERR(dst); dst = NULL; -- 1.8.3.1
[PATCH net-next V2 0/4] Introduce adaptive TX interrupt moderation to net DIM
Net DIM is a library designed for dynamic interrupt moderation. It was implemented and optimized with receive side interrupts in mind, since these are usually the CPU expensive ones. This patch-set introduces adaptive transmit interrupt moderation to net DIM, complete with a usage in the mlx5e driver. Using adaptive TX behavior would reduce interrupt rate for multiple scenarios. Furthermore, it is essential for increasing bandwidth on cases where payload aggregation is required. v2: Rebased over proper tree. v1: Fix compilation issues due to missed function renaming. Tal Gilboa (4): net/dim: Rename *_get_profile() functions to *_get_rx_moderation() net/dim: Add "enabled" field to net_dim struct net/dim: Support adaptive TX moderation net/mlx5e: Enable adaptive-TX moderation drivers/net/ethernet/broadcom/bcmsysport.c | 6 +- drivers/net/ethernet/broadcom/bnxt/bnxt_dim.c | 8 +-- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 6 +- drivers/net/ethernet/mellanox/mlx5/core/en.h | 5 +- drivers/net/ethernet/mellanox/mlx5/core/en_dim.c | 28 ++--- .../net/ethernet/mellanox/mlx5/core/en_ethtool.c | 35 +++ drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 34 -- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c | 37 --- include/linux/net_dim.h| 72 +- 10 files changed, 173 insertions(+), 60 deletions(-) -- 1.8.3.1
[PATCH net-next V2 4/4] net/mlx5e: Enable adaptive-TX moderation
Add support for adaptive TX moderation. This greatly reduces TX interrupt rate and increases bandwidth, mostly for TCP bandwidth over ARM architecture (below). There is a slight single stream TCP with very large message sizes degradation (x86). In this case if there's any moderation on transmitted packets the bandwidth would reduce due to hitting TCP output limit. Since this is a synthetic case, this is still worth doing. Performance improvement (ConnectX-4Lx 40GbE, ARM) TCP 64B bandwidth with 1-50 streams increased 6-35%. TCP 64B bandwidth with 100-500 streams increased 20-70%. Performance improvement (ConnectX-5 100GbE, x86) Bandwidth: increased up to 40% (1024B with 10s of streams). Interrupt rate: reduced up to 50% (1024B with 1000s of streams). Signed-off-by: Tal Gilboa --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 4 +++ drivers/net/ethernet/mellanox/mlx5/core/en_dim.c | 24 +++--- .../net/ethernet/mellanox/mlx5/core/en_ethtool.c | 37 ++ drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 22 + drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c | 37 -- 5 files changed, 96 insertions(+), 28 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 2c18d2f..1a05db0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -327,6 +327,7 @@ enum { MLX5E_SQ_STATE_ENABLED, MLX5E_SQ_STATE_RECOVERING, MLX5E_SQ_STATE_IPSEC, + MLX5E_SQ_STATE_AM, }; struct mlx5e_sq_wqe_info { @@ -339,6 +340,7 @@ struct mlx5e_txqsq { /* dirtied @completion */ u16cc; u32dma_fifo_cc; + struct net_dim dim; /* Adaptive Moderation */ /* dirtied @xmit */ u16pc cacheline_aligned_in_smp; @@ -376,6 +378,7 @@ struct mlx5e_txqsq { struct work_struct recover_work; u64last_recover; } recover; + } cacheline_aligned_in_smp; struct mlx5e_xdpsq { @@ -1106,4 +1109,5 @@ void mlx5e_build_nic_params(struct mlx5_core_dev *mdev, u16 max_channels, u16 mtu); u8 
mlx5e_params_calculate_tx_min_inline(struct mlx5_core_dev *mdev); void mlx5e_rx_dim_work(struct work_struct *work); +void mlx5e_tx_dim_work(struct work_struct *work); #endif /* __MLX5_EN_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c index 1b286e1..9cec351 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c @@ -33,16 +33,30 @@ #include #include "en.h" +static inline void +mlx5e_complete_dim_work(struct net_dim *dim, struct net_dim_cq_moder moder, + struct mlx5_core_dev *mdev, struct mlx5_core_cq *mcq) +{ + mlx5_core_modify_cq_moderation(mdev, mcq, moder.usec, moder.pkts); + dim->state = NET_DIM_START_MEASURE; +} + void mlx5e_rx_dim_work(struct work_struct *work) { - struct net_dim *dim = container_of(work, struct net_dim, - work); + struct net_dim *dim = container_of(work, struct net_dim, work); struct mlx5e_rq *rq = container_of(dim, struct mlx5e_rq, dim); struct net_dim_cq_moder cur_moder = net_dim_get_rx_moderation(dim->mode, dim->profile_ix); - mlx5_core_modify_cq_moderation(rq->mdev, &rq->cq.mcq, - cur_moder.usec, cur_moder.pkts); + mlx5e_complete_dim_work(dim, cur_moder, rq->mdev, &rq->cq.mcq); +} - dim->state = NET_DIM_START_MEASURE; +void mlx5e_tx_dim_work(struct work_struct *work) +{ + struct net_dim *dim = container_of(work, struct net_dim, work); + struct mlx5e_txqsq *sq = container_of(dim, struct mlx5e_txqsq, dim); + struct net_dim_cq_moder cur_moder = + net_dim_get_tx_moderation(dim->mode, dim->profile_ix); + + mlx5e_complete_dim_work(dim, cur_moder, sq->cq.mdev, &sq->cq.mcq); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 66c71da..4629bf8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -389,15 +389,20 @@ static int mlx5e_set_channels(struct net_device *dev, 
int mlx5e_ethtool_get_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesce *coal) { + struct net_dim_cq_moder *rx_moder, *tx_moder; + if (!MLX5_CAP_GEN(priv->mdev, cq_moderation)) return -EOPNOTSUPP; - coal->rx_coalesce_usecs = priv->channels.params.rx_cq_moderation.usec; - coal->rx_max_coalesced_frames = priv->channels.params.rx_cq_moderation.pkts
[PATCH net-next V2 3/4] net/dim: Support adaptive TX moderation
Interrupt moderation for TX traffic requires different profiles than RX interrupt moderation. The main goal here is to reduce interrupt rate and allow better payload aggregation by keeping SKBs in the TX queue a bit longer. Ping-pong behavior would get a profile with a short timer, so latency wouldn't increase for these scenarios. There's a slight degradtion in bandwidth for single stream with large message sizes, since net.ipv4.tcp_limit_output_bytes is limiting the allowed TX traffic, but with many streams performance is always improved. Performance improvements (ConnectX-5 100GbE) Bandwidth: increased up to 40% (1024B with 10s of streams). Interrupt rate: reduced up to 50% (1024B with 1000s of streams). Performance degradation (ConnectX-5 100GbE) Bandwidth: up to 10% decrease single stream TCP (1MB message size from 51Gb/s to 47Gb/s). *Both cases with TX EQE based moderation enabled. Signed-off-by: Tal Gilboa --- include/linux/net_dim.h | 64 +++-- 1 file changed, 51 insertions(+), 13 deletions(-) diff --git a/include/linux/net_dim.h b/include/linux/net_dim.h index e6623cf..95a7a2f 100644 --- a/include/linux/net_dim.h +++ b/include/linux/net_dim.h @@ -104,11 +104,12 @@ enum { #define NET_DIM_PARAMS_NUM_PROFILES 5 /* Adaptive moderation profiles */ #define NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE 256 +#define NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE 128 #define NET_DIM_DEF_PROFILE_CQE 1 #define NET_DIM_DEF_PROFILE_EQE 1 /* All profiles sizes must be NET_PARAMS_DIM_NUM_PROFILES */ -#define NET_DIM_EQE_PROFILES { \ +#define NET_DIM_RX_EQE_PROFILES { \ {1, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \ {8, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \ {64, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \ @@ -116,7 +117,7 @@ enum { {256, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \ } -#define NET_DIM_CQE_PROFILES { \ +#define NET_DIM_RX_CQE_PROFILES { \ {2, 256}, \ {8, 128}, \ {16, 64}, \ @@ -124,16 +125,38 @@ enum { {64, 64} \ } +#define 
NET_DIM_TX_EQE_PROFILES { \ + {1, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \ + {8, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \ + {32, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \ + {64, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \ + {128, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE} \ +} + +#define NET_DIM_TX_CQE_PROFILES { \ + {5, 128}, \ + {8, 64}, \ + {16, 32}, \ + {32, 32}, \ + {64, 32} \ +} + +static const struct net_dim_cq_moder +rx_profile[NET_DIM_CQ_PERIOD_NUM_MODES][NET_DIM_PARAMS_NUM_PROFILES] = { + NET_DIM_RX_EQE_PROFILES, + NET_DIM_RX_CQE_PROFILES, +}; + static const struct net_dim_cq_moder -profile[NET_DIM_CQ_PERIOD_NUM_MODES][NET_DIM_PARAMS_NUM_PROFILES] = { - NET_DIM_EQE_PROFILES, - NET_DIM_CQE_PROFILES, +tx_profile[NET_DIM_CQ_PERIOD_NUM_MODES][NET_DIM_PARAMS_NUM_PROFILES] = { + NET_DIM_TX_EQE_PROFILES, + NET_DIM_TX_CQE_PROFILES, }; static inline struct net_dim_cq_moder net_dim_get_rx_moderation(u8 cq_period_mode, int ix) { - struct net_dim_cq_moder cq_moder = profile[cq_period_mode][ix]; + struct net_dim_cq_moder cq_moder = rx_profile[cq_period_mode][ix]; cq_moder.cq_period_mode = cq_period_mode; cq_moder.enabled = true; @@ -141,16 +164,31 @@ enum { } static inline struct net_dim_cq_moder -net_dim_get_def_rx_moderation(u8 rx_cq_period_mode) +net_dim_get_def_rx_moderation(u8 cq_period_mode) { - int default_profile_ix; + u8 profile_ix = cq_period_mode == NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE ? 
+ NET_DIM_DEF_PROFILE_CQE : NET_DIM_DEF_PROFILE_EQE; - if (rx_cq_period_mode == NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE) - default_profile_ix = NET_DIM_DEF_PROFILE_CQE; - else /* NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE */ - default_profile_ix = NET_DIM_DEF_PROFILE_EQE; + return net_dim_get_rx_moderation(cq_period_mode, profile_ix); +} + +static inline struct net_dim_cq_moder +net_dim_get_tx_moderation(u8 cq_period_mode, int ix) +{ + struct net_dim_cq_moder cq_moder = tx_profile[cq_period_mode][ix]; + + cq_moder.cq_period_mode = cq_period_mode; + cq_moder.enabled = true; + return cq_moder; +} + +static inline struct net_dim_cq_moder +net_dim_get_def_tx_moderation(u8 cq_period_mode) +{ + u8 profile_ix = cq_period_mode == NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE ? + NET_DIM_DEF_PROFILE_CQE : NET_DIM_DEF_PROFILE_EQE; - return net_dim_get_rx_moderation(rx_cq_period_mode, default_profile_ix); + return net_dim_get_tx_moderation(cq_period_mode, profile_ix); } static inline bool net_dim_on_top(struct net_dim *dim) -- 1.8.3.1
[PATCH net-next V2 2/4] net/dim: Add "enabled" field to net_dim struct
Preparation for introducing adaptive TX to net DIM. Signed-off-by: Tal Gilboa --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 1 - drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c | 10 +++--- drivers/net/ethernet/mellanox/mlx5/core/en_main.c| 6 +++--- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 2 +- include/linux/net_dim.h | 2 ++ 5 files changed, 13 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 30cad07..2c18d2f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -238,7 +238,6 @@ struct mlx5e_params { u32 indirection_rqt[MLX5E_INDIR_RQT_SIZE]; bool vlan_strip_disable; bool scatter_fcs_en; - bool rx_dim_enabled; u32 lro_timeout; u32 pflags; struct bpf_prog *xdp_prog; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 37fd024..66c71da 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -394,9 +394,10 @@ int mlx5e_ethtool_get_coalesce(struct mlx5e_priv *priv, coal->rx_coalesce_usecs = priv->channels.params.rx_cq_moderation.usec; coal->rx_max_coalesced_frames = priv->channels.params.rx_cq_moderation.pkts; + coal->use_adaptive_rx_coalesce = + priv->channels.params.rx_cq_moderation.enabled; coal->tx_coalesce_usecs = priv->channels.params.tx_cq_moderation.usec; coal->tx_max_coalesced_frames = priv->channels.params.tx_cq_moderation.pkts; - coal->use_adaptive_rx_coalesce = priv->channels.params.rx_dim_enabled; return 0; } @@ -467,7 +468,8 @@ int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv, new_channels.params.tx_cq_moderation.pkts = coal->tx_max_coalesced_frames; new_channels.params.rx_cq_moderation.usec = coal->rx_coalesce_usecs; new_channels.params.rx_cq_moderation.pkts = coal->rx_max_coalesced_frames; - new_channels.params.rx_dim_enabled= 
!!coal->use_adaptive_rx_coalesce; + new_channels.params.rx_cq_moderation.enabled = + !!coal->use_adaptive_rx_coalesce; if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { priv->channels.params = new_channels.params; @@ -475,7 +477,9 @@ int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv, } /* we are opened */ - reset = !!coal->use_adaptive_rx_coalesce != priv->channels.params.rx_dim_enabled; + reset = !!coal->use_adaptive_rx_coalesce != + priv->channels.params.rx_cq_moderation.enabled; + if (!reset) { mlx5e_set_priv_channels_coalesce(priv, coal); priv->channels.params = new_channels.params; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 37a89b7..9bcc578 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -781,7 +781,7 @@ static int mlx5e_open_rq(struct mlx5e_channel *c, if (err) goto err_destroy_rq; - if (params->rx_dim_enabled) + if (params->rx_cq_moderation.enabled) c->rq.state |= BIT(MLX5E_RQ_STATE_AM); return 0; @@ -4103,7 +4103,7 @@ void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode) params->rx_cq_moderation.usec = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE; - if (params->rx_dim_enabled) { + if (params->rx_cq_moderation.enabled) { switch (cq_period_mode) { case MLX5_CQ_PERIOD_MODE_START_FROM_CQE: params->rx_cq_moderation = @@ -4178,7 +4178,7 @@ void mlx5e_build_nic_params(struct mlx5_core_dev *mdev, rx_cq_period_mode = MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ? 
MLX5_CQ_PERIOD_MODE_START_FROM_CQE : MLX5_CQ_PERIOD_MODE_START_FROM_EQE; - params->rx_dim_enabled = MLX5_CAP_GEN(mdev, cq_moderation); + params->rx_cq_moderation.enabled = MLX5_CAP_GEN(mdev, cq_moderation); mlx5e_set_rx_cq_mode_params(params, rx_cq_period_mode); mlx5e_set_tx_cq_mode_params(params, MLX5_CQ_PERIOD_MODE_START_FROM_EQE); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index d8f68e4..a2918a5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -888,7 +888,7 @@ static void mlx5e_build_rep_params(struct mlx5_core_dev *mdev, params->rq_wq_type = MLX5_WQ_TYPE_LINKED_LIST; params->log_rq_mtu_frames = MLX5E_REP_PARAMS_LOG_R
[PATCH net-next V2 1/4] net/dim: Rename *_get_profile() functions to *_get_rx_moderation()
Preparation for introducing adaptive TX to net DIM. Signed-off-by: Tal Gilboa --- drivers/net/ethernet/broadcom/bcmsysport.c| 6 +++--- drivers/net/ethernet/broadcom/bnxt/bnxt_dim.c | 8 drivers/net/ethernet/broadcom/genet/bcmgenet.c| 6 +++--- drivers/net/ethernet/mellanox/mlx5/core/en_dim.c | 6 +++--- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 6 -- include/linux/net_dim.h | 12 ++-- 6 files changed, 23 insertions(+), 21 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index 4a75b1d..98c5183 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -654,7 +654,7 @@ static int bcm_sysport_set_coalesce(struct net_device *dev, pkts = priv->rx_max_coalesced_frames; if (ec->use_adaptive_rx_coalesce && !priv->dim.use_dim) { - moder = net_dim_get_def_profile(priv->dim.dim.mode); + moder = net_dim_get_def_rx_moderation(priv->dim.dim.mode); usecs = moder.usec; pkts = moder.pkts; } @@ -1064,7 +1064,7 @@ static void bcm_sysport_dim_work(struct work_struct *work) struct bcm_sysport_priv *priv = container_of(ndim, struct bcm_sysport_priv, dim); struct net_dim_cq_moder cur_profile = - net_dim_get_profile(dim->mode, dim->profile_ix); + net_dim_get_rx_moderation(dim->mode, dim->profile_ix); bcm_sysport_set_rx_coalesce(priv, cur_profile.usec, cur_profile.pkts); dim->state = NET_DIM_START_MEASURE; @@ -1436,7 +1436,7 @@ static void bcm_sysport_init_rx_coalesce(struct bcm_sysport_priv *priv) /* If DIM was enabled, re-apply default parameters */ if (dim->use_dim) { - moder = net_dim_get_def_profile(dim->dim.mode); + moder = net_dim_get_def_rx_moderation(dim->dim.mode); usecs = moder.usec; pkts = moder.pkts; } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_dim.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_dim.c index 408dd19..afa97c8 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_dim.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_dim.c @@ -21,11 +21,11 @@ 
void bnxt_dim_work(struct work_struct *work) struct bnxt_napi *bnapi = container_of(cpr, struct bnxt_napi, cp_ring); - struct net_dim_cq_moder cur_profile = net_dim_get_profile(dim->mode, - dim->profile_ix); + struct net_dim_cq_moder cur_moder = + net_dim_get_rx_moderation(dim->mode, dim->profile_ix); - cpr->rx_ring_coal.coal_ticks = cur_profile.usec; - cpr->rx_ring_coal.coal_bufs = cur_profile.pkts; + cpr->rx_ring_coal.coal_ticks = cur_moder.usec; + cpr->rx_ring_coal.coal_bufs = cur_moder.pkts; bnxt_hwrm_set_ring_coal(bnapi->bp, bnapi); dim->state = NET_DIM_START_MEASURE; diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 264fb37..3c3b780 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -652,7 +652,7 @@ static void bcmgenet_set_ring_rx_coalesce(struct bcmgenet_rx_ring *ring, pkts = ring->rx_max_coalesced_frames; if (ec->use_adaptive_rx_coalesce && !ring->dim.use_dim) { - moder = net_dim_get_def_profile(ring->dim.dim.mode); + moder = net_dim_get_def_rx_moderation(ring->dim.dim.mode); usecs = moder.usec; pkts = moder.pkts; } @@ -1924,7 +1924,7 @@ static void bcmgenet_dim_work(struct work_struct *work) struct bcmgenet_rx_ring *ring = container_of(ndim, struct bcmgenet_rx_ring, dim); struct net_dim_cq_moder cur_profile = - net_dim_get_profile(dim->mode, dim->profile_ix); + net_dim_get_rx_moderation(dim->mode, dim->profile_ix); bcmgenet_set_rx_coalesce(ring, cur_profile.usec, cur_profile.pkts); dim->state = NET_DIM_START_MEASURE; @@ -2101,7 +2101,7 @@ static void bcmgenet_init_rx_coalesce(struct bcmgenet_rx_ring *ring) /* If DIM was enabled, re-apply default parameters */ if (dim->use_dim) { - moder = net_dim_get_def_profile(dim->dim.mode); + moder = net_dim_get_def_rx_moderation(dim->dim.mode); usecs = moder.usec; pkts = moder.pkts; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c index 602851a..1b286e1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c +++ b/drivers/net/ethe
Re: [PATCH v5 03/14] PCI: Add pcie_bandwidth_capable() to compute max supported link bandwidth
On Mon, Apr 02, 2018 at 10:34:58AM +0300, Tal Gilboa wrote: > On 4/2/2018 3:40 AM, Bjorn Helgaas wrote: > > On Sun, Apr 01, 2018 at 11:38:53PM +0300, Tal Gilboa wrote: > > > On 3/31/2018 12:05 AM, Bjorn Helgaas wrote: > > > > From: Tal Gilboa > > > > > > > > Add pcie_bandwidth_capable() to compute the max link bandwidth > > > > supported by > > > > a device, based on the max link speed and width, adjusted by the > > > > encoding > > > > overhead. > > > > > > > > The maximum bandwidth of the link is computed as: > > > > > > > > max_link_speed * max_link_width * (1 - encoding_overhead) > > > > > > > > The encoding overhead is about 20% for 2.5 and 5.0 GT/s links using > > > > 8b/10b > > > > encoding, and about 1.5% for 8 GT/s or higher speed links using > > > > 128b/130b > > > > encoding. > > > > > > > > Signed-off-by: Tal Gilboa > > > > [bhelgaas: adjust for pcie_get_speed_cap() and pcie_get_width_cap() > > > > signatures, don't export outside drivers/pci] > > > > Signed-off-by: Bjorn Helgaas > > > > Reviewed-by: Tariq Toukan > > > > --- > > > >drivers/pci/pci.c | 21 + > > > >drivers/pci/pci.h |9 + > > > >2 files changed, 30 insertions(+) > > > > > > > > diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c > > > > index 43075be79388..9ce89e254197 100644 > > > > --- a/drivers/pci/pci.c > > > > +++ b/drivers/pci/pci.c > > > > @@ -5208,6 +5208,27 @@ enum pcie_link_width pcie_get_width_cap(struct > > > > pci_dev *dev) > > > > return PCIE_LNK_WIDTH_UNKNOWN; > > > >} > > > > +/** > > > > + * pcie_bandwidth_capable - calculates a PCI device's link bandwidth > > > > capability > > > > + * @dev: PCI device > > > > + * @speed: storage for link speed > > > > + * @width: storage for link width > > > > + * > > > > + * Calculate a PCI device's link bandwidth by querying for its link > > > > speed > > > > + * and width, multiplying them, and applying encoding overhead. 
> > > > + */ > > > > +u32 pcie_bandwidth_capable(struct pci_dev *dev, enum pci_bus_speed > > > > *speed, > > > > + enum pcie_link_width *width) > > > > +{ > > > > + *speed = pcie_get_speed_cap(dev); > > > > + *width = pcie_get_width_cap(dev); > > > > + > > > > + if (*speed == PCI_SPEED_UNKNOWN || *width == > > > > PCIE_LNK_WIDTH_UNKNOWN) > > > > + return 0; > > > > + > > > > + return *width * PCIE_SPEED2MBS_ENC(*speed); > > > > +} > > > > + > > > >/** > > > > * pci_select_bars - Make BAR mask from the type of resource > > > > * @dev: the PCI device for which BAR mask is made > > > > diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h > > > > index 66738f1050c0..2a50172b9803 100644 > > > > --- a/drivers/pci/pci.h > > > > +++ b/drivers/pci/pci.h > > > > @@ -261,8 +261,17 @@ void pci_disable_bridge_window(struct pci_dev > > > > *dev); > > > > (speed) == PCIE_SPEED_2_5GT ? "2.5 GT/s" : \ > > > > "Unknown speed") > > > > +/* PCIe speed to Mb/s with encoding overhead: 20% for gen2, ~1.5% for > > > > gen3 */ > > > > +#define PCIE_SPEED2MBS_ENC(speed) \ > > > > > > Missing gen4. > > > > I made it "gen3+". I think that's accurate, isn't it? The spec > > doesn't seem to actually use "gen3" as a specific term, but sec 4.2.2 > > says rates of 8 GT/s or higher (which I think includes gen3 and gen4) > > use 128b/130b encoding. > > > > I meant that PCIE_SPEED_16_0GT will return 0 from this macro since it wasn't > added. Need to return 15754. Oh, duh, of course! Sorry for being dense. What about the following? I included the calculation as opposed to just the magic numbers to try to make it clear how they're derived. This has the disadvantage of truncating the result instead of rounding, but I doubt that's significant in this context. If it is, we could use the magic numbers and put the computation in a comment. Another question: we currently deal in Mb/s, not MB/s. 
Mb/s has the advantage of sort of corresponding to the GT/s numbers, but using MB/s would have the advantage of smaller numbers that match the table here: https://en.wikipedia.org/wiki/PCI_Express#History_and_revisions, but I don't know what's most typical in user-facing situations. What's better? commit 946435491b35b7782157e9a4d1bd73071fba7709 Author: Tal Gilboa Date: Fri Mar 30 08:32:03 2018 -0500 PCI: Add pcie_bandwidth_capable() to compute max supported link bandwidth Add pcie_bandwidth_capable() to compute the max link bandwidth supported by a device, based on the max link speed and width, adjusted by the encoding overhead. The maximum bandwidth of the link is computed as: max_link_width * max_link_speed * (1 - encoding_overhead) 2.5 and 5.0 GT/s links use 8b/10b encoding, which reduces the raw bandwidth available by 20%; 8.0 GT/s and faster links use 128b/130b encoding, which reduces it by about 1.5%. The result is in Mb/s, i.e., megabits/second, of r
Re: [PATCH net-next] ipv6: frags: fix /proc/sys/net/ipv6/ip6frag_low_thresh
From: Eric Dumazet Date: Sun, 1 Apr 2018 21:57:59 -0700 > I forgot to change ip6frag_low_thresh proc_handler > from proc_dointvec_minmax to proc_doulongvec_minmax > > Fixes: 3e67f106f619 ("inet: frags: break the 2GB limit for frags storage") > Signed-off-by: Eric Dumazet > Reported-by: Maciej Żenczykowski Applied, thanks Eric.
Re: [PATCH v3 2/2] net: usb: asix88179_178a: de-duplicate code
From: Marc Zyngier Date: Mon, 02 Apr 2018 10:45:40 +0100 > What has changed between this patch and the previous one? Having a bit > of a change-log would certainly help. Also, I would have appreciated a > reply to the questions I had on v2 before you posted a third version. Agreed, and I'm not applying these patches until this is sorted out and explained properly.
Re: [PATCH net-next 0/5] virtio-net: Add SCTP checksum offload support
From: Vladislav Yasevich Date: Mon, 2 Apr 2018 09:40:01 -0400 > Now that we have SCTP offload capabilities in the kernel, we can add > them to virtio as well. First step is SCTP checksum. Vlad, the net-next tree is closed, please resubmit this when the merge window is over and the net-next tree opens back up. Thank you.
Re: [PATCH net-next V2 4/4] net/mlx5e: Enable adaptive-TX moderation
From: Tal Gilboa Date: Mon, 2 Apr 2018 16:59:34 +0300 > diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c > b/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c > index 1b286e1..9cec351 100644 > --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c > +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c > @@ -33,16 +33,30 @@ > #include > #include "en.h" > > +static inline void > +mlx5e_complete_dim_work(struct net_dim *dim, struct net_dim_cq_moder moder, > + struct mlx5_core_dev *mdev, struct mlx5_core_cq *mcq) Do not use inline in foo.c files, let the compiler decide. > diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c > b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c > index f292bb3..ff1d5fe 100644 > --- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c > +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c > @@ -44,6 +44,30 @@ static inline bool mlx5e_channel_no_affinity_change(struct > mlx5e_channel *c) > return cpumask_test_cpu(current_cpu, aff); > } > > +static inline void mlx5e_handle_tx_dim(struct mlx5e_txqsq *sq) > +{ > + struct net_dim_sample dim_sample; Likewise. > +static inline void mlx5e_handle_rx_dim(struct mlx5e_rq *rq) > +{ Likewise.
[PATCH 13/15] ARM: pxa: remove the DMA IO resources
As the last driver using the former mechanism to acquire the DMA requestor line has be converted to the dma_slave_map, remove all these resources from the PXA devices. Signed-off-by: Robert Jarzmik --- arch/arm/mach-pxa/devices.c | 136 1 file changed, 136 deletions(-) diff --git a/arch/arm/mach-pxa/devices.c b/arch/arm/mach-pxa/devices.c index da67ebe9a7d5..c0b3c90fd67f 100644 --- a/arch/arm/mach-pxa/devices.c +++ b/arch/arm/mach-pxa/devices.c @@ -61,16 +61,6 @@ static struct resource pxamci_resources[] = { .end= IRQ_MMC, .flags = IORESOURCE_IRQ, }, - [2] = { - .start = 21, - .end= 21, - .flags = IORESOURCE_DMA, - }, - [3] = { - .start = 22, - .end= 22, - .flags = IORESOURCE_DMA, - }, }; static u64 pxamci_dmamask = 0xUL; @@ -408,16 +398,6 @@ static struct resource pxa_ir_resources[] = { .end= 0x40700023, .flags = IORESOURCE_MEM, }, - [5] = { - .start = 17, - .end= 17, - .flags = IORESOURCE_DMA, - }, - [6] = { - .start = 18, - .end= 18, - .flags = IORESOURCE_DMA, - }, }; struct platform_device pxa_device_ficp = { @@ -546,18 +526,6 @@ static struct resource pxa25x_resource_ssp[] = { .end= IRQ_SSP, .flags = IORESOURCE_IRQ, }, - [2] = { - /* DRCMR for RX */ - .start = 13, - .end= 13, - .flags = IORESOURCE_DMA, - }, - [3] = { - /* DRCMR for TX */ - .start = 14, - .end= 14, - .flags = IORESOURCE_DMA, - }, }; struct platform_device pxa25x_device_ssp = { @@ -584,18 +552,6 @@ static struct resource pxa25x_resource_nssp[] = { .end= IRQ_NSSP, .flags = IORESOURCE_IRQ, }, - [2] = { - /* DRCMR for RX */ - .start = 15, - .end= 15, - .flags = IORESOURCE_DMA, - }, - [3] = { - /* DRCMR for TX */ - .start = 16, - .end= 16, - .flags = IORESOURCE_DMA, - }, }; struct platform_device pxa25x_device_nssp = { @@ -622,18 +578,6 @@ static struct resource pxa25x_resource_assp[] = { .end= IRQ_ASSP, .flags = IORESOURCE_IRQ, }, - [2] = { - /* DRCMR for RX */ - .start = 23, - .end= 23, - .flags = IORESOURCE_DMA, - }, - [3] = { - /* DRCMR for TX */ - .start = 24, - .end= 24, - .flags = 
IORESOURCE_DMA, - }, }; struct platform_device pxa25x_device_assp = { @@ -752,18 +696,6 @@ static struct resource pxa27x_resource_ssp1[] = { .end= IRQ_SSP, .flags = IORESOURCE_IRQ, }, - [2] = { - /* DRCMR for RX */ - .start = 13, - .end= 13, - .flags = IORESOURCE_DMA, - }, - [3] = { - /* DRCMR for TX */ - .start = 14, - .end= 14, - .flags = IORESOURCE_DMA, - }, }; struct platform_device pxa27x_device_ssp1 = { @@ -790,18 +722,6 @@ static struct resource pxa27x_resource_ssp2[] = { .end= IRQ_SSP2, .flags = IORESOURCE_IRQ, }, - [2] = { - /* DRCMR for RX */ - .start = 15, - .end= 15, - .flags = IORESOURCE_DMA, - }, - [3] = { - /* DRCMR for TX */ - .start = 16, - .end= 16, - .flags = IORESOURCE_DMA, - }, }; struct platform_device pxa27x_device_ssp2 = { @@ -828,18 +748,6 @@ static struct resource pxa27x_resource_ssp3[] = { .end= IRQ_SSP3, .flags = IORESOURCE_IRQ, }, - [2] = { - /* DRCMR for RX */ - .start = 66, - .end= 66, - .flags = IORESOURCE_DMA, - }, - [3] = { - /* DRCMR for TX */ - .start = 67, - .end= 67, - .flags = IORESOURCE_DMA, - }, }; struct platform_device pxa27x_device_ssp3 = { @@ -896,16 +804,6 @@ static struct resource pxa3xx_resources_mci2[] = { .end= IRQ_MMC2, .flags = IORESOURCE_IRQ, }, - [2] = { - .start = 93, - .end= 93, - .flags = IORESOURCE_DMA, - }, - [3] = { - .start = 9
[PATCH 09/15] net: irda: pxaficp_ir: remove the dmaengine compat need
As the pxa architecture switched towards the dmaengine slave map, the old compatibility mechanism to acquire the dma requestor line number and priority are not needed anymore. This patch simplifies the dma resource acquisition, using the more generic function dma_request_slave_channel(). Signed-off-by: Robert Jarzmik --- drivers/staging/irda/drivers/pxaficp_ir.c | 14 ++ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/drivers/staging/irda/drivers/pxaficp_ir.c b/drivers/staging/irda/drivers/pxaficp_ir.c index 2ea00a6531f9..9dd6e21dc11e 100644 --- a/drivers/staging/irda/drivers/pxaficp_ir.c +++ b/drivers/staging/irda/drivers/pxaficp_ir.c @@ -20,7 +20,6 @@ #include #include #include -#include #include #include #include @@ -735,9 +734,7 @@ static void pxa_irda_shutdown(struct pxa_irda *si) static int pxa_irda_start(struct net_device *dev) { struct pxa_irda *si = netdev_priv(dev); - dma_cap_mask_t mask; struct dma_slave_config config; - struct pxad_param param; int err; si->speed = 9600; @@ -757,9 +754,6 @@ static int pxa_irda_start(struct net_device *dev) disable_irq(si->icp_irq); err = -EBUSY; - dma_cap_zero(mask); - dma_cap_set(DMA_SLAVE, mask); - param.prio = PXAD_PRIO_LOWEST; memset(&config, 0, sizeof(config)); config.src_addr_width = DMA_SLAVE_BUSWIDTH_1_BYTE; @@ -769,15 +763,11 @@ static int pxa_irda_start(struct net_device *dev) config.src_maxburst = 32; config.dst_maxburst = 32; - param.drcmr = si->drcmr_rx; - si->rxdma = dma_request_slave_channel_compat(mask, pxad_filter_fn, -¶m, &dev->dev, "rx"); + si->rxdma = dma_request_slave_channel(&dev->dev, "rx"); if (!si->rxdma) goto err_rx_dma; - param.drcmr = si->drcmr_tx; - si->txdma = dma_request_slave_channel_compat(mask, pxad_filter_fn, -¶m, &dev->dev, "tx"); + si->txdma = dma_request_slave_channel(&dev->dev, "tx"); if (!si->txdma) goto err_tx_dma; -- 2.11.0
[PATCH 14/15] ARM: pxa: change SSP devices allocation
In order to prepare for the dma_slave_map change for SSP DMA channels allocation, the SSP platform devices will now include a platform data structure which in turn selects which dma channel has to be used for data transfers, especially the PCM ones. Signed-off-by: Robert Jarzmik --- arch/arm/mach-pxa/devices.c| 78 +- arch/arm/mach-pxa/devices.h| 14 ++ arch/arm/mach-pxa/include/mach/audio.h | 12 ++ arch/arm/mach-pxa/pxa25x.c | 4 +- arch/arm/mach-pxa/pxa27x.c | 4 +- arch/arm/mach-pxa/pxa3xx.c | 5 +-- 6 files changed, 86 insertions(+), 31 deletions(-) diff --git a/arch/arm/mach-pxa/devices.c b/arch/arm/mach-pxa/devices.c index c0b3c90fd67f..955d255dc4f4 100644 --- a/arch/arm/mach-pxa/devices.c +++ b/arch/arm/mach-pxa/devices.c @@ -481,6 +481,17 @@ void __init pxa_set_ac97_info(pxa2xx_audio_ops_t *ops) pxa_register_device(&pxa_device_ac97, ops); } +static struct pxa_ssp_info pxa_ssp_infos[] = { + { .dma_chan_rx_name = "ssp1_rx", .dma_chan_tx_name = "ssp1_tx", }, + { .dma_chan_rx_name = "ssp1_rx", .dma_chan_tx_name = "ssp1_tx", }, + { .dma_chan_rx_name = "ssp2_rx", .dma_chan_tx_name = "ssp2_tx", }, + { .dma_chan_rx_name = "ssp2_rx", .dma_chan_tx_name = "ssp2_tx", }, + { .dma_chan_rx_name = "ssp3_rx", .dma_chan_tx_name = "ssp3_tx", }, + { .dma_chan_rx_name = "ssp3_rx", .dma_chan_tx_name = "ssp3_tx", }, + { .dma_chan_rx_name = "ssp4_rx", .dma_chan_tx_name = "ssp4_tx", }, + { .dma_chan_rx_name = "ssp4_rx", .dma_chan_tx_name = "ssp4_tx", }, +}; + #ifdef CONFIG_PXA25x static struct resource pxa25x_resource_pwm0[] = { @@ -528,7 +539,7 @@ static struct resource pxa25x_resource_ssp[] = { }, }; -struct platform_device pxa25x_device_ssp = { +static struct platform_device pxa25x_device_ssp = { .name = "pxa25x-ssp", .id = 0, .dev= { @@ -554,7 +565,7 @@ static struct resource pxa25x_resource_nssp[] = { }, }; -struct platform_device pxa25x_device_nssp = { +static struct platform_device pxa25x_device_nssp = { .name = "pxa25x-nssp", .id = 1, .dev= { @@ -580,7 +591,7 @@ static struct 
resource pxa25x_resource_assp[] = { }, }; -struct platform_device pxa25x_device_assp = { +static struct platform_device pxa25x_device_assp = { /* ASSP is basically equivalent to NSSP */ .name = "pxa25x-nssp", .id = 2, @@ -591,6 +602,22 @@ struct platform_device pxa25x_device_assp = { .resource = pxa25x_resource_assp, .num_resources = ARRAY_SIZE(pxa25x_resource_assp), }; + +static struct platform_device *pxa25x_device_ssps[] = { + &pxa25x_device_ssp, + &pxa25x_device_nssp, + &pxa25x_device_assp, +}; + +void __init pxa25x_set_ssp_info(void) +{ + int ssp; + + for (ssp = 0; ssp < ARRAY_SIZE(pxa25x_device_ssps); ssp++) + pxa_register_device(pxa25x_device_ssps[ssp], + &pxa_ssp_infos[ssp]); +} + #endif /* CONFIG_PXA25x */ #if defined(CONFIG_PXA27x) || defined(CONFIG_PXA3xx) @@ -698,7 +725,7 @@ static struct resource pxa27x_resource_ssp1[] = { }, }; -struct platform_device pxa27x_device_ssp1 = { +static struct platform_device pxa27x_device_ssp1 = { .name = "pxa27x-ssp", .id = 0, .dev= { @@ -724,7 +751,7 @@ static struct resource pxa27x_resource_ssp2[] = { }, }; -struct platform_device pxa27x_device_ssp2 = { +static struct platform_device pxa27x_device_ssp2 = { .name = "pxa27x-ssp", .id = 1, .dev= { @@ -750,7 +777,7 @@ static struct resource pxa27x_resource_ssp3[] = { }, }; -struct platform_device pxa27x_device_ssp3 = { +static struct platform_device pxa27x_device_ssp3 = { .name = "pxa27x-ssp", .id = 2, .dev= { @@ -761,6 +788,21 @@ struct platform_device pxa27x_device_ssp3 = { .num_resources = ARRAY_SIZE(pxa27x_resource_ssp3), }; +static struct platform_device *pxa27x_device_ssps[] = { + &pxa27x_device_ssp1, + &pxa27x_device_ssp2, + &pxa27x_device_ssp3, +}; + +void __init pxa27x_set_ssp_info(void) +{ + int ssp; + + for (ssp = 0; ssp < ARRAY_SIZE(pxa27x_device_ssps); ssp++) + pxa_register_device(pxa27x_device_ssps[ssp], + &pxa_ssp_infos[ssp]); +} + static struct resource pxa27x_resource_pwm0[] = { [0] = { .start = 0x40b0, @@ -951,7 +993,7 @@ static struct resource 
pxa3xx_resource_ssp4[] = { * make the driver set the correct internal type, hence we provide specific * platform_devices for each of them. */ -struct platform_device pxa3xx_device_ssp1 = { +static struct
[PATCH 15/15] ARM: pxa: change SSP DMA channels allocation
Now the dma_slave_map is available for PXA architecture, switch the SSP device to it. This specifically means that : - for platform data based machines, the DMA requestor channels are extracted from platform data and passed further to the SSP user, ie. usually the pxa-pcm-audio driver - for device tree platforms, the dma node should be hooked into the pxa-pcm-audio node. Signed-off-by: Robert Jarzmik --- arch/arm/plat-pxa/ssp.c| 50 +- include/linux/pxa2xx_ssp.h | 4 ++-- sound/soc/pxa/pxa-ssp.c| 5 ++--- 3 files changed, 9 insertions(+), 50 deletions(-) diff --git a/arch/arm/plat-pxa/ssp.c b/arch/arm/plat-pxa/ssp.c index ba13f793fbce..3457f01e3340 100644 --- a/arch/arm/plat-pxa/ssp.c +++ b/arch/arm/plat-pxa/ssp.c @@ -16,6 +16,7 @@ * Author: Liam Girdwood */ +#include #include #include #include @@ -116,6 +117,7 @@ static int pxa_ssp_probe(struct platform_device *pdev) struct resource *res; struct ssp_device *ssp; struct device *dev = &pdev->dev; + struct pxa_ssp_info *info = dev_get_platdata(dev); ssp = devm_kzalloc(dev, sizeof(struct ssp_device), GFP_KERNEL); if (ssp == NULL) @@ -127,51 +129,9 @@ static int pxa_ssp_probe(struct platform_device *pdev) if (IS_ERR(ssp->clk)) return PTR_ERR(ssp->clk); - if (dev->of_node) { - struct of_phandle_args dma_spec; - struct device_node *np = dev->of_node; - int ret; - - /* -* FIXME: we should allocate the DMA channel from this -* context and pass the channel down to the ssp users. 
-* For now, we lookup the rx and tx indices manually -*/ - - /* rx */ - ret = of_parse_phandle_with_args(np, "dmas", "#dma-cells", -0, &dma_spec); - - if (ret) { - dev_err(dev, "Can't parse dmas property\n"); - return -ENODEV; - } - ssp->drcmr_rx = dma_spec.args[0]; - of_node_put(dma_spec.np); - - /* tx */ - ret = of_parse_phandle_with_args(np, "dmas", "#dma-cells", -1, &dma_spec); - if (ret) { - dev_err(dev, "Can't parse dmas property\n"); - return -ENODEV; - } - ssp->drcmr_tx = dma_spec.args[0]; - of_node_put(dma_spec.np); - } else { - res = platform_get_resource(pdev, IORESOURCE_DMA, 0); - if (res == NULL) { - dev_err(dev, "no SSP RX DRCMR defined\n"); - return -ENODEV; - } - ssp->drcmr_rx = res->start; - - res = platform_get_resource(pdev, IORESOURCE_DMA, 1); - if (res == NULL) { - dev_err(dev, "no SSP TX DRCMR defined\n"); - return -ENODEV; - } - ssp->drcmr_tx = res->start; + if (!dev->of_node && info) { + ssp->dma_chan_rx = info->dma_chan_rx_name; + ssp->dma_chan_tx = info->dma_chan_tx_name; } res = platform_get_resource(pdev, IORESOURCE_MEM, 0); diff --git a/include/linux/pxa2xx_ssp.h b/include/linux/pxa2xx_ssp.h index 8461b18e4608..99c99d397e4d 100644 --- a/include/linux/pxa2xx_ssp.h +++ b/include/linux/pxa2xx_ssp.h @@ -212,9 +212,9 @@ struct ssp_device { int type; int use_count; int irq; - int drcmr_rx; - int drcmr_tx; + const char *dma_chan_rx; + const char *dma_chan_tx; struct device_node *of_node; }; diff --git a/sound/soc/pxa/pxa-ssp.c b/sound/soc/pxa/pxa-ssp.c index 0291c7cb64eb..a0189b88f1d2 100644 --- a/sound/soc/pxa/pxa-ssp.c +++ b/sound/soc/pxa/pxa-ssp.c @@ -104,9 +104,8 @@ static int pxa_ssp_startup(struct snd_pcm_substream *substream, dma = kzalloc(sizeof(struct snd_dmaengine_dai_dma_data), GFP_KERNEL); if (!dma) return -ENOMEM; - - dma->filter_data = substream->stream == SNDRV_PCM_STREAM_PLAYBACK ? - &ssp->drcmr_tx : &ssp->drcmr_rx; + dma->chan_name = substream->stream == SNDRV_PCM_STREAM_PLAYBACK ? 
+ ssp->dma_chan_tx : ssp->dma_chan_rx; snd_soc_dai_set_dma_data(cpu_dai, substream, dma); -- 2.11.0
[PATCH 12/15] dmaengine: pxa: make the filter function internal
As the pxa architecture and all its related drivers do not rely anymore on the filter function, thanks to the slave map conversion, make pxad_filter_fn() static, and remove it from the global namespace. Signed-off-by: Robert Jarzmik --- drivers/dma/pxa_dma.c | 5 ++--- include/linux/dma/pxa-dma.h | 11 --- 2 files changed, 2 insertions(+), 14 deletions(-) diff --git a/drivers/dma/pxa_dma.c b/drivers/dma/pxa_dma.c index 9505334f9c6e..a332ad1d7dfb 100644 --- a/drivers/dma/pxa_dma.c +++ b/drivers/dma/pxa_dma.c @@ -179,7 +179,7 @@ static unsigned int pxad_drcmr(unsigned int line) return 0x1000 + line * 4; } -bool pxad_filter_fn(struct dma_chan *chan, void *param); +static bool pxad_filter_fn(struct dma_chan *chan, void *param); /* * Debug fs @@ -1496,7 +1496,7 @@ static struct platform_driver pxad_driver = { .remove = pxad_remove, }; -bool pxad_filter_fn(struct dma_chan *chan, void *param) +static bool pxad_filter_fn(struct dma_chan *chan, void *param) { struct pxad_chan *c = to_pxad_chan(chan); struct pxad_param *p = param; @@ -1509,7 +1509,6 @@ bool pxad_filter_fn(struct dma_chan *chan, void *param) return true; } -EXPORT_SYMBOL_GPL(pxad_filter_fn); module_platform_driver(pxad_driver); diff --git a/include/linux/dma/pxa-dma.h b/include/linux/dma/pxa-dma.h index 9fc594f69eff..fceb5df07097 100644 --- a/include/linux/dma/pxa-dma.h +++ b/include/linux/dma/pxa-dma.h @@ -23,15 +23,4 @@ struct pxad_param { enum pxad_chan_prio prio; }; -struct dma_chan; - -#ifdef CONFIG_PXA_DMA -bool pxad_filter_fn(struct dma_chan *chan, void *param); -#else -static inline bool pxad_filter_fn(struct dma_chan *chan, void *param) -{ - return false; -} -#endif - #endif /* _PXA_DMA_H_ */ -- 2.11.0
[PATCH 10/15] ata: pata_pxa: remove the dmaengine compat need
As the pxa architecture switched towards the dmaengine slave map, the old compatibility mechanism to acquire the dma requestor line number and priority are not needed anymore. This patch simplifies the dma resource acquisition, using the more generic function dma_request_slave_channel(). Signed-off-by: Robert Jarzmik --- drivers/ata/pata_pxa.c | 10 +- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/drivers/ata/pata_pxa.c b/drivers/ata/pata_pxa.c index f6c46e9a4dc0..e8b6a2e464c9 100644 --- a/drivers/ata/pata_pxa.c +++ b/drivers/ata/pata_pxa.c @@ -25,7 +25,6 @@ #include #include #include -#include #include #include #include @@ -180,8 +179,6 @@ static int pxa_ata_probe(struct platform_device *pdev) struct resource *irq_res; struct pata_pxa_pdata *pdata = dev_get_platdata(&pdev->dev); struct dma_slave_config config; - dma_cap_mask_t mask; - struct pxad_param param; int ret = 0; /* @@ -278,10 +275,6 @@ static int pxa_ata_probe(struct platform_device *pdev) ap->private_data = data; - dma_cap_zero(mask); - dma_cap_set(DMA_SLAVE, mask); - param.prio = PXAD_PRIO_LOWEST; - param.drcmr = pdata->dma_dreq; memset(&config, 0, sizeof(config)); config.src_addr_width = DMA_SLAVE_BUSWIDTH_2_BYTES; config.dst_addr_width = DMA_SLAVE_BUSWIDTH_2_BYTES; @@ -294,8 +287,7 @@ static int pxa_ata_probe(struct platform_device *pdev) * Request the DMA channel */ data->dma_chan = - dma_request_slave_channel_compat(mask, pxad_filter_fn, -¶m, &pdev->dev, "data"); + dma_request_slave_channel(&pdev->dev, "data"); if (!data->dma_chan) return -EBUSY; ret = dmaengine_slave_config(data->dma_chan, &config); -- 2.11.0
[PATCH 11/15] dmaengine: pxa: document pxad_param
Add some documentation for the pxad_param structure, and describe the contract behind the minimal required priority of a DMA channel. Signed-off-by: Robert Jarzmik --- include/linux/dma/pxa-dma.h | 9 + 1 file changed, 9 insertions(+) diff --git a/include/linux/dma/pxa-dma.h b/include/linux/dma/pxa-dma.h index e56ec7af4fd7..9fc594f69eff 100644 --- a/include/linux/dma/pxa-dma.h +++ b/include/linux/dma/pxa-dma.h @@ -9,6 +9,15 @@ enum pxad_chan_prio { PXAD_PRIO_LOWEST, }; +/** + * struct pxad_param - dma channel request parameters + * @drcmr: requestor line number + * @prio: minimal mandatory priority of the channel + * + * If a requested channel is granted, its priority will be at least @prio, + * ie. if PXAD_PRIO_LOW is required, the requested channel will be either + * PXAD_PRIO_LOW, PXAD_PRIO_NORMAL or PXAD_PRIO_HIGHEST. + */ struct pxad_param { unsigned int drcmr; enum pxad_chan_prio prio; -- 2.11.0
[PATCH 07/15] net: smc91x: remove the dmaengine compat need
From: Robert Jarzmik As the pxa architecture switched towards the dmaengine slave map, the old compatibility mechanism to acquire the dma requestor line number and priority are not needed anymore. This patch simplifies the dma resource acquisition, using the more generic function dma_request_slave_channel(). Signed-off-by: Robert Jarzmik --- drivers/net/ethernet/smsc/smc91x.c | 12 +--- drivers/net/ethernet/smsc/smc91x.h | 1 - 2 files changed, 1 insertion(+), 12 deletions(-) diff --git a/drivers/net/ethernet/smsc/smc91x.c b/drivers/net/ethernet/smsc/smc91x.c index 080428762858..4c600f430f6d 100644 --- a/drivers/net/ethernet/smsc/smc91x.c +++ b/drivers/net/ethernet/smsc/smc91x.c @@ -2018,18 +2018,8 @@ static int smc_probe(struct net_device *dev, void __iomem *ioaddr, lp->cfg.flags |= SMC91X_USE_DMA; # endif if (lp->cfg.flags & SMC91X_USE_DMA) { - dma_cap_mask_t mask; - struct pxad_param param; - - dma_cap_zero(mask); - dma_cap_set(DMA_SLAVE, mask); - param.prio = PXAD_PRIO_LOWEST; - param.drcmr = -1UL; - lp->dma_chan = - dma_request_slave_channel_compat(mask, pxad_filter_fn, -¶m, &dev->dev, -"data"); + dma_request_slave_channel(lp->device, "data"); } #endif diff --git a/drivers/net/ethernet/smsc/smc91x.h b/drivers/net/ethernet/smsc/smc91x.h index 08b17adf0a65..e849b6c2fa60 100644 --- a/drivers/net/ethernet/smsc/smc91x.h +++ b/drivers/net/ethernet/smsc/smc91x.h @@ -327,7 +327,6 @@ struct smc_local { * as RX which can overrun memory and lose packets. */ #include -#include #ifdef SMC_insl #undef SMC_insl -- 2.11.0
[PATCH 08/15] ASoC: pxa: remove the dmaengine compat need
As the pxa architecture switched towards the dmaengine slave map, the old compatibility mechanism to acquire the dma requestor line number and priority are not needed anymore. This patch simplifies the dma resource acquisition, using the more generic function dma_request_slave_channel(). Signed-off-by: Robert Jarzmik --- sound/arm/pxa2xx-ac97.c | 14 ++ sound/arm/pxa2xx-pcm-lib.c | 6 +++--- sound/soc/pxa/pxa2xx-ac97.c | 32 +--- 3 files changed, 10 insertions(+), 42 deletions(-) diff --git a/sound/arm/pxa2xx-ac97.c b/sound/arm/pxa2xx-ac97.c index 4bc244c40f80..236a63cdaf9f 100644 --- a/sound/arm/pxa2xx-ac97.c +++ b/sound/arm/pxa2xx-ac97.c @@ -63,28 +63,18 @@ static struct snd_ac97_bus_ops pxa2xx_ac97_ops = { .reset = pxa2xx_ac97_legacy_reset, }; -static struct pxad_param pxa2xx_ac97_pcm_out_req = { - .prio = PXAD_PRIO_LOWEST, - .drcmr = 12, -}; - static struct snd_dmaengine_dai_dma_data pxa2xx_ac97_pcm_out = { .addr = __PREG(PCDR), .addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES, + .chan_name = "pcm_pcm_stereo_out", .maxburst = 32, - .filter_data= &pxa2xx_ac97_pcm_out_req, -}; - -static struct pxad_param pxa2xx_ac97_pcm_in_req = { - .prio = PXAD_PRIO_LOWEST, - .drcmr = 11, }; static struct snd_dmaengine_dai_dma_data pxa2xx_ac97_pcm_in = { .addr = __PREG(PCDR), .addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES, + .chan_name = "pcm_pcm_stereo_in", .maxburst = 32, - .filter_data= &pxa2xx_ac97_pcm_in_req, }; static struct snd_pcm *pxa2xx_ac97_pcm; diff --git a/sound/arm/pxa2xx-pcm-lib.c b/sound/arm/pxa2xx-pcm-lib.c index e8da3b8ee721..cbfaba60b79a 100644 --- a/sound/arm/pxa2xx-pcm-lib.c +++ b/sound/arm/pxa2xx-pcm-lib.c @@ -125,9 +125,9 @@ int __pxa2xx_pcm_open(struct snd_pcm_substream *substream) if (ret < 0) return ret; - return snd_dmaengine_pcm_open_request_chan(substream, - pxad_filter_fn, - dma_params->filter_data); + return snd_dmaengine_pcm_open( + substream, dma_request_slave_channel(rtd->platform->dev, +dma_params->chan_name)); } EXPORT_SYMBOL(__pxa2xx_pcm_open); diff --git 
a/sound/soc/pxa/pxa2xx-ac97.c b/sound/soc/pxa/pxa2xx-ac97.c index 803818aabee9..1b41c0f2a8fb 100644 --- a/sound/soc/pxa/pxa2xx-ac97.c +++ b/sound/soc/pxa/pxa2xx-ac97.c @@ -68,61 +68,39 @@ static struct snd_ac97_bus_ops pxa2xx_ac97_ops = { .reset = pxa2xx_ac97_cold_reset, }; -static struct pxad_param pxa2xx_ac97_pcm_stereo_in_req = { - .prio = PXAD_PRIO_LOWEST, - .drcmr = 11, -}; - static struct snd_dmaengine_dai_dma_data pxa2xx_ac97_pcm_stereo_in = { .addr = __PREG(PCDR), .addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES, + .chan_name = "pcm_pcm_stereo_in", .maxburst = 32, - .filter_data= &pxa2xx_ac97_pcm_stereo_in_req, -}; - -static struct pxad_param pxa2xx_ac97_pcm_stereo_out_req = { - .prio = PXAD_PRIO_LOWEST, - .drcmr = 12, }; static struct snd_dmaengine_dai_dma_data pxa2xx_ac97_pcm_stereo_out = { .addr = __PREG(PCDR), .addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES, + .chan_name = "pcm_pcm_stereo_out", .maxburst = 32, - .filter_data= &pxa2xx_ac97_pcm_stereo_out_req, }; -static struct pxad_param pxa2xx_ac97_pcm_aux_mono_out_req = { - .prio = PXAD_PRIO_LOWEST, - .drcmr = 10, -}; static struct snd_dmaengine_dai_dma_data pxa2xx_ac97_pcm_aux_mono_out = { .addr = __PREG(MODR), .addr_width = DMA_SLAVE_BUSWIDTH_2_BYTES, + .chan_name = "pcm_aux_mono_out", .maxburst = 16, - .filter_data= &pxa2xx_ac97_pcm_aux_mono_out_req, }; -static struct pxad_param pxa2xx_ac97_pcm_aux_mono_in_req = { - .prio = PXAD_PRIO_LOWEST, - .drcmr = 9, -}; static struct snd_dmaengine_dai_dma_data pxa2xx_ac97_pcm_aux_mono_in = { .addr = __PREG(MODR), .addr_width = DMA_SLAVE_BUSWIDTH_2_BYTES, + .chan_name = "pcm_aux_mono_in", .maxburst = 16, - .filter_data= &pxa2xx_ac97_pcm_aux_mono_in_req, }; -static struct pxad_param pxa2xx_ac97_pcm_aux_mic_mono_req = { - .prio = PXAD_PRIO_LOWEST, - .drcmr = 8, -}; static struct snd_dmaengine_dai_dma_data pxa2xx_ac97_pcm_mic_mono_in = { .addr = __PREG(MCDR), .addr_width = DMA_SLAVE_BUSWIDTH_2_BYTES, + .chan_name = "pcm_aux_mic_mono", .maxburst = 16, - .filter_data= 
&pxa2xx_ac97_pcm_aux_mic_mono_req, }; static int pxa2xx_ac97_hifi_startup(struct snd_pcm_substream *substream, -- 2.11.0
[PATCH 06/15] net: smc911x: remove the dmaengine compat need
From: Robert Jarzmik As the pxa architecture switched towards the dmaengine slave map, the old compatibility mechanism to acquire the dma requestor line number and priority are not needed anymore. This patch simplifies the dma resource acquisition, using the more generic function dma_request_slave_channel(). Signed-off-by: Robert Jarzmik --- drivers/net/ethernet/smsc/smc911x.c | 16 ++-- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/smsc/smc911x.c b/drivers/net/ethernet/smsc/smc911x.c index 05157442a980..4c3713bd5caa 100644 --- a/drivers/net/ethernet/smsc/smc911x.c +++ b/drivers/net/ethernet/smsc/smc911x.c @@ -74,7 +74,6 @@ static const char version[] = #include #include -#include #include @@ -1794,8 +1793,6 @@ static int smc911x_probe(struct net_device *dev) unsigned long irq_flags; #ifdef SMC_USE_DMA struct dma_slave_config config; - dma_cap_mask_t mask; - struct pxad_param param; #endif DBG(SMC_DEBUG_FUNC, dev, "--> %s\n", __func__); @@ -1969,17 +1966,8 @@ static int smc911x_probe(struct net_device *dev) #ifdef SMC_USE_DMA - dma_cap_zero(mask); - dma_cap_set(DMA_SLAVE, mask); - param.prio = PXAD_PRIO_LOWEST; - param.drcmr = -1UL; - - lp->rxdma = - dma_request_slave_channel_compat(mask, pxad_filter_fn, -¶m, &dev->dev, "rx"); - lp->txdma = - dma_request_slave_channel_compat(mask, pxad_filter_fn, -¶m, &dev->dev, "tx"); + lp->rxdma = dma_request_slave_channel(&dev->dev, "rx"); + lp->txdma = dma_request_slave_channel(&dev->dev, "tx"); lp->rxdma_active = 0; lp->txdma_active = 0; -- 2.11.0
[PATCH 04/15] media: pxa_camera: remove the dmaengine compat need
From: Robert Jarzmik As the pxa architecture switched towards the dmaengine slave map, the old compatibility mechanism to acquire the dma requestor line number and priority are not needed anymore. This patch simplifies the dma resource acquisition, using the more generic function dma_request_slave_channel(). Signed-off-by: Robert Jarzmik --- drivers/media/platform/pxa_camera.c | 22 +++--- 1 file changed, 3 insertions(+), 19 deletions(-) diff --git a/drivers/media/platform/pxa_camera.c b/drivers/media/platform/pxa_camera.c index c71a00736541..4c82d1880753 100644 --- a/drivers/media/platform/pxa_camera.c +++ b/drivers/media/platform/pxa_camera.c @@ -2357,8 +2357,6 @@ static int pxa_camera_probe(struct platform_device *pdev) .src_maxburst = 8, .direction = DMA_DEV_TO_MEM, }; - dma_cap_mask_t mask; - struct pxad_param params; char clk_name[V4L2_CLK_NAME_SIZE]; int irq; int err = 0, i; @@ -2432,34 +2430,20 @@ static int pxa_camera_probe(struct platform_device *pdev) pcdev->base = base; /* request dma */ - dma_cap_zero(mask); - dma_cap_set(DMA_SLAVE, mask); - dma_cap_set(DMA_PRIVATE, mask); - - params.prio = 0; - params.drcmr = 68; - pcdev->dma_chans[0] = - dma_request_slave_channel_compat(mask, pxad_filter_fn, -¶ms, &pdev->dev, "CI_Y"); + pcdev->dma_chans[0] = dma_request_slave_channel(&pdev->dev, "CI_Y"); if (!pcdev->dma_chans[0]) { dev_err(&pdev->dev, "Can't request DMA for Y\n"); return -ENODEV; } - params.drcmr = 69; - pcdev->dma_chans[1] = - dma_request_slave_channel_compat(mask, pxad_filter_fn, -¶ms, &pdev->dev, "CI_U"); + pcdev->dma_chans[1] = dma_request_slave_channel(&pdev->dev, "CI_U"); if (!pcdev->dma_chans[1]) { dev_err(&pdev->dev, "Can't request DMA for Y\n"); err = -ENODEV; goto exit_free_dma_y; } - params.drcmr = 70; - pcdev->dma_chans[2] = - dma_request_slave_channel_compat(mask, pxad_filter_fn, -¶ms, &pdev->dev, "CI_V"); + pcdev->dma_chans[2] = dma_request_slave_channel(&pdev->dev, "CI_V"); if (!pcdev->dma_chans[2]) { dev_err(&pdev->dev, "Can't request 
DMA for V\n"); err = -ENODEV; -- 2.11.0
Re: [PATCH net-next V2 0/4] Introduce adaptive TX interrupt moderation to net DIM
From: Tal Gilboa Date: Mon, 2 Apr 2018 16:59:30 +0300 > Net DIM is a library designed for dynamic interrupt moderation. It was > implemented and optimized with receive side interrupts in mind, since these > are usually the CPU expensive ones. This patch-set introduces adaptive > transmit > interrupt moderation to net DIM, complete with a usage in the mlx5e driver. > Using adaptive TX behavior would reduce interrupt rate for multiple scenarios. > Furthermore, it is essential for increasing bandwidth on cases where payload > aggregation is required. > > v2: Rebased over proper tree. > > v1: Fix compilation issues due to missed function renaming. This series still needs fixes, and the net-next tree has closed meanwhile. And to be honest, handling this series has been very painful for me so far. The patches either didn't apply or didn't even compile. Please do not resubmit this until the merge window is over and the net-next tree opens up again. Thank you.
[PATCH 03/15] mmc: pxamci: remove the dmaengine compat need
As the pxa architecture switched towards the dmaengine slave map, the old compatibility mechanism to acquire the dma requestor line number and priority are not needed anymore. This patch simplifies the dma resource acquisition, using the more generic function dma_request_slave_channel(). Signed-off-by: Robert Jarzmik --- drivers/mmc/host/pxamci.c | 29 +++-- 1 file changed, 3 insertions(+), 26 deletions(-) diff --git a/drivers/mmc/host/pxamci.c b/drivers/mmc/host/pxamci.c index c763b404510f..6c94474e36f4 100644 --- a/drivers/mmc/host/pxamci.c +++ b/drivers/mmc/host/pxamci.c @@ -24,7 +24,6 @@ #include #include #include -#include #include #include #include @@ -637,10 +636,8 @@ static int pxamci_probe(struct platform_device *pdev) { struct mmc_host *mmc; struct pxamci_host *host = NULL; - struct resource *r, *dmarx, *dmatx; - struct pxad_param param_rx, param_tx; + struct resource *r; int ret, irq, gpio_cd = -1, gpio_ro = -1, gpio_power = -1; - dma_cap_mask_t mask; ret = pxamci_of_init(pdev); if (ret) @@ -739,34 +736,14 @@ static int pxamci_probe(struct platform_device *pdev) platform_set_drvdata(pdev, mmc); - if (!pdev->dev.of_node) { - dmarx = platform_get_resource(pdev, IORESOURCE_DMA, 0); - dmatx = platform_get_resource(pdev, IORESOURCE_DMA, 1); - if (!dmarx || !dmatx) { - ret = -ENXIO; - goto out; - } - param_rx.prio = PXAD_PRIO_LOWEST; - param_rx.drcmr = dmarx->start; - param_tx.prio = PXAD_PRIO_LOWEST; - param_tx.drcmr = dmatx->start; - } - - dma_cap_zero(mask); - dma_cap_set(DMA_SLAVE, mask); - - host->dma_chan_rx = - dma_request_slave_channel_compat(mask, pxad_filter_fn, -¶m_rx, &pdev->dev, "rx"); + host->dma_chan_rx = dma_request_slave_channel(&pdev->dev, "rx"); if (host->dma_chan_rx == NULL) { dev_err(&pdev->dev, "unable to request rx dma channel\n"); ret = -ENODEV; goto out; } - host->dma_chan_tx = - dma_request_slave_channel_compat(mask, pxad_filter_fn, -¶m_tx, &pdev->dev, "tx"); + host->dma_chan_tx = dma_request_slave_channel(&pdev->dev, "tx"); if 
(host->dma_chan_tx == NULL) { dev_err(&pdev->dev, "unable to request tx dma channel\n"); ret = -ENODEV; -- 2.11.0
[PATCH 01/15] dmaengine: pxa: use a dma slave map
In order to remove the specific knowledge of the dma mapping from PXA drivers, add a default slave map for pxa architectures. This won't impact MMP architecture, but is aimed only at all PXA boards. This is the first step, and once all drivers are converted, pxad_filter_fn() will be made static, and the DMA resources removed from device.c. Signed-off-by: Robert Jarzmik Reported-by: Arnd Bergmann --- drivers/dma/pxa_dma.c | 10 +- include/linux/platform_data/mmp_dma.h | 4 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/dma/pxa_dma.c b/drivers/dma/pxa_dma.c index b53fb618bbf6..9505334f9c6e 100644 --- a/drivers/dma/pxa_dma.c +++ b/drivers/dma/pxa_dma.c @@ -179,6 +179,8 @@ static unsigned int pxad_drcmr(unsigned int line) return 0x1000 + line * 4; } +bool pxad_filter_fn(struct dma_chan *chan, void *param); + /* * Debug fs */ @@ -1396,9 +1398,10 @@ static int pxad_probe(struct platform_device *op) { struct pxad_device *pdev; const struct of_device_id *of_id; + const struct dma_slave_map *slave_map = NULL; struct mmp_dma_platdata *pdata = dev_get_platdata(&op->dev); struct resource *iores; - int ret, dma_channels = 0, nb_requestors = 0; + int ret, dma_channels = 0, nb_requestors = 0, slave_map_cnt = 0; const enum dma_slave_buswidth widths = DMA_SLAVE_BUSWIDTH_1_BYTE | DMA_SLAVE_BUSWIDTH_2_BYTES | DMA_SLAVE_BUSWIDTH_4_BYTES; @@ -1429,6 +1432,8 @@ static int pxad_probe(struct platform_device *op) } else if (pdata && pdata->dma_channels) { dma_channels = pdata->dma_channels; nb_requestors = pdata->nb_requestors; + slave_map = pdata->slave_map; + slave_map_cnt = pdata->slave_map_cnt; } else { dma_channels = 32; /* default 32 channel */ } @@ -1440,6 +1445,9 @@ static int pxad_probe(struct platform_device *op) pdev->slave.device_prep_dma_memcpy = pxad_prep_memcpy; pdev->slave.device_prep_slave_sg = pxad_prep_slave_sg; pdev->slave.device_prep_dma_cyclic = pxad_prep_dma_cyclic; + pdev->slave.filter.map = slave_map; + pdev->slave.filter.mapcnt = 
slave_map_cnt; + pdev->slave.filter.fn = pxad_filter_fn; pdev->slave.copy_align = PDMA_ALIGNMENT; pdev->slave.src_addr_widths = widths; diff --git a/include/linux/platform_data/mmp_dma.h b/include/linux/platform_data/mmp_dma.h index d1397c8ed94e..6397b9c8149a 100644 --- a/include/linux/platform_data/mmp_dma.h +++ b/include/linux/platform_data/mmp_dma.h @@ -12,9 +12,13 @@ #ifndef MMP_DMA_H #define MMP_DMA_H +struct dma_slave_map; + struct mmp_dma_platdata { int dma_channels; int nb_requestors; + int slave_map_cnt; + const struct dma_slave_map *slave_map; }; #endif /* MMP_DMA_H */ -- 2.11.0
[PATCH 00/15] ARM: pxa: switch to DMA slave maps
Hi, This series is aimed at removing the dmaengine slave compat use, and transfer knowledge of the DMA requestors into architecture code. This was discussed/advised by Arnd a couple of years back, it's almost time. The series is divided in 3 phases : - phase 1 : patch 1/15 and patch 2/15 => this is the preparation work - phase 2 : patches 3/15 .. 10/15 => this is the switch of all the drivers => this one will require either an Ack of the maintainers or be taken by them once phase 1 is merged - phase 3 : patches 11/15 => this is the last part, cleanup and removal of export of the DMA filter function As this looks like a patch bomb, each maintainer expressing for his tree either an Ack or "I want to take through my tree" will be spared in the next iterations of this series. Several of these changes have been tested on actual hardware, including : - pxamci - pxa_camera - smc* - ASoC and SSP Happy review. Robert Jarzmik (15): dmaengine: pxa: use a dma slave map ARM: pxa: add dma slave map mmc: pxamci: remove the dmaengine compat need media: pxa_camera: remove the dmaengine compat need mtd: nand: pxa3xx: remove the dmaengine compat need net: smc911x: remove the dmaengine compat need net: smc91x: remove the dmaengine compat need ASoC: pxa: remove the dmaengine compat need net: irda: pxaficp_ir: remove the dmaengine compat need ata: pata_pxa: remove the dmaengine compat need dmaengine: pxa: document pxad_param dmaengine: pxa: make the filter function internal ARM: pxa: remove the DMA IO resources ARM: pxa: change SSP devices allocation ARM: pxa: change SSP DMA channels allocation arch/arm/mach-pxa/devices.c | 269 ++ arch/arm/mach-pxa/devices.h | 14 +- arch/arm/mach-pxa/include/mach/audio.h| 12 ++ arch/arm/mach-pxa/pxa25x.c| 4 +- arch/arm/mach-pxa/pxa27x.c| 4 +- arch/arm/mach-pxa/pxa3xx.c| 5 +- arch/arm/plat-pxa/ssp.c | 50 +- drivers/ata/pata_pxa.c| 10 +- drivers/dma/pxa_dma.c | 13 +- drivers/media/platform/pxa_camera.c | 22 +-- drivers/mmc/host/pxamci.c | 29 +--- 
drivers/mtd/nand/pxa3xx_nand.c| 10 +- drivers/net/ethernet/smsc/smc911x.c | 16 +- drivers/net/ethernet/smsc/smc91x.c| 12 +- drivers/net/ethernet/smsc/smc91x.h| 1 - drivers/staging/irda/drivers/pxaficp_ir.c | 14 +- include/linux/dma/pxa-dma.h | 20 +-- include/linux/platform_data/mmp_dma.h | 4 + include/linux/pxa2xx_ssp.h| 4 +- sound/arm/pxa2xx-ac97.c | 14 +- sound/arm/pxa2xx-pcm-lib.c| 6 +- sound/soc/pxa/pxa-ssp.c | 5 +- sound/soc/pxa/pxa2xx-ac97.c | 32 +--- 23 files changed, 196 insertions(+), 374 deletions(-) -- 2.11.0
Re: [PATCH v5 03/14] PCI: Add pcie_bandwidth_capable() to compute max supported link bandwidth
On 4/2/2018 5:05 PM, Bjorn Helgaas wrote: On Mon, Apr 02, 2018 at 10:34:58AM +0300, Tal Gilboa wrote: On 4/2/2018 3:40 AM, Bjorn Helgaas wrote: On Sun, Apr 01, 2018 at 11:38:53PM +0300, Tal Gilboa wrote: On 3/31/2018 12:05 AM, Bjorn Helgaas wrote: From: Tal Gilboa Add pcie_bandwidth_capable() to compute the max link bandwidth supported by a device, based on the max link speed and width, adjusted by the encoding overhead. The maximum bandwidth of the link is computed as: max_link_speed * max_link_width * (1 - encoding_overhead) The encoding overhead is about 20% for 2.5 and 5.0 GT/s links using 8b/10b encoding, and about 1.5% for 8 GT/s or higher speed links using 128b/130b encoding. Signed-off-by: Tal Gilboa [bhelgaas: adjust for pcie_get_speed_cap() and pcie_get_width_cap() signatures, don't export outside drivers/pci] Signed-off-by: Bjorn Helgaas Reviewed-by: Tariq Toukan --- drivers/pci/pci.c | 21 + drivers/pci/pci.h |9 + 2 files changed, 30 insertions(+) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 43075be79388..9ce89e254197 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -5208,6 +5208,27 @@ enum pcie_link_width pcie_get_width_cap(struct pci_dev *dev) return PCIE_LNK_WIDTH_UNKNOWN; } +/** + * pcie_bandwidth_capable - calculates a PCI device's link bandwidth capability + * @dev: PCI device + * @speed: storage for link speed + * @width: storage for link width + * + * Calculate a PCI device's link bandwidth by querying for its link speed + * and width, multiplying them, and applying encoding overhead. 
+ */ +u32 pcie_bandwidth_capable(struct pci_dev *dev, enum pci_bus_speed *speed, + enum pcie_link_width *width) +{ + *speed = pcie_get_speed_cap(dev); + *width = pcie_get_width_cap(dev); + + if (*speed == PCI_SPEED_UNKNOWN || *width == PCIE_LNK_WIDTH_UNKNOWN) + return 0; + + return *width * PCIE_SPEED2MBS_ENC(*speed); +} + /** * pci_select_bars - Make BAR mask from the type of resource * @dev: the PCI device for which BAR mask is made diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 66738f1050c0..2a50172b9803 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -261,8 +261,17 @@ void pci_disable_bridge_window(struct pci_dev *dev); (speed) == PCIE_SPEED_2_5GT ? "2.5 GT/s" : \ "Unknown speed") +/* PCIe speed to Mb/s with encoding overhead: 20% for gen2, ~1.5% for gen3 */ +#define PCIE_SPEED2MBS_ENC(speed) \ Missing gen4. I made it "gen3+". I think that's accurate, isn't it? The spec doesn't seem to actually use "gen3" as a specific term, but sec 4.2.2 says rates of 8 GT/s or higher (which I think includes gen3 and gen4) use 128b/130b encoding. I meant that PCIE_SPEED_16_0GT will return 0 from this macro since it wasn't added. Need to return 15754. Oh, duh, of course! Sorry for being dense. What about the following? I included the calculation as opposed to just the magic numbers to try to make it clear how they're derived. This has the disadvantage of truncating the result instead of rounding, but I doubt that's significant in this context. If it is, we could use the magic numbers and put the computation in a comment. We can always use DIV_ROUND_UP((speed * enc_nominator), enc_denominator). I think this is confusing and since this introduces a bandwidth limit I would prefer to give a wider limit than a wrong one, even it is by less than 1Mb/s. My vote is for leaving it as you wrote below. Another question: we currently deal in Mb/s, not MB/s. 
Mb/s has the advantage of sort of corresponding to the GT/s numbers, but using MB/s would have the advantage of smaller numbers that match the table here: https://en.wikipedia.org/wiki/PCI_Express#History_and_revisions, but I don't know what's most typical in user-facing situations. What's better? I don't know what's better but for network devices we measure bandwidth in Gb/s, so presenting bandwidth in MB/s would mean additional calculations. The truth is I would have prefer to use Gb/s instead of Mb/s, but again, don't want to loss up to 1Gb/s. commit 946435491b35b7782157e9a4d1bd73071fba7709 Author: Tal Gilboa Date: Fri Mar 30 08:32:03 2018 -0500 PCI: Add pcie_bandwidth_capable() to compute max supported link bandwidth Add pcie_bandwidth_capable() to compute the max link bandwidth supported by a device, based on the max link speed and width, adjusted by the encoding overhead. The maximum bandwidth of the link is computed as: max_link_width * max_link_speed * (1 - encoding_overhead) 2.5 and 5.0 GT/s links use 8b/10b encoding, which reduces the raw bandwidth available by 20%; 8.0 GT/s and faster links use 128b/130b encoding, which reduces it by about 1.5%. The result is in Mb/s, i.e., megabits/second, of raw bandwidth. Signed-off-by: T
[PATCH 02/15] ARM: pxa: add dma slave map
In order to remove the specific knowledge of the dma mapping from PXA drivers, add a default slave map for pxa architectures. This is the first step, and once all drivers are converted, pxad_filter_fn() will be made static, and the DMA resources removed from device.c. Signed-off-by: Robert Jarzmik Reported-by: Arnd Bergmann --- arch/arm/mach-pxa/devices.c | 55 + 1 file changed, 55 insertions(+) diff --git a/arch/arm/mach-pxa/devices.c b/arch/arm/mach-pxa/devices.c index d7c9a8476d57..da67ebe9a7d5 100644 --- a/arch/arm/mach-pxa/devices.c +++ b/arch/arm/mach-pxa/devices.c @@ -4,6 +4,8 @@ #include #include #include +#include +#include #include #include @@ -1202,9 +1204,62 @@ void __init pxa2xx_set_spi_info(unsigned id, struct pxa2xx_spi_master *info) platform_device_add(pd); } +#define PDMA_FILTER_PARAM(_prio, _requestor) (&(struct pxad_param) { \ + .prio = PXAD_PRIO_##_prio, .drcmr = _requestor }) + +static const struct dma_slave_map pxa_slave_map[] = { + /* PXA25x, PXA27x and PXA3xx common entries */ + { "pxa-pcm-audio", "ac97_mic_mono", PDMA_FILTER_PARAM(LOWEST, 8) }, + { "pxa-pcm-audio", "ac97_aux_mono_in", PDMA_FILTER_PARAM(LOWEST, 9) }, + { "pxa-pcm-audio", "ac97_aux_mono_out", PDMA_FILTER_PARAM(LOWEST, 10) }, + { "pxa-pcm-audio", "ac97_stereo_in", PDMA_FILTER_PARAM(LOWEST, 11) }, + { "pxa-pcm-audio", "ac97_stereo_out", PDMA_FILTER_PARAM(LOWEST, 12) }, + { "pxa-pcm-audio", "ssp1_rx", PDMA_FILTER_PARAM(LOWEST, 13) }, + { "pxa-pcm-audio", "ssp1_tx", PDMA_FILTER_PARAM(LOWEST, 14) }, + { "pxa-pcm-audio", "ssp2_rx", PDMA_FILTER_PARAM(LOWEST, 15) }, + { "pxa-pcm-audio", "ssp2_tx", PDMA_FILTER_PARAM(LOWEST, 16) }, + { "pxa2xx-ir", "rx", PDMA_FILTER_PARAM(LOWEST, 17) }, + { "pxa2xx-ir", "tx", PDMA_FILTER_PARAM(LOWEST, 18) }, + { "pxa2xx-mci.0", "rx", PDMA_FILTER_PARAM(LOWEST, 21) }, + { "pxa2xx-mci.0", "tx", PDMA_FILTER_PARAM(LOWEST, 22) }, + { "smc911x.0", "rx", PDMA_FILTER_PARAM(LOWEST, -1) }, + { "smc911x.0", "tx", PDMA_FILTER_PARAM(LOWEST, -1) }, + { "smc91x.0", 
"data", PDMA_FILTER_PARAM(LOWEST, -1) }, + + /* PXA25x specific map */ + { "pxa25x-ssp.0", "rx", PDMA_FILTER_PARAM(LOWEST, 13) }, + { "pxa25x-ssp.0", "tx", PDMA_FILTER_PARAM(LOWEST, 14) }, + { "pxa25x-nssp.1", "rx", PDMA_FILTER_PARAM(LOWEST, 15) }, + { "pxa25x-nssp.1", "tx", PDMA_FILTER_PARAM(LOWEST, 16) }, + { "pxa25x-nssp.2", "rx", PDMA_FILTER_PARAM(LOWEST, 23) }, + { "pxa25x-nssp.2", "tx", PDMA_FILTER_PARAM(LOWEST, 24) }, + { "pxa-pcm-audio", "nssp2_rx", PDMA_FILTER_PARAM(LOWEST, 15) }, + { "pxa-pcm-audio", "nssp2_tx", PDMA_FILTER_PARAM(LOWEST, 16) }, + { "pxa-pcm-audio", "nssp3_rx", PDMA_FILTER_PARAM(LOWEST, 23) }, + { "pxa-pcm-audio", "nssp3_tx", PDMA_FILTER_PARAM(LOWEST, 24) }, + + /* PXA27x specific map */ + { "pxa-pcm-audio", "ssp3_rx", PDMA_FILTER_PARAM(LOWEST, 66) }, + { "pxa-pcm-audio", "ssp3_tx", PDMA_FILTER_PARAM(LOWEST, 67) }, + { "pxa27x-camera.0", "CI_Y", PDMA_FILTER_PARAM(HIGHEST, 68) }, + { "pxa27x-camera.0", "CI_U", PDMA_FILTER_PARAM(HIGHEST, 69) }, + { "pxa27x-camera.0", "CI_V", PDMA_FILTER_PARAM(HIGHEST, 70) }, + + /* PXA3xx specific map */ + { "pxa-pcm-audio", "ssp4_rx", PDMA_FILTER_PARAM(LOWEST, 2) }, + { "pxa-pcm-audio", "ssp4_tx", PDMA_FILTER_PARAM(LOWEST, 3) }, + { "pxa2xx-mci.1", "rx", PDMA_FILTER_PARAM(LOWEST, 93) }, + { "pxa2xx-mci.1", "tx", PDMA_FILTER_PARAM(LOWEST, 94) }, + { "pxa3xx-nand", "data", PDMA_FILTER_PARAM(LOWEST, 97) }, + { "pxa2xx-mci.2", "rx", PDMA_FILTER_PARAM(LOWEST, 100) }, + { "pxa2xx-mci.2", "tx", PDMA_FILTER_PARAM(LOWEST, 101) }, +}; + static struct mmp_dma_platdata pxa_dma_pdata = { .dma_channels = 0, .nb_requestors = 0, + .slave_map = pxa_slave_map, + .slave_map_cnt = ARRAY_SIZE(pxa_slave_map), }; static struct resource pxa_dma_resource[] = { -- 2.11.0
[PATCH 05/15] mtd: nand: pxa3xx: remove the dmaengine compat need
From: Robert Jarzmik As the pxa architecture switched towards the dmaengine slave map, the old compatibility mechanism to acquire the dma requestor line number and priority are not needed anymore. This patch simplifies the dma resource acquisition, using the more generic function dma_request_slave_channel(). Signed-off-by: Robert Jarzmik --- drivers/mtd/nand/pxa3xx_nand.c | 10 +- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/drivers/mtd/nand/pxa3xx_nand.c b/drivers/mtd/nand/pxa3xx_nand.c index d1979c7dbe7e..4a56a0aef5b1 100644 --- a/drivers/mtd/nand/pxa3xx_nand.c +++ b/drivers/mtd/nand/pxa3xx_nand.c @@ -1518,8 +1518,6 @@ static int pxa3xx_nand_init_buff(struct pxa3xx_nand_info *info) { struct platform_device *pdev = info->pdev; struct dma_slave_config config; - dma_cap_mask_t mask; - struct pxad_param param; int ret; info->data_buff = kmalloc(info->buf_size, GFP_KERNEL); @@ -1533,13 +1531,7 @@ static int pxa3xx_nand_init_buff(struct pxa3xx_nand_info *info) return ret; sg_init_one(&info->sg, info->data_buff, info->buf_size); - dma_cap_zero(mask); - dma_cap_set(DMA_SLAVE, mask); - param.prio = PXAD_PRIO_LOWEST; - param.drcmr = info->drcmr_dat; - info->dma_chan = dma_request_slave_channel_compat(mask, pxad_filter_fn, - ¶m, &pdev->dev, - "data"); + info->dma_chan = dma_request_slave_channel(&pdev->dev, "data"); if (!info->dma_chan) { dev_err(&pdev->dev, "unable to request data dma channel\n"); return -ENODEV; -- 2.11.0
[net-next PATCH v3 06/11] net: netcp: ethss: use rgmii link status for 2u cpsw hardware
Introduce rgmii link status to handle link state events for 2u cpsw hardware on K2G. Signed-off-by: Murali Karicheri --- drivers/net/ethernet/ti/netcp_ethss.c | 34 +++--- 1 file changed, 27 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/ti/netcp_ethss.c b/drivers/net/ethernet/ti/netcp_ethss.c index 06bbb65..ab6c918 100644 --- a/drivers/net/ethernet/ti/netcp_ethss.c +++ b/drivers/net/ethernet/ti/netcp_ethss.c @@ -169,6 +169,11 @@ #defineGBE_RXHOOK_ORDER0 #define GBE_DEFAULT_ALE_AGEOUT 30 #define SLAVE_LINK_IS_XGMII(s) ((s)->link_interface >= XGMII_LINK_MAC_PHY) +#define SLAVE_LINK_IS_RGMII(s) \ + (((s)->link_interface >= RGMII_LINK_MAC_PHY) && \ +((s)->link_interface <= RGMII_LINK_MAC_PHY_NO_MDIO)) +#define SLAVE_LINK_IS_SGMII(s) \ + ((s)->link_interface <= SGMII_LINK_MAC_PHY_NO_MDIO) #define NETCP_LINK_STATE_INVALID -1 #define GBE_SET_REG_OFS(p, rb, rn) p->rb##_ofs.rn = \ @@ -552,6 +557,7 @@ struct gbe_ss_regs { struct gbe_ss_regs_ofs { u16 id_ver; u16 control; + u16 rgmii_status; /* 2U */ }; struct gbe_switch_regs { @@ -2120,23 +2126,35 @@ static bool gbe_phy_link_status(struct gbe_slave *slave) return !slave->phy || slave->phy->link; } +#define RGMII_REG_STATUS_LINK BIT(0) + +static void netcp_2u_rgmii_get_port_link(struct gbe_priv *gbe_dev, bool *status) +{ + u32 val = 0; + + val = readl(GBE_REG_ADDR(gbe_dev, ss_regs, rgmii_status)); + *status = !!(val & RGMII_REG_STATUS_LINK); +} + static void netcp_ethss_update_link_state(struct gbe_priv *gbe_dev, struct gbe_slave *slave, struct net_device *ndev) { - int sp = slave->slave_num; - int phy_link_state, sgmii_link_state = 1, link_state; + bool sw_link_state = true, phy_link_state; + int sp = slave->slave_num, link_state; if (!slave->open) return; - if (!SLAVE_LINK_IS_XGMII(slave)) { - sgmii_link_state = - netcp_sgmii_get_port_link(SGMII_BASE(gbe_dev, sp), sp); - } + if (SLAVE_LINK_IS_RGMII(slave)) + netcp_2u_rgmii_get_port_link(gbe_dev, +&sw_link_state); + if (SLAVE_LINK_IS_SGMII(slave)) + 
sw_link_state = + netcp_sgmii_get_port_link(SGMII_BASE(gbe_dev, sp), sp); phy_link_state = gbe_phy_link_status(slave); - link_state = phy_link_state & sgmii_link_state; + link_state = phy_link_state & sw_link_state; if (atomic_xchg(&slave->link_state, link_state) != link_state) netcp_ethss_link_state_action(gbe_dev, ndev, slave, @@ -3431,6 +3449,8 @@ static int set_gbenu_ethss_priv(struct gbe_priv *gbe_dev, /* Subsystem registers */ GBENU_SET_REG_OFS(gbe_dev, ss_regs, id_ver); + /* ok to set for MU, but used by 2U only */ + GBENU_SET_REG_OFS(gbe_dev, ss_regs, rgmii_status); /* Switch module registers */ GBENU_SET_REG_OFS(gbe_dev, switch_regs, id_ver); -- 1.9.1
[net-next PATCH v3 11/11] net: netcp: support probe deferral
The netcp driver shouldn't proceed until the knav qmss and dma devices are ready. So return -EPROBE_DEFER if these devices are not ready. Signed-off-by: Murali Karicheri --- drivers/net/ethernet/ti/netcp_core.c | 4 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c index 9c51b25..736f6f7 100644 --- a/drivers/net/ethernet/ti/netcp_core.c +++ b/drivers/net/ethernet/ti/netcp_core.c @@ -2158,6 +2158,10 @@ static int netcp_probe(struct platform_device *pdev) struct netcp_module *module; int ret; + if (!knav_dma_device_ready() || + !knav_qmss_device_ready()) + return -EPROBE_DEFER; + if (!node) { dev_err(dev, "could not find device info\n"); return -ENODEV; -- 1.9.1
[net-next PATCH v3 04/11] net: netcp: ethss: make call to gbe_sgmii_config() conditional
As a preparatory patch to add support for 2u cpsw hardware found on K2G SoC, make call to gbe_sgmii_config() conditional. This is required since 2u uses RGMII interface instead of SGMII and to allow for driver re-use. Signed-off-by: Murali Karicheri --- drivers/net/ethernet/ti/netcp_ethss.c | 6 -- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/ti/netcp_ethss.c b/drivers/net/ethernet/ti/netcp_ethss.c index 1b79fe5..1628fbe 100644 --- a/drivers/net/ethernet/ti/netcp_ethss.c +++ b/drivers/net/ethernet/ti/netcp_ethss.c @@ -2274,7 +2274,8 @@ static int gbe_slave_open(struct gbe_intf *gbe_intf) void (*hndlr)(struct net_device *) = gbe_adjust_link; - gbe_sgmii_config(priv, slave); + if (IS_SS_ID_VER_14(priv) || IS_SS_ID_NU(priv)) + gbe_sgmii_config(priv, slave); gbe_port_reset(slave); gbe_sgmii_rtreset(priv, slave, false); gbe_port_config(priv, slave, priv->rx_packet_max); @@ -3042,7 +3043,8 @@ static void init_secondary_ports(struct gbe_priv *gbe_dev, continue; } - gbe_sgmii_config(gbe_dev, slave); + if (IS_SS_ID_VER_14(gbe_dev) || IS_SS_ID_NU(gbe_dev)) + gbe_sgmii_config(gbe_dev, slave); gbe_port_reset(slave); gbe_port_config(gbe_dev, slave, gbe_dev->rx_packet_max); list_add_tail(&slave->slave_list, &gbe_dev->secondary_slaves); -- 1.9.1
[net-next PATCH v3 10/11] Revert "net: netcp: remove dead code from the driver"
As the probe sequence is not guaranteed contrary to the assumption of the commit 2d8e276a9030, same has to be reverted. commit 2d8e276a9030 ("net: netcp: remove dead code from the driver") Signed-off-by: Murali Karicheri --- drivers/net/ethernet/ti/netcp_core.c | 9 + 1 file changed, 9 insertions(+) diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c index f5a7eb2..9c51b25 100644 --- a/drivers/net/ethernet/ti/netcp_core.c +++ b/drivers/net/ethernet/ti/netcp_core.c @@ -2155,6 +2155,7 @@ static int netcp_probe(struct platform_device *pdev) struct device_node *child, *interfaces; struct netcp_device *netcp_device; struct device *dev = &pdev->dev; + struct netcp_module *module; int ret; if (!node) { @@ -2203,6 +2204,14 @@ static int netcp_probe(struct platform_device *pdev) /* Add the device instance to the list */ list_add_tail(&netcp_device->device_list, &netcp_devices); + /* Probe & attach any modules already registered */ + mutex_lock(&netcp_modules_lock); + for_each_netcp_module(module) { + ret = netcp_module_probe(netcp_device, module); + if (ret < 0) + dev_err(dev, "module(%s) probe failed\n", module->name); + } + mutex_unlock(&netcp_modules_lock); return 0; probe_quit_interface: -- 1.9.1
[net-next PATCH v3 02/11] soc: ti: K2G: provide APIs to support driver probe deferral
This patch provide APIs to allow client drivers to support probe deferral. On K2G SoC, devices can be probed only after the ti_sci_pm_domains driver is probed and ready. As drivers may get probed at different order, any driver that depends on knav dma and qmss drivers, for example netcp network driver, needs to defer probe until knav devices are probed and ready to service. To do this, add an API to query the device ready status from the knav dma and qmss devices. Signed-off-by: Murali Karicheri --- drivers/soc/ti/knav_dma.c| 8 drivers/soc/ti/knav_qmss_queue.c | 8 include/linux/soc/ti/knav_dma.h | 12 include/linux/soc/ti/knav_qmss.h | 1 + 4 files changed, 29 insertions(+) diff --git a/drivers/soc/ti/knav_dma.c b/drivers/soc/ti/knav_dma.c index 026182d..224d7dd 100644 --- a/drivers/soc/ti/knav_dma.c +++ b/drivers/soc/ti/knav_dma.c @@ -134,6 +134,13 @@ struct knav_dma_chan { static struct knav_dma_pool_device *kdev; +static bool device_ready; +bool knav_dma_device_ready(void) +{ + return device_ready; +} +EXPORT_SYMBOL_GPL(knav_dma_device_ready); + static bool check_config(struct knav_dma_chan *chan, struct knav_dma_cfg *cfg) { if (!memcmp(&chan->cfg, cfg, sizeof(*cfg))) @@ -773,6 +780,7 @@ static int knav_dma_probe(struct platform_device *pdev) debugfs_create_file("knav_dma", S_IFREG | S_IRUGO, NULL, NULL, &knav_dma_debug_ops); + device_ready = true; return ret; } diff --git a/drivers/soc/ti/knav_qmss_queue.c b/drivers/soc/ti/knav_qmss_queue.c index 8526c8e..419365a 100644 --- a/drivers/soc/ti/knav_qmss_queue.c +++ b/drivers/soc/ti/knav_qmss_queue.c @@ -74,6 +74,13 @@ */ const char *knav_acc_firmwares[] = {"ks2_qmss_pdsp_acc48.bin"}; +static bool device_ready; +bool knav_qmss_device_ready(void) +{ + return device_ready; +} +EXPORT_SYMBOL_GPL(knav_qmss_device_ready); + /** * knav_queue_notify: qmss queue notfier call * @@ -1849,6 +1856,7 @@ static int knav_queue_probe(struct platform_device *pdev) debugfs_create_file("qmss", S_IFREG | S_IRUGO, NULL, NULL, 
&knav_queue_debug_ops); + device_ready = true; return 0; err: diff --git a/include/linux/soc/ti/knav_dma.h b/include/linux/soc/ti/knav_dma.h index 66693bc..7127ec3 100644 --- a/include/linux/soc/ti/knav_dma.h +++ b/include/linux/soc/ti/knav_dma.h @@ -167,6 +167,8 @@ struct knav_dma_desc { void *knav_dma_open_channel(struct device *dev, const char *name, struct knav_dma_cfg *config); void knav_dma_close_channel(void *channel); +int knav_dma_get_flow(void *channel); +bool knav_dma_device_ready(void); #else static inline void *knav_dma_open_channel(struct device *dev, const char *name, struct knav_dma_cfg *config) @@ -176,6 +178,16 @@ static inline void *knav_dma_open_channel(struct device *dev, const char *name, static inline void knav_dma_close_channel(void *channel) {} +static inline int knav_dma_get_flow(void *channel) +{ + return -EINVAL; +} + +static inline bool knav_dma_device_ready(void) +{ + return false; +} + #endif #endif /* __SOC_TI_KEYSTONE_NAVIGATOR_DMA_H__ */ diff --git a/include/linux/soc/ti/knav_qmss.h b/include/linux/soc/ti/knav_qmss.h index 9f0ebb3b..9745df6 100644 --- a/include/linux/soc/ti/knav_qmss.h +++ b/include/linux/soc/ti/knav_qmss.h @@ -86,5 +86,6 @@ int knav_pool_desc_map(void *ph, void *desc, unsigned size, void *knav_pool_desc_unmap(void *ph, dma_addr_t dma, unsigned dma_sz); dma_addr_t knav_pool_desc_virt_to_dma(void *ph, void *virt); void *knav_pool_desc_dma_to_virt(void *ph, dma_addr_t dma); +bool knav_qmss_device_ready(void); #endif /* __SOC_TI_KNAV_QMSS_H__ */ -- 1.9.1
[net-next PATCH v3 09/11] net: netcp: ethss: use of_get_phy_mode() to support different RGMII modes
The phy used for K2G allows for internal delays to be added optionally to the clock circuitry based on board design. To add this support, enhance the driver to use of_get_phy_mode() to read the phy-mode from the phy device and pass the same to phy through of_phy_connect(). Signed-off-by: Murali Karicheri --- drivers/net/ethernet/ti/netcp_ethss.c | 18 ++ 1 file changed, 18 insertions(+) diff --git a/drivers/net/ethernet/ti/netcp_ethss.c b/drivers/net/ethernet/ti/netcp_ethss.c index 7b3b373..f7af999 100644 --- a/drivers/net/ethernet/ti/netcp_ethss.c +++ b/drivers/net/ethernet/ti/netcp_ethss.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -705,6 +706,7 @@ struct gbe_slave { u32 link_interface; u32 mac_control; u8 phy_port_t; + struct device_node *node; struct device_node *phy_node; struct ts_ctl ts_ctl; struct list_headslave_list; @@ -2318,6 +2320,21 @@ static int gbe_slave_open(struct gbe_intf *gbe_intf) has_phy = true; phy_mode = PHY_INTERFACE_MODE_SGMII; slave->phy_port_t = PORT_MII; + } else if (slave->link_interface == RGMII_LINK_MAC_PHY) { + has_phy = true; + phy_mode = of_get_phy_mode(slave->node); + /* if phy-mode is not present, default to +* PHY_INTERFACE_MODE_RGMII +*/ + if (phy_mode < 0) + phy_mode = PHY_INTERFACE_MODE_RGMII; + + if (!phy_interface_mode_is_rgmii(phy_mode)) { + dev_err(priv->dev, + "Unsupported phy mode %d\n", phy_mode); + return -EINVAL; + } + slave->phy_port_t = PORT_MII; } else if (slave->link_interface == XGMII_LINK_MAC_PHY) { has_phy = true; phy_mode = PHY_INTERFACE_MODE_NA; @@ -2943,6 +2960,7 @@ static int init_slave(struct gbe_priv *gbe_dev, struct gbe_slave *slave, slave->link_interface = SGMII_LINK_MAC_PHY; } + slave->node = node; slave->open = false; if ((slave->link_interface == SGMII_LINK_MAC_PHY) || (slave->link_interface == RGMII_LINK_MAC_PHY) || -- 1.9.1
[net-next PATCH v3 00/11] Add support for netcp driver on K2G SoC
K2G SoC is another variant of Keystone family of SoCs. This patch series add support for NetCP driver on this SoC. The QMSS found on K2G SoC is a cut down version of the QMSS found on other keystone devices with less number of queues, internal link ram etc. The patch series has 2 patch sets that goes into the drivers/soc and the rest has to be applied to net sub system. Please review and merge if this looks good. K2G TRM is located at http://www.ti.com/lit/ug/spruhy8g/spruhy8g.pdf Thanks The boot logs on K2G ICE board (tftp boot over Ethernet) is at https://pastebin.ubuntu.com/p/VQTv3c2XBS/ The boot logs on K2G GP board (tftp boot over Ethernet) is at https://pastebin.ubuntu.com/p/6Vh55DW8vT/ Also regressed boot on K2HK and K2L EVMs as we have modified GBE version detection logic. This series applies to net-next master branch. Change history: v3 - Addressed comments from Andrew Lunn and Grygorii Strashko against v2. v2 - Addressed following comments on initial version - split patch 3/5 to multiple patches from Andrew Lunn Murali Karicheri (11): soc: ti: K2G: enhancement to support QMSS in K2G NAVSS soc: ti: K2G: provide APIs to support driver probe deferral net: netcp: ethss: use macro for checking ss_version consistently net: netcp: ethss: make call to gbe_sgmii_config() conditional net: netcp: ethss: add support for handling sgmii link interface net: netcp: ethss: use rgmii link status for 2u cpsw hardware net: netcp: ethss: map vlan priorities to zero flow net: netcp: ethss: re-use stats handling code for 2u hardware net: netcp: ethss: use of_get_phy_mode() to support different RGMII modes Revert "net: netcp: remove dead code from the driver" net: netcp: support probe deferral .../bindings/soc/ti/keystone-navigator-qmss.txt| 9 +- drivers/net/ethernet/ti/netcp.h| 2 + drivers/net/ethernet/ti/netcp_core.c | 13 +++ drivers/net/ethernet/ti/netcp_ethss.c | 113 - drivers/soc/ti/knav_dma.c | 8 ++ drivers/soc/ti/knav_qmss.h | 6 ++ drivers/soc/ti/knav_qmss_queue.c | 98 ++ 
include/linux/soc/ti/knav_dma.h| 12 +++ include/linux/soc/ti/knav_qmss.h | 1 + 9 files changed, 212 insertions(+), 50 deletions(-) -- 1.9.1
[net-next PATCH v3 07/11] net: netcp: ethss: map vlan priorities to zero flow
The driver currently supports only vlan priority zero. So map the vlan priorities to zero flow in hardware. Signed-off-by: Murali Karicheri --- drivers/net/ethernet/ti/netcp_ethss.c | 9 + 1 file changed, 9 insertions(+) diff --git a/drivers/net/ethernet/ti/netcp_ethss.c b/drivers/net/ethernet/ti/netcp_ethss.c index ab6c918..33d70d4 100644 --- a/drivers/net/ethernet/ti/netcp_ethss.c +++ b/drivers/net/ethernet/ti/netcp_ethss.c @@ -600,6 +600,7 @@ struct gbe_port_regs { struct gbe_port_regs_ofs { u16 port_vlan; u16 tx_pri_map; + u16 rx_pri_map; u16 sa_lo; u16 sa_hi; u16 ts_ctl; @@ -2300,6 +2301,13 @@ static int gbe_slave_open(struct gbe_intf *gbe_intf) gbe_sgmii_rtreset(priv, slave, false); gbe_port_config(priv, slave, priv->rx_packet_max); gbe_set_slave_mac(slave, gbe_intf); + /* For NU & 2U switch, map the vlan priorities to zero +* as we only configure to use priority 0 +*/ + if (IS_SS_ID_MU(priv)) + writel(HOST_TX_PRI_MAP_DEFAULT, + GBE_REG_ADDR(slave, port_regs, rx_pri_map)); + /* enable forwarding */ cpsw_ale_control_set(priv->ale, slave->port_num, ALE_PORT_STATE, ALE_PORT_STATE_FORWARD); @@ -3001,6 +3009,7 @@ static int init_slave(struct gbe_priv *gbe_dev, struct gbe_slave *slave, /* Initialize slave port register offsets */ GBENU_SET_REG_OFS(slave, port_regs, port_vlan); GBENU_SET_REG_OFS(slave, port_regs, tx_pri_map); + GBENU_SET_REG_OFS(slave, port_regs, rx_pri_map); GBENU_SET_REG_OFS(slave, port_regs, sa_lo); GBENU_SET_REG_OFS(slave, port_regs, sa_hi); GBENU_SET_REG_OFS(slave, port_regs, ts_ctl); -- 1.9.1
[net-next PATCH v3 01/11] soc: ti: K2G: enhancement to support QMSS in K2G NAVSS
Navigator Subsystem (NAVSS) available on K2G SoC has a cut down version of QMSS with less number of queues, internal linking ram with lesser number of buffers etc. It doesn't have status and explicit push register space as in QMSS available on other K2 SoCs. So define reg indices specific to QMSS on K2G. This patch introduces "ti,66ak2g-navss-qm" compatibility to identify QMSS on K2G NAVSS and to customize the dts handling code. Per Device manual, descriptors with index less than or equal to regions0_size is in region 0 in the case of K2 QMSS where as for QMSS on K2G, descriptors with index less than regions0_size is in region 0. So update the size accordingly in the regions0_size bits of the linking ram size 0 register. Signed-off-by: Murali Karicheri Signed-off-by: WingMan Kwok --- .../bindings/soc/ti/keystone-navigator-qmss.txt| 9 ++- drivers/soc/ti/knav_qmss.h | 6 ++ drivers/soc/ti/knav_qmss_queue.c | 90 -- 3 files changed, 82 insertions(+), 23 deletions(-) diff --git a/Documentation/devicetree/bindings/soc/ti/keystone-navigator-qmss.txt b/Documentation/devicetree/bindings/soc/ti/keystone-navigator-qmss.txt index 77cd42c..b025770 100644 --- a/Documentation/devicetree/bindings/soc/ti/keystone-navigator-qmss.txt +++ b/Documentation/devicetree/bindings/soc/ti/keystone-navigator-qmss.txt @@ -17,7 +17,8 @@ pool management. Required properties: -- compatible : Must be "ti,keystone-navigator-qmss"; +- compatible : Must be "ti,keystone-navigator-qmss". + : Must be "ti,66ak2g-navss-qm" for QMSS on K2G SoC. - clocks : phandle to the reference clock for this device. - queue-range : total range of queue numbers for the device. - linkram0 : for internal link ram, where size is the total @@ -39,6 +40,12 @@ Required properties: - Descriptor memory setup region. - Queue Management/Queue Proxy region for queue Push. - Queue Management/Queue Proxy region for queue Pop. + +For QMSS on K2G SoC, following QM reg indexes are used in that order + - Queue Peek region. 
+ - Queue configuration region. + - Queue Management/Queue Proxy region for queue Push/Pop. + - queue-pools : child node classifying the queue ranges into pools. Queue ranges are grouped into 3 type of pools: - qpend : pool of qpend(interruptible) queues diff --git a/drivers/soc/ti/knav_qmss.h b/drivers/soc/ti/knav_qmss.h index 905b974..56866ba4 100644 --- a/drivers/soc/ti/knav_qmss.h +++ b/drivers/soc/ti/knav_qmss.h @@ -292,6 +292,11 @@ struct knav_queue { struct list_headlist; }; +enum qmss_version { + QMSS, + QMSS_66AK2G, +}; + struct knav_device { struct device *dev; unsignedbase_id; @@ -305,6 +310,7 @@ struct knav_device { struct list_headpools; struct list_headpdsps; struct list_headqmgrs; + enum qmss_version version; }; struct knav_range_ops { diff --git a/drivers/soc/ti/knav_qmss_queue.c b/drivers/soc/ti/knav_qmss_queue.c index 77d6b5c..8526c8e 100644 --- a/drivers/soc/ti/knav_qmss_queue.c +++ b/drivers/soc/ti/knav_qmss_queue.c @@ -42,6 +42,15 @@ #define KNAV_QUEUE_PUSH_REG_INDEX 4 #define KNAV_QUEUE_POP_REG_INDEX 5 +/* Queue manager register indices in DTS for QMSS in K2G NAVSS. + * There are no status and vbusm push registers on this version + * of QMSS. 
Push registers are same as pop, So all indices above 1 + * are to be re-defined + */ +#define KNAV_L_QUEUE_CONFIG_REG_INDEX 1 +#define KNAV_L_QUEUE_REGION_REG_INDEX 2 +#define KNAV_L_QUEUE_PUSH_REG_INDEX3 + /* PDSP register indices in DTS */ #define KNAV_QUEUE_PDSP_IRAM_REG_INDEX 0 #define KNAV_QUEUE_PDSP_REGS_REG_INDEX 1 @@ -1169,8 +1178,12 @@ static int knav_queue_setup_link_ram(struct knav_device *kdev) dev_dbg(kdev->dev, "linkram0: dma:%pad, virt:%p, size:%x\n", &block->dma, block->virt, block->size); writel_relaxed((u32)block->dma, &qmgr->reg_config->link_ram_base0); - writel_relaxed(block->size, &qmgr->reg_config->link_ram_size0); - + if (kdev->version == QMSS_66AK2G) + writel_relaxed(block->size, + &qmgr->reg_config->link_ram_size0); + else + writel_relaxed(block->size - 1, + &qmgr->reg_config->link_ram_size0); block++; if (!block->size) continue; @@ -1387,42 +1400,64 @@ static int knav_queue_init_qmgrs(struct knav_device *kdev, qmgr->reg
[net-next PATCH v3 08/11] net: netcp: ethss: re-use stats handling code for 2u hardware
The stats block in 2u cpsw hardware is similar to the one on nu and hence handle it in a similar way by using a macro that includes 2u hardware as well. Signed-off-by: Murali Karicheri --- drivers/net/ethernet/ti/netcp_ethss.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ti/netcp_ethss.c b/drivers/net/ethernet/ti/netcp_ethss.c index 33d70d4..7b3b373 100644 --- a/drivers/net/ethernet/ti/netcp_ethss.c +++ b/drivers/net/ethernet/ti/netcp_ethss.c @@ -3394,7 +3394,7 @@ static int set_gbenu_ethss_priv(struct gbe_priv *gbe_dev, gbe_dev->num_stats_mods = gbe_dev->max_num_ports; gbe_dev->et_stats = gbenu_et_stats; - if (IS_SS_ID_NU(gbe_dev)) + if (IS_SS_ID_MU(gbe_dev)) gbe_dev->num_et_stats = GBENU_ET_STATS_HOST_SIZE + (gbe_dev->max_num_slaves * GBENU_ET_STATS_PORT_SIZE); else -- 1.9.1
[net-next PATCH v3 03/11] net: netcp: ethss: use macro for checking ss_version consistently
The driver currently uses a macro for NU and XGBE hardware, while in other places, for older hardware such as that on K2H/K SoC (version 1.4 of the cpsw hardware), it explicitly checks the ss_version inline. Add a new macro for version 1.4 and use it to customize code in the driver. While at it also fix a similar issue with checking the XGBE version by re-using the existing macro IS_SS_ID_XGBE(). Signed-off-by: Murali Karicheri --- drivers/net/ethernet/ti/netcp_ethss.c | 29 - 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/ti/netcp_ethss.c b/drivers/net/ethernet/ti/netcp_ethss.c index 56dbc0b..1b79fe5 100644 --- a/drivers/net/ethernet/ti/netcp_ethss.c +++ b/drivers/net/ethernet/ti/netcp_ethss.c @@ -42,7 +42,7 @@ /* 1G Ethernet SS defines */ #define GBE_MODULE_NAME"netcp-gbe" -#define GBE_SS_VERSION_14 0x4ed21104 +#define GBE_SS_VERSION_14 0x4ed2 #define GBE_SS_REG_INDEX 0 #define GBE_SGMII34_REG_INDEX 1 @@ -72,6 +72,9 @@ #define IS_SS_ID_NU(d) \ (GBE_IDENT((d)->ss_version) == GBE_SS_ID_NU) +#define IS_SS_ID_VER_14(d) \ + (GBE_IDENT((d)->ss_version) == GBE_SS_VERSION_14) + #define GBENU_SS_REG_INDEX 0 #define GBENU_SM_REG_INDEX 1 #define GBENU_SGMII_MODULE_OFFSET 0x100 @@ -86,7 +89,7 @@ /* 10G Ethernet SS defines */ #define XGBE_MODULE_NAME "netcp-xgbe" -#define XGBE_SS_VERSION_10 0x4ee42100 +#define XGBE_SS_VERSION_10 0x4ee4 #define XGBE_SS_REG_INDEX 0 #define XGBE_SM_REG_INDEX 1 @@ -1915,7 +1918,7 @@ static void keystone_get_ethtool_stats(struct net_device *ndev, gbe_dev = gbe_intf->gbe_dev; spin_lock_bh(&gbe_dev->hw_stats_lock); - if (gbe_dev->ss_version == GBE_SS_VERSION_14) + if (IS_SS_ID_VER_14(gbe_dev)) gbe_update_stats_ver14(gbe_dev, data); else gbe_update_stats(gbe_dev, data); @@ -2205,7 +2208,7 @@ static void gbe_port_config(struct gbe_priv *gbe_dev, struct gbe_slave *slave, max_rx_len = NETCP_MAX_FRAME_SIZE; /* Enable correct MII mode at SS level */ - if ((gbe_dev->ss_version == XGBE_SS_VERSION_10) && 
(slave->link_interface >= XGMII_LINK_MAC_PHY)) { xgmii_mode = readl(GBE_REG_ADDR(gbe_dev, ss_regs, control)); xgmii_mode |= (1 << slave->slave_num); @@ -2293,7 +2296,7 @@ static int gbe_slave_open(struct gbe_intf *gbe_intf) } if (has_phy) { - if (priv->ss_version == XGBE_SS_VERSION_10) + if (IS_SS_ID_XGBE(priv)) hndlr = xgbe_adjust_link; slave->phy = of_phy_connect(gbe_intf->ndev, @@ -2764,7 +2767,7 @@ static void netcp_ethss_timer(struct timer_list *t) /* A timer runs as a BH, no need to block them */ spin_lock(&gbe_dev->hw_stats_lock); - if (gbe_dev->ss_version == GBE_SS_VERSION_14) + if (IS_SS_ID_VER_14(gbe_dev)) gbe_update_stats_ver14(gbe_dev, NULL); else gbe_update_stats(gbe_dev, NULL); @@ -2807,7 +2810,7 @@ static int gbe_open(void *intf_priv, struct net_device *ndev) GBE_RTL_VERSION(reg), GBE_IDENT(reg)); /* For 10G and on NetCP 1.5, use directed to port */ - if ((gbe_dev->ss_version == XGBE_SS_VERSION_10) || IS_SS_ID_MU(gbe_dev)) + if (IS_SS_ID_XGBE(gbe_dev) || IS_SS_ID_MU(gbe_dev)) gbe_intf->tx_pipe.flags = SWITCH_TO_PORT_IN_TAGINFO; if (gbe_dev->enable_ale) @@ -2924,7 +2927,7 @@ static int init_slave(struct gbe_priv *gbe_dev, struct gbe_slave *slave, /* Emac regs memmap are contiguous but port regs are not */ port_reg_num = slave->slave_num; - if (gbe_dev->ss_version == GBE_SS_VERSION_14) { + if (IS_SS_ID_VER_14(gbe_dev)) { if (slave->slave_num > 1) { port_reg_ofs = GBE13_SLAVE_PORT2_OFFSET; port_reg_num -= 2; @@ -2939,7 +2942,7 @@ static int init_slave(struct gbe_priv *gbe_dev, struct gbe_slave *slave, emac_reg_ofs = GBENU_EMAC_OFFSET; port_reg_blk_sz = 0x1000; emac_reg_blk_sz = 0x1000; - } else if (gbe_dev->ss_version == XGBE_SS_VERSION_10) { + } else if (IS_SS_ID_XGBE(gbe_dev)) { port_reg_ofs = XGBE10_SLAVE_PORT_OFFSET; emac_reg_ofs = XGBE10_EMAC_OFFSET; port_reg_blk_sz = 0x30; @@ -2955,7 +2958,7 @@ static int init_slave(struct gbe_priv *gbe_dev, struct gbe_slave *slave, slave->emac_regs = gbe_dev->switch_regs + emac_reg_ofs + (emac_reg_blk_sz * 
slave->slave_num); - if (gbe_dev->ss_version == GBE_SS_VERSION_14) { + if (IS_SS_ID_VER_14(gbe_dev)) { /* Initialize slave p
[net-next PATCH v3 05/11] net: netcp: ethss: add support for handling sgmii link interface
2u cpsw hardware on K2G uses sgmii link to interface with Phy. So add support for this interface in the code so that drover can be re-used for 2u hardware. Signed-off-by: Murali Karicheri --- drivers/net/ethernet/ti/netcp.h | 2 ++ drivers/net/ethernet/ti/netcp_ethss.c | 15 +++ 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/ti/netcp.h b/drivers/net/ethernet/ti/netcp.h index 8900a6f..416f732 100644 --- a/drivers/net/ethernet/ti/netcp.h +++ b/drivers/net/ethernet/ti/netcp.h @@ -33,6 +33,8 @@ #define SGMII_LINK_MAC_MAC_FORCED 2 #define SGMII_LINK_MAC_FIBER 3 #define SGMII_LINK_MAC_PHY_NO_MDIO 4 +#define RGMII_LINK_MAC_PHY 5 +#define RGMII_LINK_MAC_PHY_NO_MDIO 7 #define XGMII_LINK_MAC_PHY 10 #define XGMII_LINK_MAC_MAC_FORCED 11 diff --git a/drivers/net/ethernet/ti/netcp_ethss.c b/drivers/net/ethernet/ti/netcp_ethss.c index 1628fbe..06bbb65 100644 --- a/drivers/net/ethernet/ti/netcp_ethss.c +++ b/drivers/net/ethernet/ti/netcp_ethss.c @@ -2094,8 +2094,9 @@ static void netcp_ethss_link_state_action(struct gbe_priv *gbe_dev, ALE_PORT_STATE_FORWARD); if (ndev && slave->open && - slave->link_interface != SGMII_LINK_MAC_PHY && - slave->link_interface != XGMII_LINK_MAC_PHY) + ((slave->link_interface != SGMII_LINK_MAC_PHY) && + (slave->link_interface != RGMII_LINK_MAC_PHY) && + (slave->link_interface != XGMII_LINK_MAC_PHY))) netif_carrier_on(ndev); } else { writel(mac_control, GBE_REG_ADDR(slave, emac_regs, @@ -2104,8 +2105,9 @@ static void netcp_ethss_link_state_action(struct gbe_priv *gbe_dev, ALE_PORT_STATE, ALE_PORT_STATE_DISABLE); if (ndev && - slave->link_interface != SGMII_LINK_MAC_PHY && - slave->link_interface != XGMII_LINK_MAC_PHY) + ((slave->link_interface != SGMII_LINK_MAC_PHY) && + (slave->link_interface != RGMII_LINK_MAC_PHY) && + (slave->link_interface != XGMII_LINK_MAC_PHY))) netif_carrier_off(ndev); } @@ -2917,6 +2919,7 @@ static int init_slave(struct gbe_priv *gbe_dev, struct gbe_slave *slave, slave->open = false; if 
((slave->link_interface == SGMII_LINK_MAC_PHY) || + (slave->link_interface == RGMII_LINK_MAC_PHY) || (slave->link_interface == XGMII_LINK_MAC_PHY)) slave->phy_node = of_parse_phandle(node, "phy-handle", 0); slave->port_num = gbe_get_slave_port(gbe_dev, slave->slave_num); @@ -3078,6 +3081,9 @@ static void init_secondary_ports(struct gbe_priv *gbe_dev, if (slave->link_interface == SGMII_LINK_MAC_PHY) { phy_mode = PHY_INTERFACE_MODE_SGMII; slave->phy_port_t = PORT_MII; + } else if (slave->link_interface == RGMII_LINK_MAC_PHY) { + phy_mode = PHY_INTERFACE_MODE_RGMII; + slave->phy_port_t = PORT_MII; } else { phy_mode = PHY_INTERFACE_MODE_NA; slave->phy_port_t = PORT_FIBRE; @@ -3085,6 +3091,7 @@ static void init_secondary_ports(struct gbe_priv *gbe_dev, for_each_sec_slave(slave, gbe_dev) { if ((slave->link_interface != SGMII_LINK_MAC_PHY) && + (slave->link_interface != RGMII_LINK_MAC_PHY) && (slave->link_interface != XGMII_LINK_MAC_PHY)) continue; slave->phy = -- 1.9.1
Re: [net-next PATCH v3 00/11] Add support for netcp driver on K2G SoC
The net-next tree is closed, please resubmit this after the merge window and the net-next tree is open back up again.
Re: [PATCH net-next V2 0/4] Introduce adaptive TX interrupt moderation to net DIM
On 4/2/2018 5:27 PM, David Miller wrote: From: Tal Gilboa Date: Mon, 2 Apr 2018 16:59:30 +0300 Net DIM is a library designed for dynamic interrupt moderation. It was implemented and optimized with receive side interrupts in mind, since these are usually the CPU expensive ones. This patch-set introduces adaptive transmit interrupt moderation to net DIM, complete with a usage in the mlx5e driver. Using adaptive TX behavior would reduce interrupt rate for multiple scenarios. Furthermore, it is essential for increasing bandwidth on cases where payload aggregation is required. v2: Rebased over proper tree. v1: Fix compilation issues due to missed function renaming. This series still needs fixes, and the net-next tree has closed meanwhile. And to be honest, handling this series has been very painful for me so far. The patches either didn't apply or didn't even compile. Please do not resubmit this until the merge window is over and the net-next tree opens up again. Thank you. Ack.
Re: [PATCH net-next] net: ipv6/gre: Add GRO support
On 04/02/2018 05:40 AM, Eran Ben Elisha wrote: > On Sun, Apr 1, 2018 at 7:35 PM, Eric Dumazet wrote: >> >> >> On 04/01/2018 06:17 AM, Tariq Toukan wrote: >>> From: Eran Ben Elisha >>> >>> Add GRO capability for IPv6 GRE tunnel and ip6erspan tap, via gro_cells >>> infrastructure. >>> >>> Performance testing: 55% higher badwidth. >>> Measuring bandwidth of 1 thread IPv4 TCP traffic over IPv6 GRE tunnel >>> while GRO on the physical interface is disabled. >>> CPU: Intel Xeon E312xx (Sandy Bridge) >>> NIC: Mellanox Technologies MT27700 Family [ConnectX-4] >>> Before (GRO not working in tunnel) : 2.47 Gbits/sec >>> After (GRO working in tunnel) : 3.85 Gbits/sec >>> >>> Signed-off-by: Eran Ben Elisha >>> Signed-off-by: Tariq Toukan >>> CC: Eric Dumazet >>> --- >> >> >> Seems good, but why isn't this handled directly in GRO native layer ? > ip6_tunnel and ip6_gre do not share initialization flow functions (unlike > ipv4). > Changing the ipv6 init infrastructure should not be part of this > patch. we prefer to keep this one minimal, simple and safe. Looking at gre_gro_receive() and gre_gro_complete() I could not see why they could not be copied/pasted to IPv6. Maybe give more details on the changelog, it is really not obvious.
Re: [PATCH] net: implement IP_RECVHDRS option to get full headers through recvmsg cmsg.
From: Maciej Żenczykowski Date: Sat, 31 Mar 2018 22:43:14 -0700 > From: Luigi Rizzo > > We have all sorts of different ways to fetch pre-UDP payload metadata: > IP_RECVTOS > IP_RECVTTL > IP_RECVOPTS > IP_RETOPTS > > But nothing generic which simply allows you to receive the entire packet > header. > > This is in similar vein to TCP_SAVE_SYN but for UDP and other datagram > sockets. > > This is envisioned as a way to get GUE extension metadata for encapsulated > packets, but implemented in a way to be much more future proof. > > (Implemented by Luigi, who asked me to send it upstream) > > Cc: Eric Dumazet > Signed-off-by: Luigi Rizzo > Signed-off-by: Maciej Żenczykowski This is an ipv4 level socket option, so why are you copying in the MAC header(s)? That part I don't like at all. First of all, you have no idea what the link level protocol is for that MAC header, therefore how could you even begin to interpret it's contents correctly? Second of all, MAC level details belong not in AF_INET socket interfaces. Thank you.
Re: [PATCH net-next 0/5] virtio-net: Add SCTP checksum offload support
On Mon, Apr 02, 2018 at 09:40:01AM -0400, Vladislav Yasevich wrote: > Now that we have SCTP offload capabilities in the kernel, we can add > them to virtio as well. First step is SCTP checksum. Thanks. > As for GSO, the way sctp GSO is currently implemented buys us nothing > in added support to virtio. To add true GSO, would require a lot of > re-work inside of SCTP and would require extensions to the virtio > net header to carry extra sctp data. Can you please elaborate more on this? Is this because SCTP GSO relies on the gso skb format for knowing how to segment it instead of having a list of sizes? Marcelo
Re: [PATCH net-next] net: ipv6/gre: Add GRO support
>>> Seems good, but why isn't this handled directly in GRO native layer ? >> ip6_tunnel and ip6_gre do not share initialization flow functions (unlike >> ipv4). >> Changing the ipv6 init infrastructure should not be part of this >> patch. we prefer to keep this one minimal, simple and safe. > > > > Looking at gre_gro_receive() and gre_gro_complete() I could not see why they > could not be copied/pasted to IPv6. These functions to handle GRO over GRE are already assigned in gre_offload_init() (in net/ipv4/gre_offload.c under CONFIG_IPV6). However without initializing the gro_cells, the receive path will not go via napi_gro_receive path, but directly to netif_rx. So AFAIU, only gcells->cells was missing for gro_cells_receive to really go via GRO flow. > > Maybe give more details on the changelog, it is really not obvious. Hopefully the above filled this request. >
Re: [net-next PATCH v3 00/11] Add support for netcp driver on K2G SoC
On 04/02/2018 10:40 AM, David Miller wrote: > > The net-next tree is closed, please resubmit this after the merge window and > the net-next tree is open back up again. > Ok. Will do. Thanks -- Murali Karicheri Linux Kernel, Keystone
Re: [PATCH net-next] bridge: Allow max MTU when multiple VLANs present
On Fri, Mar 30, 2018 at 12:54 PM, Chas Williams <3ch...@gmail.com> wrote: > On Thu, Mar 29, 2018 at 9:02 PM, Toshiaki Makita > wrote: >> On 2018/03/30 1:49, Roopa Prabhu wrote: >>> On Thu, Mar 22, 2018 at 9:53 PM, Roopa Prabhu >>> wrote: On Thu, Mar 22, 2018 at 8:34 AM, Chas Williams <3ch...@gmail.com> wrote: > If the bridge is allowing multiple VLANs, some VLANs may have > different MTUs. Instead of choosing the minimum MTU for the > bridge interface, choose the maximum MTU of the bridge members. > With this the user only needs to set a larger MTU on the member > ports that are participating in the large MTU VLANS. > > Signed-off-by: Chas Williams <3ch...@gmail.com> > --- Acked-by: Roopa Prabhu This or an equivalent fix is necessary: as stated above, today the bridge mtu capped at min port mtu limits all vlan devices on top of the vlan filtering bridge to min port mtu. >>> >>> >>> On further thought, since this patch changes default behavior, it may >>> upset people. ie with this patch, a vlan device >>> on the bridge by default will now use the bridge max mtu and that >>> could cause unexpected drops in the bridge driver >>> if the xmit port had a lower mtu. This may surprise users. > > It only changes the default behavior when you are using VLAN aware bridges. > The behavior remains the same otherwise. I don't know if VLAN aware bridges > are that popular yet so there probably isn't any particular > expectation from those > bridges. they are popular...in-fact they are the default bridge mode on our network switches. And they have been around for some time now to ignore its users. Plus it is not right to change default mtu behavior for one mode of the bridge and not the others (bridge mtu handling from user-space is complex enough today due to dynamic mtu changes on port enslave/deslave). > > I don't think those drops are unexpected. If a user has misconfigured > the bridge > we can't be expected to fix that for them. 
It is the user's > responsibility to ensure > that the ports on the VLAN have a size consistent with the traffic > they expect to > pass. > By default they are not expected today. The problem is changing the bridge to max mtu changes 'all' the vlan devices on top of the vlan aware bridge to max mtu by default which makes drops at the bridge driver more common if the user had mixed mtu on its ports.
Re: [PATCH v3 0/2] net: mvneta: improve suspend/resume
From: Jisheng Zhang Date: Mon, 2 Apr 2018 11:22:29 +0800 > This series tries to optimize the mvneta's suspend/resume > implementation by only taking necessary actions. > > Since v2: > - keep rtnl lock when calling mvneta_start_dev() and mvneta_stop_dev() >Thank Russell for pointing this out > > Since v1: > - unify ret check > - try best to keep the suspend/resume behavior > - split txq deinit into sw/hw parts as well > - adjust mvneta_stop_dev() location Series applied, thank you.
Re: [PATCH net v5 2/3] ipv6: allow to cache dst for a connected sk in ip6_sk_dst_lookup_flow()
From: Alexey Kodanev Date: Mon, 2 Apr 2018 17:00:34 +0300 > +++ b/net/ipv6/ip6_output.c > @@ -1105,23 +1105,32 @@ struct dst_entry *ip6_dst_lookup_flow(const struct > sock *sk, struct flowi6 *fl6, > * @sk: socket which provides the dst cache and route info > * @fl6: flow to lookup > * @final_dst: final destination address for ipsec lookup > + * @connected: whether @sk is connected or not ... > struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, > - const struct in6_addr *final_dst) > + const struct in6_addr *final_dst, > + int connected) Please use type 'bool' and true/false for this new parameter. Thank you.
Re: [PATCH] connector: add parent pid and tgid to coredump and exit events
Hi David, I don't see how it breaks UAPI. The point is that structures coredump_proc_event and exit_proc_event are members of *union* event_data, thus position of the existing data in the structure is unchanged. Furthermore, this change won't increase size of struct proc_event, because comm_proc_event (also a member of event_data) is of bigger size than the changed structures. If I'm wrong, could you please explain what exactly will the change break in UAPI? On 30/03/18 19:59, David Miller wrote: > From: Stefan Strogin > Date: Thu, 29 Mar 2018 17:12:47 +0300 > >> diff --git a/include/uapi/linux/cn_proc.h b/include/uapi/linux/cn_proc.h >> index 68ff25414700..db210625cee8 100644 >> --- a/include/uapi/linux/cn_proc.h >> +++ b/include/uapi/linux/cn_proc.h >> @@ -116,12 +116,16 @@ struct proc_event { >> struct coredump_proc_event { >> __kernel_pid_t process_pid; >> __kernel_pid_t process_tgid; >> +__kernel_pid_t parent_pid; >> +__kernel_pid_t parent_tgid; >> } coredump; >> >> struct exit_proc_event { >> __kernel_pid_t process_pid; >> __kernel_pid_t process_tgid; >> __u32 exit_code, exit_signal; >> +__kernel_pid_t parent_pid; >> +__kernel_pid_t parent_tgid; >> } exit; >> >> } event_data; > > I don't think you can add these members without breaking UAPI. >
Re: [PATCH net-next] net: ipv6/gre: Add GRO support
On 04/02/2018 08:00 AM, Eran Ben Elisha wrote: Seems good, but why isn't this handled directly in GRO native layer ? >>> ip6_tunnel and ip6_gre do not share initialization flow functions (unlike >>> ipv4). >>> Changing the ipv6 init infrastructure should not be part of this >>> patch. we prefer to keep this one minimal, simple and safe. >> >> >> >> Looking at gre_gro_receive() and gre_gro_complete() I could not see why they >> could not be copied/pasted to IPv6. > > These functions to handle GRO over GRE are already assigned in > gre_offload_init() (in net/ipv4/gre_offload.c under CONFIG_IPV6). > However without initializing the gro_cells, the receive path will not > go via napi_gro_receive path, but directly to netif_rx. > So AFAIU, only gcells->cells was missing for gro_cells_receive to > really go via GRO flow. > >> >> Maybe give more details on the changelog, it is really not obvious. > Hopefully the above filled this request. >> Not really :/ gro_cells_receive() is not really useful with native GRO, since packet is already a GRO packet by the time it reaches ip_tunnel_rcv() or __ip6_tnl_rcv() Sure, it might be usefull if native GRO (happening on eth0 if you prefer) did not handle a particular encapsulation. gro_cell was a work around before we extended GRO to be able to decap some tunnel headers. It seems we have to extend this to also support GRE6.
Re: [PATCH v3 2/2] net: usb: asix88179_178a: de-duplicate code
Hi Marc, David, with the v2 patch ("net: usb: asix88179_178a: de-duplicate code") I made an embarrassingly stupid mistake of removing the wrong function. The v2 patch accidentally changed ax88179_link_reset() instead of ax88179_reset(). Hunk 6 of v2 ("net: usb: asix88179_178a: de-duplicate code") is just utterly wrong. ax88179_bind() and ax88179_reset() were the correct targets to be de-duplicated, as done in the v3 patch. Sorry for this, Alexander On Mon, 2 Apr 2018, David Miller wrote: > From: Marc Zyngier > Date: Mon, 02 Apr 2018 10:45:40 +0100 > > > What has changed between this patch and the previous one? Having a bit > > of a change-log would certainly help. Also, I would have appreciated a > > reply to the questions I had on v2 before you posted a third version. > > Agreed, and I'm not applying these patches until this is sorted out > and explained properly. >
[RFC] vhost: introduce mdev based hardware vhost backend
This patch introduces a mdev (mediated device) based hardware vhost backend. This backend is an abstraction of the various hardware vhost accelerators (potentially any device that uses virtio ring can be used as a vhost accelerator). Some generic mdev parent ops are provided for accelerator drivers to support generating mdev instances. What's this === The idea is that we can setup a virtio ring compatible device with the messages available at the vhost-backend. Originally, these messages are used to implement a software vhost backend, but now we will use these messages to setup a virtio ring compatible hardware device. Then the hardware device will be able to work with the guest virtio driver in the VM just like what the software backend does. That is to say, we can implement a hardware based vhost backend in QEMU, and any virtio ring compatible devices potentially can be used with this backend. (We also call it vDPA -- vhost Data Path Acceleration). One problem is that, different virtio ring compatible devices may have different device interfaces. That is to say, we will need different drivers in QEMU. It could be troublesome. And that's what this patch trying to fix. The idea behind this patch is very simple: mdev is a standard way to emulate device in kernel. So we defined a standard device based on mdev, which is able to accept vhost messages. When the mdev emulation code (i.e. the generic mdev parent ops provided by this patch) gets vhost messages, it will parse and deliver them to accelerator drivers. Drivers can use these messages to setup accelerators. That is to say, the generic mdev parent ops (e.g. read()/write()/ ioctl()/...) will be provided for accelerator drivers to register accelerators as mdev parent devices. And each accelerator device will support generating standard mdev instance(s). With this standard device interface, we will be able to just develop one userspace driver to implement the hardware based vhost backend in QEMU. 
Difference between vDPA and PCI passthru The key difference between vDPA and PCI passthru is that, in vDPA only the data path of the device (e.g. DMA ring, notify region and queue interrupt) is pass-throughed to the VM, the device control path (e.g. PCI configuration space and MMIO regions) is still defined and emulated by QEMU. The benefits of keeping virtio device emulation in QEMU compared with virtio device PCI passthru include (but not limit to): - consistent device interface for guest OS in the VM; - max flexibility on the hardware design, especially the accelerator for each vhost backend doesn't have to be a full PCI device; - leveraging the existing virtio live-migration framework; The interface of this mdev based device === 1. BAR0 The MMIO region described by BAR0 is the main control interface. Messages will be written to or read from this region. The message type is determined by the `request` field in message header. The message size is encoded in the message header too. The message format looks like this: struct vhost_vfio_op { __u64 request; __u32 flags; /* Flag values: */ #define VHOST_VFIO_NEED_REPLY 0x1 /* Whether need reply */ __u32 size; union { __u64 u64; struct vhost_vring_state state; struct vhost_vring_addr addr; struct vhost_memory memory; } payload; }; The existing vhost-kernel ioctl cmds are reused as the message requests in above structure. 
Each message will be written to or read from this region at offset 0: int vhost_vfio_write(struct vhost_dev *dev, struct vhost_vfio_op *op) { int count = VHOST_VFIO_OP_HDR_SIZE + op->size; struct vhost_vfio *vfio = dev->opaque; int ret; ret = pwrite64(vfio->device_fd, op, count, vfio->bar0_offset); if (ret != count) return -1; return 0; } int vhost_vfio_read(struct vhost_dev *dev, struct vhost_vfio_op *op) { int count = VHOST_VFIO_OP_HDR_SIZE + op->size; struct vhost_vfio *vfio = dev->opaque; uint64_t request = op->request; int ret; ret = pread64(vfio->device_fd, op, count, vfio->bar0_offset); if (ret != count || request != op->request) return -1; return 0; } It's quite straightforward to set things to the device. Just need to write the message to device directly: int vhost_vfio_set_features(struct vhost_dev *dev, uint64_t features) { struct vhost_vfio_op op; op.request = VHOST_SET_FEATURES; op.flags = 0; op.size = sizeof(features); op.payload.u64 = features; return vhost_vfio_write(dev, &op); } To get things from the device, two steps are needed. Take VHOST_GET_FEATURE as an example: int vhost_vfio_get_features(struct vhost_dev *dev, uint64_t *features) { struct vhost_vfio_op op;
Re: [PATCH net-next] bridge: Allow max MTU when multiple VLANs present
On Mon, Apr 2, 2018 at 11:08 AM, Roopa Prabhu wrote: > On Fri, Mar 30, 2018 at 12:54 PM, Chas Williams <3ch...@gmail.com> wrote: >> On Thu, Mar 29, 2018 at 9:02 PM, Toshiaki Makita >> wrote: >>> On 2018/03/30 1:49, Roopa Prabhu wrote: On Thu, Mar 22, 2018 at 9:53 PM, Roopa Prabhu wrote: > On Thu, Mar 22, 2018 at 8:34 AM, Chas Williams <3ch...@gmail.com> wrote: >> If the bridge is allowing multiple VLANs, some VLANs may have >> different MTUs. Instead of choosing the minimum MTU for the >> bridge interface, choose the maximum MTU of the bridge members. >> With this the user only needs to set a larger MTU on the member >> ports that are participating in the large MTU VLANS. >> >> Signed-off-by: Chas Williams <3ch...@gmail.com> >> --- > > Acked-by: Roopa Prabhu > > This or an equivalent fix is necessary: as stated above, today the > bridge mtu capped at min port mtu limits all > vlan devices on top of the vlan filtering bridge to min port mtu. On further thought, since this patch changes default behavior, it may upset people. ie with this patch, a vlan device on the bridge by default will now use the bridge max mtu and that could cause unexpected drops in the bridge driver if the xmit port had a lower mtu. This may surprise users. >> >> It only changes the default behavior when you are using VLAN aware bridges. >> The behavior remains the same otherwise. I don't know if VLAN aware bridges >> are that popular yet so there probably isn't any particular >> expectation from those >> bridges. > > they are popular...in-fact they are the default bridge mode on our > network switches. > And they have been around for some time now to ignore its users. > Plus it is not right to change default mtu behavior for one mode of the bridge > and not the others (bridge mtu handling from user-space is complex enough > today > due to dynamic mtu changes on port enslave/deslave). I don't see the issue with one mode of bridge behaving differently from another mode. 
The VLAN behavior between the two bridge modes is completely different so having a different MTU behavior doesn't seem that surprising. You are potentially mixing different sized VLAN on a same bridge. The only sane choice is to pick the largest MTU for the bridge. This lets you have whatever MTU is appropriate on the child VLAN interfaces of the bridge. If you attempt to forward from a port with a larger MTU to a smaller MTU, you get the expected behavior. Forcing the end user to configure all the ports to the maximum MTU of all the VLANs on the bridge is wrong IMHO. You then risk attempting to forward oversize packets on a network that can't support that. > >> >> I don't think those drops are unexpected. If a user has misconfigured >> the bridge >> we can't be expected to fix that for them. It is the user's >> responsbility to ensure >> that the ports on the VLAN have a size consistent with the traffic >> they expect to >> pass. >> > > By default they are not expected today. The problem is changing the bridge > to max mtu changes 'all' the vlan devices on top of the vlan aware bridge to > max mtu by default which makes drops at the bridge driver more common if the > user had mixed mtu on its ports. That's not been my experience. The MTU on the vlan devices is only limited by the bridges's MTU. Setting the bridge MTU doesn't change the children VLAN devices MTUs.
Re: [BUG/Q] can_pernet_exit() leaves devices on dead net
Hi Kirill, Marc, I checked the code once more and added some debug output to the other parts of the CAN notifier code. In fact the code pointed to by both of you seems to be obsolete, as I only wanted to be 'really sure' that no leftovers of the CAN filters remain at module unloading. Yes, this one looks good: https://marc.info/?l=linux-can&m=150169589119335&w=2 Regards, Kirill I was obviously too cautious ;-) All tests I made resulted in the function iterating through all the CAN netdevices doing exactly nothing. I'm fine with removing that stuff - but I'm not sure whether it's worth pushing that patch to stable 4.12+ or even before 4.12 (without namespace support - and removing rcu_barrier() too). Any opinions? Best regards, Oliver
Re: [BUG/Q] can_pernet_exit() leaves devices on dead net
Hi, Oliver, On 02.04.2018 18:28, Oliver Hartkopp wrote: > Hi Kirill, Marc, > > I checked the code once more and added some debug output to the other parts > in CAN notifier code. > > In fact the code pointed to by both of you seems to be obsolete as I only > wanted to be 'really sure' that no leftovers of the CAN filters at module > unloading. > > >> Yes, this one looks good: >> https://marc.info/?l=linux-can&m=150169589119335&w=2 >> >> Regards, >> Kirill >> > > I was obviously too cautious ;-) > > All tests I made resulted in the function iterating through all the CAN > netdevices doing exactly nothing. > > I'm fine with removing that stuff - but I'm not sure whether it's worth to > push that patch to stable 4.12+ or even before 4.12 (without namespace > support - and removing rcu_barrier() too). > > Any opinions? I think the same -- it's not needed for stable, as there is just an iteration over an empty list, i.e., a noop. Kirill
Re: [PATCH net-next] net: ipv6/gre: Add GRO support
On 02/04/2018 6:19 PM, Eric Dumazet wrote: On 04/02/2018 08:00 AM, Eran Ben Elisha wrote: Seems good, but why isn't this handled directly in GRO native layer ? ip6_tunnel and ip6_gre do not share initialization flow functions (unlike ipv4). Changing the ipv6 init infrastructure should not be part of this patch. we prefer to keep this one minimal, simple and safe. Looking at gre_gro_receive() and gre_gro_complete() I could not see why they could not be copied/pasted to IPv6. These functions to handle GRO over GRE are already assigned in gre_offload_init() (in net/ipv4/gre_offload.c under CONFIG_IPV6). However without initializing the gro_cells, the receive path will not go via napi_gro_receive path, but directly to netif_rx. So AFAIU, only gcells->cells was missing for gro_cells_receive to really go via GRO flow. Maybe give more details on the changelog, it is really not obvious. Hopefully the above filled this request. Not really :/ So you're referring to native interface. We thought you meant native IP module. gro_cells_receive() is not really useful with native GRO, since packet is already a GRO packet by the time it reaches ip_tunnel_rcv() or __ip6_tnl_rcv() Right. If GRO on native interface is ON, our patch doesn't help much. The case we improve here is: Native has GRO OFF, GRE has GRO ON. Before this patch there were no GRO packets at all in this case, only MTU packets went up the stack. Sure, it might be usefull if native GRO (happening on eth0 if you prefer) did not handle a particular encapsulation. Or it is turned OFF. gro_cell was a work around before we extended GRO to be able to decap some tunnel headers. It seems we have to extend this to also support GRE6.
Re: [bpf-next PATCH 4/4] bpf: sockmap, add hash map support
On Sun, Apr 01, 2018 at 08:01:10AM -0700, John Fastabend wrote: > Sockmap is currently backed by an array and enforces keys to be > four bytes. This works well for many use cases and was originally > modeled after devmap which also uses four bytes keys. However, > this has become limiting in larger use cases where a hash would > be more appropriate. For example users may want to use the 5-tuple > of the socket as the lookup key. > > To support this add hash support. > > Signed-off-by: John Fastabend api looks good, but I think it came a bit too late for this release. _nulls part you don't need for this hash. Few other nits: > +static void htab_elem_free_rcu(struct rcu_head *head) > +{ > + struct htab_elem *l = container_of(head, struct htab_elem, rcu); > + > + /* must increment bpf_prog_active to avoid kprobe+bpf triggering while > + * we're calling kfree, otherwise deadlock is possible if kprobes > + * are placed somewhere inside of slub > + */ > + preempt_disable(); > + __this_cpu_inc(bpf_prog_active); > + kfree(l); > + __this_cpu_dec(bpf_prog_active); > + preempt_enable(); I don't think it's necessary. > +static struct bpf_map *sock_hash_alloc(union bpf_attr *attr) > +{ > + struct bpf_htab *htab; > + int i, err; > + u64 cost; > + > + if (!capable(CAP_NET_ADMIN)) > + return ERR_PTR(-EPERM); > + > + /* check sanity of attributes */ > + if (attr->max_entries == 0 || > + attr->map_flags & ~SOCK_CREATE_FLAG_MASK) > + return ERR_PTR(-EINVAL); > + > + if (attr->value_size > KMALLOC_MAX_SIZE) > + return ERR_PTR(-E2BIG); doesn't seem to match + u32 fd = *(u32 *)value; that is done later. > +static struct htab_elem *lookup_elem_raw(struct hlist_nulls_head *head, > + u32 hash, void *key, u32 key_size) > +{ > + struct hlist_nulls_node *n; > + struct htab_elem *l; > + > + hlist_nulls_for_each_entry_rcu(l, n, head, hash_node) > + if (l->hash == hash && !memcmp(&l->key, key, key_size)) > + return l; if nulls is needed, there gotta be a comment explaining it. 
please add tests for all methods. > diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c > index f95fa67..2fa4cbb 100644 > --- a/tools/bpf/bpftool/map.c > +++ b/tools/bpf/bpftool/map.c > @@ -67,6 +67,7 @@ > [BPF_MAP_TYPE_DEVMAP] = "devmap", > [BPF_MAP_TYPE_SOCKMAP] = "sockmap", > [BPF_MAP_TYPE_CPUMAP] = "cpumap", > + [BPF_MAP_TYPE_SOCKHASH] = "sockhash", > }; > > static unsigned int get_possible_cpus(void) > diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h > index 9d07465..1a19450 100644 > --- a/tools/include/uapi/linux/bpf.h > +++ b/tools/include/uapi/linux/bpf.h > @@ -115,6 +115,7 @@ enum bpf_map_type { > BPF_MAP_TYPE_DEVMAP, > BPF_MAP_TYPE_SOCKMAP, > BPF_MAP_TYPE_CPUMAP, > + BPF_MAP_TYPE_SOCKHASH, tools/* updates should be in separate commit.
RE: [PATCH v5 12/14] fm10k: Report PCIe link properties with pcie_print_link_status()
> -Original Message- > From: Bjorn Helgaas [mailto:helg...@kernel.org] > Sent: Friday, March 30, 2018 2:06 PM > To: Tal Gilboa > Cc: Tariq Toukan ; Keller, Jacob E > ; Ariel Elior ; Ganesh > Goudar ; Kirsher, Jeffrey T > ; everest-linux...@cavium.com; intel-wired- > l...@lists.osuosl.org; netdev@vger.kernel.org; linux-ker...@vger.kernel.org; > linux-...@vger.kernel.org > Subject: [PATCH v5 12/14] fm10k: Report PCIe link properties with > pcie_print_link_status() > > From: Bjorn Helgaas > > Use pcie_print_link_status() to report PCIe link speed and possible > limitations instead of implementing this in the driver itself. > > Note that pcie_get_minimum_link() can return misleading information because > it finds the slowest link and the narrowest link without considering the > total bandwidth of the link. If the path contains a 16 GT/s x1 link and a > 2.5 GT/s x16 link, pcie_get_minimum_link() returns 2.5 GT/s x1, which > corresponds to 250 MB/s of bandwidth, not the actual available bandwidth of > about 2000 MB/s for a 16 GT/s x1 link. This comment is about what's being fixed, so it would have been easier to parse if it were written to more clearly indicate that we're removing (and not adding) this behavior. Aside from the commit message (which I don't feel strongly enough needs a re-send of the patch) this looks good to me. Acked-by: Jacob Keller Thanks Bjorn and Tal for fixing this! 
> > Signed-off-by: Bjorn Helgaas > --- > drivers/net/ethernet/intel/fm10k/fm10k_pci.c | 87 > -- > 1 file changed, 1 insertion(+), 86 deletions(-) > > diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c > b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c > index a434fecfdfeb..aa05fb534942 100644 > --- a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c > +++ b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c > @@ -2120,91 +2120,6 @@ static int fm10k_sw_init(struct fm10k_intfc *interface, > return 0; > } > > -static void fm10k_slot_warn(struct fm10k_intfc *interface) > -{ > - enum pcie_link_width width = PCIE_LNK_WIDTH_UNKNOWN; > - enum pci_bus_speed speed = PCI_SPEED_UNKNOWN; > - struct fm10k_hw *hw = &interface->hw; > - int max_gts = 0, expected_gts = 0; > - > - if (pcie_get_minimum_link(interface->pdev, &speed, &width) || > - speed == PCI_SPEED_UNKNOWN || width == > PCIE_LNK_WIDTH_UNKNOWN) { > - dev_warn(&interface->pdev->dev, > - "Unable to determine PCI Express bandwidth.\n"); > - return; > - } > - > - switch (speed) { > - case PCIE_SPEED_2_5GT: > - /* 8b/10b encoding reduces max throughput by 20% */ > - max_gts = 2 * width; > - break; > - case PCIE_SPEED_5_0GT: > - /* 8b/10b encoding reduces max throughput by 20% */ > - max_gts = 4 * width; > - break; > - case PCIE_SPEED_8_0GT: > - /* 128b/130b encoding has less than 2% impact on throughput */ > - max_gts = 8 * width; > - break; > - default: > - dev_warn(&interface->pdev->dev, > - "Unable to determine PCI Express bandwidth.\n"); > - return; > - } > - > - dev_info(&interface->pdev->dev, > - "PCI Express bandwidth of %dGT/s available\n", > - max_gts); > - dev_info(&interface->pdev->dev, > - "(Speed:%s, Width: x%d, Encoding Loss:%s, Payload:%s)\n", > - (speed == PCIE_SPEED_8_0GT ? "8.0GT/s" : > - speed == PCIE_SPEED_5_0GT ? "5.0GT/s" : > - speed == PCIE_SPEED_2_5GT ? "2.5GT/s" : > - "Unknown"), > - hw->bus.width, > - (speed == PCIE_SPEED_2_5GT ? "20%" : > - speed == PCIE_SPEED_5_0GT ? 
"20%" : > - speed == PCIE_SPEED_8_0GT ? "<2%" : > - "Unknown"), > - (hw->bus.payload == fm10k_bus_payload_128 ? "128B" : > - hw->bus.payload == fm10k_bus_payload_256 ? "256B" : > - hw->bus.payload == fm10k_bus_payload_512 ? "512B" : > - "Unknown")); > - > - switch (hw->bus_caps.speed) { > - case fm10k_bus_speed_2500: > - /* 8b/10b encoding reduces max throughput by 20% */ > - expected_gts = 2 * hw->bus_caps.width; > - break; > - case fm10k_bus_speed_5000: > - /* 8b/10b encoding reduces max throughput by 20% */ > - expected_gts = 4 * hw->bus_caps.width; > - break; > - case fm10k_bus_speed_8000: > - /* 128b/130b encoding has less than 2% impact on throughput */ > - expected_gts = 8 * hw->bus_caps.width; > - break; > - default: > - dev_warn(&interface->pdev->dev, > - "Unable to determine expected PCI Express > bandwidth.\n"); > - return; > - } > - > - if (max_gts >= expected_gts) > -
RE: [PATCH v5 03/14] PCI: Add pcie_bandwidth_capable() to compute max supported link bandwidth
> -Original Message- > From: Tal Gilboa [mailto:ta...@mellanox.com] > Sent: Monday, April 02, 2018 7:34 AM > To: Bjorn Helgaas > Cc: Tariq Toukan ; Keller, Jacob E > ; Ariel Elior ; Ganesh > Goudar ; Kirsher, Jeffrey T > ; everest-linux...@cavium.com; intel-wired- > l...@lists.osuosl.org; netdev@vger.kernel.org; linux-ker...@vger.kernel.org; > linux-...@vger.kernel.org > Subject: Re: [PATCH v5 03/14] PCI: Add pcie_bandwidth_capable() to compute > max supported link bandwidth > > On 4/2/2018 5:05 PM, Bjorn Helgaas wrote: > > On Mon, Apr 02, 2018 at 10:34:58AM +0300, Tal Gilboa wrote: > >> On 4/2/2018 3:40 AM, Bjorn Helgaas wrote: > >>> On Sun, Apr 01, 2018 at 11:38:53PM +0300, Tal Gilboa wrote: > On 3/31/2018 12:05 AM, Bjorn Helgaas wrote: > > From: Tal Gilboa > > > > Add pcie_bandwidth_capable() to compute the max link bandwidth > supported by > > a device, based on the max link speed and width, adjusted by the > encoding > > overhead. > > > > The maximum bandwidth of the link is computed as: > > > > max_link_speed * max_link_width * (1 - encoding_overhead) > > > > The encoding overhead is about 20% for 2.5 and 5.0 GT/s links using > 8b/10b > > encoding, and about 1.5% for 8 GT/s or higher speed links using > > 128b/130b > > encoding. 
> > > > Signed-off-by: Tal Gilboa > > [bhelgaas: adjust for pcie_get_speed_cap() and pcie_get_width_cap() > > signatures, don't export outside drivers/pci] > > Signed-off-by: Bjorn Helgaas > > Reviewed-by: Tariq Toukan > > --- > > drivers/pci/pci.c | 21 + > > drivers/pci/pci.h |9 + > > 2 files changed, 30 insertions(+) > > > > diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c > > index 43075be79388..9ce89e254197 100644 > > --- a/drivers/pci/pci.c > > +++ b/drivers/pci/pci.c > > @@ -5208,6 +5208,27 @@ enum pcie_link_width > pcie_get_width_cap(struct pci_dev *dev) > > return PCIE_LNK_WIDTH_UNKNOWN; > > } > > +/** > > + * pcie_bandwidth_capable - calculates a PCI device's link bandwidth > capability > > + * @dev: PCI device > > + * @speed: storage for link speed > > + * @width: storage for link width > > + * > > + * Calculate a PCI device's link bandwidth by querying for its link > > speed > > + * and width, multiplying them, and applying encoding overhead. > > + */ > > +u32 pcie_bandwidth_capable(struct pci_dev *dev, enum pci_bus_speed > *speed, > > + enum pcie_link_width *width) > > +{ > > + *speed = pcie_get_speed_cap(dev); > > + *width = pcie_get_width_cap(dev); > > + > > + if (*speed == PCI_SPEED_UNKNOWN || *width == > PCIE_LNK_WIDTH_UNKNOWN) > > + return 0; > > + > > + return *width * PCIE_SPEED2MBS_ENC(*speed); > > +} > > + > > /** > > * pci_select_bars - Make BAR mask from the type of resource > > * @dev: the PCI device for which BAR mask is made > > diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h > > index 66738f1050c0..2a50172b9803 100644 > > --- a/drivers/pci/pci.h > > +++ b/drivers/pci/pci.h > > @@ -261,8 +261,17 @@ void pci_disable_bridge_window(struct pci_dev > *dev); > > (speed) == PCIE_SPEED_2_5GT ? "2.5 GT/s" : \ > > "Unknown speed") > > +/* PCIe speed to Mb/s with encoding overhead: 20% for gen2, ~1.5% for > gen3 */ > > +#define PCIE_SPEED2MBS_ENC(speed) \ > > Missing gen4. > >>> > >>> I made it "gen3+". I think that's accurate, isn't it? 
The spec > >>> doesn't seem to actually use "gen3" as a specific term, but sec 4.2.2 > >>> says rates of 8 GT/s or higher (which I think includes gen3 and gen4) > >>> use 128b/130b encoding. > >>> > >> > >> I meant that PCIE_SPEED_16_0GT will return 0 from this macro since it > >> wasn't > >> added. Need to return 15754. > > > > Oh, duh, of course! Sorry for being dense. What about the following? > > I included the calculation as opposed to just the magic numbers to try > > to make it clear how they're derived. This has the disadvantage of > > truncating the result instead of rounding, but I doubt that's > > significant in this context. If it is, we could use the magic numbers > > and put the computation in a comment. > > We can always use DIV_ROUND_UP((speed * enc_nominator), > enc_denominator). I think this is confusing and since this introduces a > bandwidth limit I would prefer to give a wider limit than a wrong one, > even it is by less than 1Mb/s. My vote is for leaving it as you wrote below. > > > > > Another question: we currently deal in Mb/s, not MB/s. Mb/s has the > > advantage of sort of corresponding to the GT/s numbers, but using MB/s > > would have the advantage of smaller numbers that match the table here: > > https://en.wikipedia.org/wiki/PCI_Express#H
Re: [PATCH 0/4] RFC: Realtek 83xx SMI driver core
Hi Linus, did you make any progress with this? I noticed that the Vodafone Easybox 904xdsl/904lte models both make use of the RTL8367 switch. About one million of these routers have been deployed in Germany. There is an OpenWrt fork at https://github.com/Quallenauge/Easybox-904-XDSL/commits/master-lede which depends on the out-of-tree patches which seem to be the basis for your Realtek 83xx driver patches. Having your Realtek 83xx patches in the upstream Linux kernel would help tremendously in getting support for those router models merged in OpenWrt. Regards, Carl-Daniel
[net-next 1/2] net: netcp: add api to support set rx mode in netcp modules
From: WingMan Kwok This patch adds an API to support setting rx mode in netcp modules. If a netcp module needs to be notified when upper layer transitions from one rx mode to another and react accordingly, such a module will implement the new API set_rx_mode added in this patch. Currently rx modes supported are PROMISCUOUS and NON_PROMISCUOUS modes. Signed-off-by: WingMan Kwok Signed-off-by: Murali Karicheri --- drivers/net/ethernet/ti/netcp.h | 1 + drivers/net/ethernet/ti/netcp_core.c | 19 +++ 2 files changed, 20 insertions(+) diff --git a/drivers/net/ethernet/ti/netcp.h b/drivers/net/ethernet/ti/netcp.h index 416f732..c4ffdf4 100644 --- a/drivers/net/ethernet/ti/netcp.h +++ b/drivers/net/ethernet/ti/netcp.h @@ -214,6 +214,7 @@ struct netcp_module { int (*add_vid)(void *intf_priv, int vid); int (*del_vid)(void *intf_priv, int vid); int (*ioctl)(void *intf_priv, struct ifreq *req, int cmd); + int (*set_rx_mode)(void *intf_priv, bool promisc); /* used internally */ struct list_headmodule_list; diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c index 736f6f7..e40aa3e 100644 --- a/drivers/net/ethernet/ti/netcp_core.c +++ b/drivers/net/ethernet/ti/netcp_core.c @@ -1509,6 +1509,24 @@ static void netcp_addr_sweep_add(struct netcp_intf *netcp) } } +static int netcp_set_promiscuous(struct netcp_intf *netcp, bool promisc) +{ + struct netcp_intf_modpriv *priv; + struct netcp_module *module; + int error; + + for_each_module(netcp, priv) { + module = priv->netcp_module; + if (!module->set_rx_mode) + continue; + + error = module->set_rx_mode(priv->module_priv, promisc); + if (error) + return error; + } + return 0; +} + static void netcp_set_rx_mode(struct net_device *ndev) { struct netcp_intf *netcp = netdev_priv(ndev); @@ -1538,6 +1556,7 @@ static void netcp_set_rx_mode(struct net_device *ndev) /* finally sweep and callout into modules */ netcp_addr_sweep_del(netcp); netcp_addr_sweep_add(netcp); + netcp_set_promiscuous(netcp, promisc); 
spin_unlock(&netcp->lock); } -- 1.9.1
[net-next 0/2] Add promiscuous mode support in k2g network driver
This patch adds support for promiscuous mode in network driver for K2G SoC. This depends on v3 of my series at https://www.spinics.net/lists/kernel/msg2765942.html I plan to fold this to the above series and submit again when the net-next merge window opens. At this time, please review and let me know if it looks good or needs any re-work. I would like to get this ready so that it can be merged along with the above series. The boot and promiscuous mode test logs are at https://pastebin.ubuntu.com/p/XQCvFS3QZb/ WingMan Kwok (2): net: netcp: add api to support set rx mode in netcp modules net: netcp: ethss: k2g: add promiscuous mode support drivers/net/ethernet/ti/netcp.h | 1 + drivers/net/ethernet/ti/netcp_core.c | 19 drivers/net/ethernet/ti/netcp_ethss.c | 56 +++ 3 files changed, 76 insertions(+) -- 1.9.1
[net-next 2/2] net: netcp: ethss: k2g: add promiscuous mode support
From: WingMan Kwok This patch adds support for promiscuous mode in k2g's network driver. When upper layer instructs to transition from non-promiscuous mode to promiscuous mode or vice versa K2G network driver needs to configure ALE accordingly so that in case of non-promiscuous mode, ALE will not flood all unicast packets to host port, while in promiscuous mode, it will pass all received unicast packets to host port. Signed-off-by: WingMan Kwok Signed-off-by: Murali Karicheri --- drivers/net/ethernet/ti/netcp_ethss.c | 56 +++ 1 file changed, 56 insertions(+) diff --git a/drivers/net/ethernet/ti/netcp_ethss.c b/drivers/net/ethernet/ti/netcp_ethss.c index f7af999..1ac2cd6 100644 --- a/drivers/net/ethernet/ti/netcp_ethss.c +++ b/drivers/net/ethernet/ti/netcp_ethss.c @@ -2771,6 +2771,61 @@ static inline int gbe_hwtstamp_set(struct gbe_intf *gbe_intf, struct ifreq *req) } #endif /* CONFIG_TI_CPTS */ +static int gbe_set_rx_mode(void *intf_priv, bool promisc) +{ + struct gbe_intf *gbe_intf = intf_priv; + struct gbe_priv *gbe_dev = gbe_intf->gbe_dev; + struct cpsw_ale *ale = gbe_dev->ale; + unsigned long timeout; + int i, ret = -ETIMEDOUT; + + /* Disable(1)/Enable(0) Learn for all ports (host is port 0 and +* slaves are port 1 and up +*/ + for (i = 0; i <= gbe_dev->num_slaves; i++) { + cpsw_ale_control_set(ale, i, +ALE_PORT_NOLEARN, !!promisc); + cpsw_ale_control_set(ale, i, +ALE_PORT_NO_SA_UPDATE, !!promisc); + } + + if (!promisc) { + /* Don't Flood All Unicast Packets to Host port */ + cpsw_ale_control_set(ale, 0, ALE_P0_UNI_FLOOD, 0); + dev_vdbg(gbe_dev->dev, "promiscuous mode disabled\n"); + return 0; + } + + timeout = jiffies + HZ; + + /* Clear All Untouched entries */ + cpsw_ale_control_set(ale, 0, ALE_AGEOUT, 1); + do { + cpu_relax(); + if (cpsw_ale_control_get(ale, 0, ALE_AGEOUT)) { + ret = 0; + break; + } + + } while (time_after(timeout, jiffies)); + + /* Make sure it is not a false timeout */ + if (ret && !cpsw_ale_control_get(ale, 0, ALE_AGEOUT)) + return ret; + 
+ cpsw_ale_control_set(ale, 0, ALE_AGEOUT, 1); + + /* Clear all mcast from ALE */ + cpsw_ale_flush_multicast(ale, +GBE_PORT_MASK(gbe_dev->ale_ports), +-1); + + /* Flood All Unicast Packets to Host port */ + cpsw_ale_control_set(ale, 0, ALE_P0_UNI_FLOOD, 1); + dev_vdbg(gbe_dev->dev, "promiscuous mode enabled\n"); + return ret; +} + static int gbe_ioctl(void *intf_priv, struct ifreq *req, int cmd) { struct gbe_intf *gbe_intf = intf_priv; @@ -3523,6 +3578,7 @@ static int gbe_probe(struct netcp_device *netcp_device, struct device *dev, gbe_dev->max_num_slaves = 8; } else if (of_device_is_compatible(node, "ti,netcp-gbe-2")) { gbe_dev->max_num_slaves = 1; + gbe_module.set_rx_mode = gbe_set_rx_mode; } else if (of_device_is_compatible(node, "ti,netcp-xgbe")) { gbe_dev->max_num_slaves = 2; } else { -- 1.9.1
RE: [PATCH v5 05/14] PCI: Add pcie_print_link_status() to log link speed and whether it's limited
> -Original Message- > From: Bjorn Helgaas [mailto:helg...@kernel.org] > Sent: Friday, March 30, 2018 2:05 PM > To: Tal Gilboa > Cc: Tariq Toukan ; Keller, Jacob E > ; Ariel Elior ; Ganesh > Goudar ; Kirsher, Jeffrey T > ; everest-linux...@cavium.com; intel-wired- > l...@lists.osuosl.org; netdev@vger.kernel.org; linux-ker...@vger.kernel.org; > linux-...@vger.kernel.org > Subject: [PATCH v5 05/14] PCI: Add pcie_print_link_status() to log link speed > and > whether it's limited > > From: Tal Gilboa > > Add pcie_print_link_status(). This logs the current settings of the link > (speed, width, and total available bandwidth). > > If the device is capable of more bandwidth but is limited by a slower > upstream link, we include information about the link that limits the > device's performance. > > The user may be able to move the device to a different slot for better > performance. > > This provides a unified method for all PCI devices to report status and > issues, instead of each device reporting in a different way, using > different code. > > Signed-off-by: Tal Gilboa > [bhelgaas: changelog, reword log messages, print device capabilities when > not limited] > Signed-off-by: Bjorn Helgaas > --- > drivers/pci/pci.c | 29 + > include/linux/pci.h |1 + > 2 files changed, 30 insertions(+) > > diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c > index e00d56b12747..cec7aed09f6b 100644 > --- a/drivers/pci/pci.c > +++ b/drivers/pci/pci.c > @@ -5283,6 +5283,35 @@ u32 pcie_bandwidth_capable(struct pci_dev *dev, > enum pci_bus_speed *speed, > return *width * PCIE_SPEED2MBS_ENC(*speed); > } > > +/** > + * pcie_print_link_status - Report the PCI device's link speed and width > + * @dev: PCI device to query > + * > + * Report the available bandwidth at the device. If this is less than the > + * device is capable of, report the device's maximum possible bandwidth and > + * the upstream link that limits its performance to less than that. 
> + */ > +void pcie_print_link_status(struct pci_dev *dev) > +{ > + enum pcie_link_width width, width_cap; > + enum pci_bus_speed speed, speed_cap; > + struct pci_dev *limiting_dev = NULL; > + u32 bw_avail, bw_cap; > + > + bw_cap = pcie_bandwidth_capable(dev, &speed_cap, &width_cap); > + bw_avail = pcie_bandwidth_available(dev, &limiting_dev, &speed, > &width); > + > + if (bw_avail >= bw_cap) > + pci_info(dev, "%d Mb/s available bandwidth (%s x%d link)\n", > + bw_cap, PCIE_SPEED2STR(speed_cap), width_cap); > + else > + pci_info(dev, "%d Mb/s available bandwidth, limited by %s x%d > link at %s (capable of %d Mb/s with %s x%d link)\n", > + bw_avail, PCIE_SPEED2STR(speed), width, > + limiting_dev ? pci_name(limiting_dev) : "", > + bw_cap, PCIE_SPEED2STR(speed_cap), width_cap); > +} Personally, I would make thic last one a pci_warn() to indicate it at a higher log level, but I'm ok with the wording, and if consensus is that this should be at info, I'm ok with that. Thanks, Jake > +EXPORT_SYMBOL(pcie_print_link_status); > + > /** > * pci_select_bars - Make BAR mask from the type of resource > * @dev: the PCI device for which BAR mask is made > diff --git a/include/linux/pci.h b/include/linux/pci.h > index f2bf2b7a66c7..38f7957121ef 100644 > --- a/include/linux/pci.h > +++ b/include/linux/pci.h > @@ -1086,6 +1086,7 @@ int pcie_get_minimum_link(struct pci_dev *dev, enum > pci_bus_speed *speed, > u32 pcie_bandwidth_available(struct pci_dev *dev, struct pci_dev > **limiting_dev, >enum pci_bus_speed *speed, >enum pcie_link_width *width); > +void pcie_print_link_status(struct pci_dev *dev); > void pcie_flr(struct pci_dev *dev); > int __pci_reset_function_locked(struct pci_dev *dev); > int pci_reset_function(struct pci_dev *dev);
Re: [PATCH 12/15] dmaengine: pxa: make the filter function internal
Hi Robert, I love your patch! Yet something to improve: [auto build test ERROR on linus/master] [also build test ERROR on v4.16] [cannot apply to arm-soc/for-next next-20180329] [if your patch is applied to the wrong git tree, please drop us a note to help improve the system] url: https://github.com/0day-ci/linux/commits/Robert-Jarzmik/ARM-pxa-switch-to-DMA-slave-maps/20180402-233029 config: i386-allmodconfig (attached as .config) compiler: gcc-7 (Debian 7.3.0-1) 7.3.0 reproduce: # save the attached .config to linux build tree make ARCH=i386 All errors (new ones prefixed by >>): In file included from drivers/mtd/nand/marvell_nand.c:21:0: drivers/mtd/nand/marvell_nand.c: In function 'marvell_nfc_init_dma': >> drivers/mtd/nand/marvell_nand.c:2621:42: error: 'pxad_filter_fn' undeclared >> (first use in this function); did you mean 'dma_filter_fn'? dma_request_slave_channel_compat(mask, pxad_filter_fn, ^ include/linux/dmaengine.h:1408:46: note: in definition of macro 'dma_request_slave_channel_compat' __dma_request_slave_channel_compat(&(mask), x, y, dev, name) ^ drivers/mtd/nand/marvell_nand.c:2621:42: note: each undeclared identifier is reported only once for each function it appears in dma_request_slave_channel_compat(mask, pxad_filter_fn, ^ include/linux/dmaengine.h:1408:46: note: in definition of macro 'dma_request_slave_channel_compat' __dma_request_slave_channel_compat(&(mask), x, y, dev, name) ^ vim +2621 drivers/mtd/nand/marvell_nand.c 02f26ecf Miquel Raynal 2018-01-09 2588 02f26ecf Miquel Raynal 2018-01-09 2589 static int marvell_nfc_init_dma(struct marvell_nfc *nfc) 02f26ecf Miquel Raynal 2018-01-09 2590 { 02f26ecf Miquel Raynal 2018-01-09 2591 struct platform_device *pdev = container_of(nfc->dev, 02f26ecf Miquel Raynal 2018-01-09 2592 struct platform_device, 02f26ecf Miquel Raynal 2018-01-09 2593 dev); 02f26ecf Miquel Raynal 2018-01-09 2594 struct dma_slave_config config = {}; 02f26ecf Miquel Raynal 2018-01-09 2595 struct resource *r; 02f26ecf Miquel Raynal 
2018-01-09 2596 dma_cap_mask_t mask; 02f26ecf Miquel Raynal 2018-01-09 2597 struct pxad_param param; 02f26ecf Miquel Raynal 2018-01-09 2598 int ret; 02f26ecf Miquel Raynal 2018-01-09 2599 02f26ecf Miquel Raynal 2018-01-09 2600 if (!IS_ENABLED(CONFIG_PXA_DMA)) { 02f26ecf Miquel Raynal 2018-01-09 2601 dev_warn(nfc->dev, 02f26ecf Miquel Raynal 2018-01-09 2602 "DMA not enabled in configuration\n"); 02f26ecf Miquel Raynal 2018-01-09 2603 return -ENOTSUPP; 02f26ecf Miquel Raynal 2018-01-09 2604 } 02f26ecf Miquel Raynal 2018-01-09 2605 02f26ecf Miquel Raynal 2018-01-09 2606 ret = dma_set_mask_and_coherent(nfc->dev, DMA_BIT_MASK(32)); 02f26ecf Miquel Raynal 2018-01-09 2607 if (ret) 02f26ecf Miquel Raynal 2018-01-09 2608 return ret; 02f26ecf Miquel Raynal 2018-01-09 2609 02f26ecf Miquel Raynal 2018-01-09 2610 r = platform_get_resource(pdev, IORESOURCE_DMA, 0); 02f26ecf Miquel Raynal 2018-01-09 2611 if (!r) { 02f26ecf Miquel Raynal 2018-01-09 2612 dev_err(nfc->dev, "No resource defined for data DMA\n"); 02f26ecf Miquel Raynal 2018-01-09 2613 return -ENXIO; 02f26ecf Miquel Raynal 2018-01-09 2614 } 02f26ecf Miquel Raynal 2018-01-09 2615 02f26ecf Miquel Raynal 2018-01-09 2616 param.drcmr = r->start; 02f26ecf Miquel Raynal 2018-01-09 2617 param.prio = PXAD_PRIO_LOWEST; 02f26ecf Miquel Raynal 2018-01-09 2618 dma_cap_zero(mask); 02f26ecf Miquel Raynal 2018-01-09 2619 dma_cap_set(DMA_SLAVE, mask); 02f26ecf Miquel Raynal 2018-01-09 2620 nfc->dma_chan = 02f26ecf Miquel Raynal 2018-01-09 @2621 dma_request_slave_channel_compat(mask, pxad_filter_fn, 02f26ecf Miquel Raynal 2018-01-09 2622 ¶m, nfc->dev, 02f26ecf Miquel Raynal 2018-01-09 2623 "data"); 02f26ecf Miquel Raynal 2018-01-09 2624 if (!nfc->dma_chan) { 02f26ecf Miquel Raynal 2018-01-09 2625 dev_err(nfc->dev, 02f26ecf Miquel Raynal 2018-01-09 2626 "Unable to request data DMA channel\n"); 02f26ecf Miquel Raynal 2018-01-09 2627 return -ENODEV; 02f26ecf Miquel Raynal 2018-01-09 2628 } 02f26ecf Miquel Raynal 2018-01-09 2629
Re: [net-next 0/2] Add promiscuous mode support in k2g network driver
From: Murali Karicheri Date: Mon, 2 Apr 2018 12:17:17 -0400 > This patch adds support for promiscuous mode in network driver for K2G > SoC. This depends on v3 of my series at > https://www.spinics.net/lists/kernel/msg2765942.html The net-next tree is closed, please resubmit this series after the merge window when the net-next tree is opened back up. Thank you.
[GIT] Networking
1) Fix RCU locking in xfrm_local_error(), from Taehee Yoo. 2) Fix return value assignments and thus error checking in iwl_mvm_start_ap_ibss(), from Johannes Berg. 3) Don't count header length twice in vti4, from Stefano Brivio. 4) Fix deadlock in rt6_age_examine_exception, from Eric Dumazet. 5) Fix out-of-bounds access in nf_sk_lookup_slow{v4,v6}() from Subash Abhinov. 6) Check nladdr size in netlink_connect(), from Alexander Potapenko. 7) VF representor SQ numbers are 32 not 16 bits, in mlx5 driver, from Or Gerlitz. 8) Out of bounds read in skb_network_protocol(), from Eric Dumazet. 9) r8169 driver sets driver data pointer after register_netdev() which is too late. Fix from Heiner Kallweit. 10) Fix memory leak in mlx4 driver, from Moshe Shemesh. 11) The multi-VLAN decap fix added a regression when dealing with device that lack a MAC header, such as tun. Fix from Toshiaki Makita. 12) Fix integer overflow in dynamic interrupt coalescing code. From Tal Gilboa. 13) Use after free in vrf code, from David Ahern. 14) IPV6 route leak between VRFs fix, also from David Ahern. Please pull, thanks a lot! 
The following changes since commit f36b7534b83357cf52e747905de6d65b4f7c2512: Merge branch 'akpm' (patches from Andrew) (2018-03-22 18:48:43 -0700) are available in the Git repository at: git://git.kernel.org/pub/scm/linux/kernel/git/davem/net.git for you to fetch changes up to e81b5e01c14add8395dfba7130f8829206bb507d: net: mvneta: fix enable of all initialized RXQs (2018-03-30 14:27:47 -0400) Alexander Potapenko (1): netlink: make sure nladdr has correct size in netlink_connect() Andrei Otcheretianski (2): iwlwifi: mvm: Increase session protection time after CS iwlwifi: mvm: Move unused phy's to a default channel Avraham Stern (3): iwlwifi: mvm: clear tx queue id when unreserving aggregation queue iwlwifi: mvm: make sure internal station has a valid id iwlwifi: mvm: fix array out of bounds reference Beni Lev (1): iwlwifi: mvm: Correctly set IGTK for AP Colin Ian King (1): atm: iphase: fix spelling mistake: "Receiverd" -> "Received" Cong Wang (1): llc: properly handle dev_queue_xmit() return value Craig Dillabaugh (1): net sched actions: fix dumping which requires several messages to user space Dave Watson (1): strparser: Fix sign of err codes David Ahern (2): vrf: Fix use after free and double free in vrf_finish_output net/ipv6: Fix route leaking between VRFs David Lebrun (1): ipv6: sr: fix seg6 encap performances with TSO enabled David S. Miller (11): Merge branch 'mlxsw-GRE-mtu-changes' Merge git://git.kernel.org/.../pablo/nf Merge tag 'wireless-drivers-for-davem-2018-03-24' of git://git.kernel.org/.../kvalo/wireless-drivers Merge branch 'bond-hwaddr-sync-fixes' Merge tag 'batadv-net-for-davem-20180326' of git://git.open-mesh.org/linux-merge Merge tag 'mlx5-fixes-2018-03-23' of git://git.kernel.org/.../saeed/linux Merge branch 'mlx4-misc-fixes-for-4.16' Merge branch 'master' of git://git.kernel.org/.../klassert/ipsec ip_tunnel: Resolve ipsec merge conflict properly. 
Merge git://git.kernel.org/.../bpf/bpf Merge branch 'vlan-fix' Emmanuel Grumbach (1): iwlwifi: mvm: set the correct tid when we flush the MCAST sta Eran Ben Elisha (1): net/mlx4_en: Fix mixed PFC and Global pause user control requests Eric Dumazet (2): ipv6: fix possible deadlock in rt6_age_examine_exception() net: fix possible out-of-bound read in skb_network_protocol() Florian Westphal (3): netfilter: nf_tables: meter: pick a set backend that supports updates netfilter: nf_tables: permit second nat hook if colliding hook is going away netfilter: nf_tables: add missing netlink attrs to policies Giuseppe Lippolis (1): net-usb: add qmi_wwan if on lte modem wistron neweb d18q1 Hans Wippel (1): net/ipv4: disable SMC TCP option with SYN Cookies Heiner Kallweit (1): r8169: fix setting driver_data after register_netdev Jakub Kicinski (2): tools: bpftool: don't use hex numbers in JSON output nfp: bpf: fix check of program max insn count Jason Wang (3): vhost_net: add missing lock nesting notation vhost: correctly remove wait queue during poll failure vhost: validate log when IOTLB is enabled Jianbo Liu (2): net/mlx5e: Don't override vport admin link state in switchdev mode net/mlx5e: Fix memory usage issues in offloading TC flows Johannes Berg (1): iwlwifi: mvm: fix error checking for multi/broadcast sta John Fastabend (1): net: sched, fix OOO packets with pfifo_fast Kalle Valo (2): Merge tag 'iwlwifi-for-kalle-2018-03-16' of git://git.kernel.org/.../iwlwifi/iwlwifi-fixes Merge tag 'iwlwifi-for-kalle-2018-03-19' of git://git.kernel.org/...
Re: [PATCH 12/15] dmaengine: pxa: make the filter function internal
Hi Robert, I love your patch! Perhaps something to improve: [auto build test WARNING on linus/master] [also build test WARNING on v4.16] [cannot apply to arm-soc/for-next next-20180329] [if your patch is applied to the wrong git tree, please drop us a note to help improve the system] url: https://github.com/0day-ci/linux/commits/Robert-Jarzmik/ARM-pxa-switch-to-DMA-slave-maps/20180402-233029 reproduce: # apt-get install sparse make ARCH=x86_64 allmodconfig make C=1 CF=-D__CHECK_ENDIAN__ sparse warnings: (new ones prefixed by >>) drivers/mtd/nand/marvell_nand.c:2621:17: sparse: undefined identifier 'pxad_filter_fn' >> drivers/mtd/nand/marvell_nand.c:2621:17: sparse: call with no type! In file included from drivers/mtd/nand/marvell_nand.c:21:0: drivers/mtd/nand/marvell_nand.c: In function 'marvell_nfc_init_dma': drivers/mtd/nand/marvell_nand.c:2621:42: error: 'pxad_filter_fn' undeclared (first use in this function); did you mean 'dma_filter_fn'? dma_request_slave_channel_compat(mask, pxad_filter_fn, ^ include/linux/dmaengine.h:1408:46: note: in definition of macro 'dma_request_slave_channel_compat' __dma_request_slave_channel_compat(&(mask), x, y, dev, name) ^ drivers/mtd/nand/marvell_nand.c:2621:42: note: each undeclared identifier is reported only once for each function it appears in dma_request_slave_channel_compat(mask, pxad_filter_fn, ^ include/linux/dmaengine.h:1408:46: note: in definition of macro 'dma_request_slave_channel_compat' __dma_request_slave_channel_compat(&(mask), x, y, dev, name) ^ vim +2621 drivers/mtd/nand/marvell_nand.c 02f26ecf Miquel Raynal 2018-01-09 2588 02f26ecf Miquel Raynal 2018-01-09 2589 static int marvell_nfc_init_dma(struct marvell_nfc *nfc) 02f26ecf Miquel Raynal 2018-01-09 2590 { 02f26ecf Miquel Raynal 2018-01-09 2591 struct platform_device *pdev = container_of(nfc->dev, 02f26ecf Miquel Raynal 2018-01-09 2592 struct platform_device, 02f26ecf Miquel Raynal 2018-01-09 2593 dev); 02f26ecf Miquel Raynal 2018-01-09 2594 struct 
dma_slave_config config = {}; 02f26ecf Miquel Raynal 2018-01-09 2595 struct resource *r; 02f26ecf Miquel Raynal 2018-01-09 2596 dma_cap_mask_t mask; 02f26ecf Miquel Raynal 2018-01-09 2597 struct pxad_param param; 02f26ecf Miquel Raynal 2018-01-09 2598 int ret; 02f26ecf Miquel Raynal 2018-01-09 2599 02f26ecf Miquel Raynal 2018-01-09 2600 if (!IS_ENABLED(CONFIG_PXA_DMA)) { 02f26ecf Miquel Raynal 2018-01-09 2601 dev_warn(nfc->dev, 02f26ecf Miquel Raynal 2018-01-09 2602 "DMA not enabled in configuration\n"); 02f26ecf Miquel Raynal 2018-01-09 2603 return -ENOTSUPP; 02f26ecf Miquel Raynal 2018-01-09 2604 } 02f26ecf Miquel Raynal 2018-01-09 2605 02f26ecf Miquel Raynal 2018-01-09 2606 ret = dma_set_mask_and_coherent(nfc->dev, DMA_BIT_MASK(32)); 02f26ecf Miquel Raynal 2018-01-09 2607 if (ret) 02f26ecf Miquel Raynal 2018-01-09 2608 return ret; 02f26ecf Miquel Raynal 2018-01-09 2609 02f26ecf Miquel Raynal 2018-01-09 2610 r = platform_get_resource(pdev, IORESOURCE_DMA, 0); 02f26ecf Miquel Raynal 2018-01-09 2611 if (!r) { 02f26ecf Miquel Raynal 2018-01-09 2612 dev_err(nfc->dev, "No resource defined for data DMA\n"); 02f26ecf Miquel Raynal 2018-01-09 2613 return -ENXIO; 02f26ecf Miquel Raynal 2018-01-09 2614 } 02f26ecf Miquel Raynal 2018-01-09 2615 02f26ecf Miquel Raynal 2018-01-09 2616 param.drcmr = r->start; 02f26ecf Miquel Raynal 2018-01-09 2617 param.prio = PXAD_PRIO_LOWEST; 02f26ecf Miquel Raynal 2018-01-09 2618 dma_cap_zero(mask); 02f26ecf Miquel Raynal 2018-01-09 2619 dma_cap_set(DMA_SLAVE, mask); 02f26ecf Miquel Raynal 2018-01-09 2620 nfc->dma_chan = 02f26ecf Miquel Raynal 2018-01-09 @2621 dma_request_slave_channel_compat(mask, pxad_filter_fn, 02f26ecf Miquel Raynal 2018-01-09 2622 ¶m, nfc->dev, 02f26ecf Miquel Raynal 2018-01-09 2623 "data"); 02f26ecf Miquel Raynal 2018-01-09 2624 if (!nfc->dma_chan) { 02f26ecf Miquel Raynal 2018-01-09 2625 dev_err(nfc->dev, 02f26ecf Miquel Raynal 2018-01-09 2626 "Unable to request data DMA channel\n"); 02f26ecf Miquel Raynal 2018-01-09 
2627
Re: [net-next 2/2] net: netcp: ethss: k2g: add promiscuous mode support
On Mon, Apr 02, 2018 at 12:17:19PM -0400, Murali Karicheri wrote: > +static int gbe_set_rx_mode(void *intf_priv, bool promisc) > +{ > + struct gbe_intf *gbe_intf = intf_priv; > + struct gbe_priv *gbe_dev = gbe_intf->gbe_dev; > + struct cpsw_ale *ale = gbe_dev->ale; > + unsigned long timeout; > + int i, ret = -ETIMEDOUT; > + > + /* Disable(1)/Enable(0) Learn for all ports (host is port 0 and > + * slaves are port 1 and up > + */ > + for (i = 0; i <= gbe_dev->num_slaves; i++) { > + cpsw_ale_control_set(ale, i, > + ALE_PORT_NOLEARN, !!promisc); > + cpsw_ale_control_set(ale, i, > + ALE_PORT_NO_SA_UPDATE, !!promisc); > + } Hi Murali Does this mean that in promisc mode, switching of frames between ports in hardware is disabled? You are relying on the software bridge to perform such bridging between ports? You might want to look at skb->offload_fwd_mark. By setting this, you can tell the software bridge the hardware has already bridged the frame. You might then be able to have promisc enabled, and the hardware still doing the forwarding. Andrew