Re: [bpf-next RFC 1/3] flow_dissector: implements flow dissector BPF hook

2018-08-19 Thread Song Liu
On Thu, Aug 16, 2018 at 4:14 PM, Petar Penkov  wrote:
> On Thu, Aug 16, 2018 at 3:40 PM, Song Liu  wrote:
>>
>> On Thu, Aug 16, 2018 at 9:44 AM, Petar Penkov  wrote:
>> > From: Petar Penkov 
>> >
>> > Adds a hook for programs of type BPF_PROG_TYPE_FLOW_DISSECTOR and
>> > attach type BPF_FLOW_DISSECTOR that is executed in the flow dissector
>> > path. The BPF program is kept as a global variable so it is
>> > accessible to all flow dissectors.
>> >
>> > Signed-off-by: Petar Penkov 
>> > Signed-off-by: Willem de Bruijn 
>> > ---
>> >  include/linux/bpf_types.h                 |   1 +
>> >  include/linux/skbuff.h                    |   7 +
>> >  include/net/flow_dissector.h              |  16 +++
>> >  include/uapi/linux/bpf.h                  |  14 +-
>> >  kernel/bpf/syscall.c                      |   8 ++
>> >  kernel/bpf/verifier.c                     |   2 +
>> >  net/core/filter.c                         | 157 ++
>> >  net/core/flow_dissector.c                 |  76 +++
>> >  tools/bpf/bpftool/prog.c                  |   1 +
>> >  tools/include/uapi/linux/bpf.h            |   5 +-
>> >  tools/lib/bpf/libbpf.c                    |   2 +
>> >  tools/testing/selftests/bpf/bpf_helpers.h |   3 +
>> >  12 files changed, 290 insertions(+), 2 deletions(-)
>> >
>> > diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
>> > index cd26c090e7c0..22083712dd18 100644
>> > --- a/include/linux/bpf_types.h
>> > +++ b/include/linux/bpf_types.h
>> > @@ -32,6 +32,7 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_LIRC_MODE2, lirc_mode2)
>> >  #ifdef CONFIG_INET
>> >  BPF_PROG_TYPE(BPF_PROG_TYPE_SK_REUSEPORT, sk_reuseport)
>> >  #endif
>> > +BPF_PROG_TYPE(BPF_PROG_TYPE_FLOW_DISSECTOR, flow_dissector)
>> >
>> >  BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops)
>> >  BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_ARRAY, percpu_array_map_ops)
>> > diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
>> > index 17a13e4785fc..ce0e863f02a2 100644
>> > --- a/include/linux/skbuff.h
>> > +++ b/include/linux/skbuff.h
>> > @@ -243,6 +243,8 @@ struct scatterlist;
>> >  struct pipe_inode_info;
>> >  struct iov_iter;
>> >  struct napi_struct;
>> > +struct bpf_prog;
>> > +union bpf_attr;
>> >
>> >  #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
>> >  struct nf_conntrack {
>> > @@ -1192,6 +1194,11 @@ void skb_flow_dissector_init(struct flow_dissector *flow_dissector,
>> >  const struct flow_dissector_key *key,
>> >  unsigned int key_count);
>> >
>> > +int skb_flow_dissector_bpf_prog_attach(const union bpf_attr *attr,
>> > +  struct bpf_prog *prog);
>> > +
>> > +int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr);
>> > +
>> >  bool __skb_flow_dissect(const struct sk_buff *skb,
>> > struct flow_dissector *flow_dissector,
>> > void *target_container,
>> > diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h
>> > index 6a4586dcdede..edb919d320c1 100644
>> > --- a/include/net/flow_dissector.h
>> > +++ b/include/net/flow_dissector.h
>> > @@ -270,6 +270,22 @@ __be32 flow_get_u32_dst(const struct flow_keys *flow);
>> >  extern struct flow_dissector flow_keys_dissector;
>> >  extern struct flow_dissector flow_keys_basic_dissector;
>> >
>> > +/* struct bpf_flow_dissect_cb:
>> > + *
>> > + * This struct is used to pass parameters to BPF programs of type
>> > + * BPF_PROG_TYPE_FLOW_DISSECTOR. Before such a program is run, the
>> > + * caller sets the control block of the skb to be a struct of this
>> > + * type. The first field is used to communicate the next header offset
>> > + * between the BPF programs, and its initial value is passed in from
>> > + * the kernel. The last two fields are used for writing out flow keys.
>> > + */
>> > +struct bpf_flow_dissect_cb {
>> > +   u16 nhoff;
>> > +   u16 unused;
>> > +   void *target_container;
>> > +   struct flow_dissector *flow_dissector;
>> > +};
>> > +
>> >  /* struct flow_keys_digest:
>> >   *
>> >   * This structure is used to hold a digest of the full flow keys. This is a
>> > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
>> > index 66917a4eba27..8bc0fdab685d 100644
>> > --- a/include/uapi/linux/bpf.h
>> > +++ b/include/uapi/linux/bpf.h
>> > @@ -152,6 +152,7 @@ enum bpf_prog_type {
>> > BPF_PROG_TYPE_LWT_SEG6LOCAL,
>> > BPF_PROG_TYPE_LIRC_MODE2,
>> > BPF_PROG_TYPE_SK_REUSEPORT,
>> > +   BPF_PROG_TYPE_FLOW_DISSECTOR,
>> >  };
>> >
>> >  enum bpf_attach_type {
>> > @@ -172,6 +173,7 @@ enum bpf_attach_type {
>> > BPF_CGROUP_UDP4_SENDMSG,
>> > BPF_CGROUP_UDP6_SENDMSG,
>> > BPF_LIRC_MODE2,
>> > +   BPF_FLOW_DISSECTOR,
>> > __MAX_BPF_ATTACH_TYPE
>> >  };
>> >
>> > @@ -2141,6 +2143,15 @@ union bpf_attr {
>> >   * request in the skb.
>> >   *
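
The quoted series truncates here. For orientation, a minimal sketch of what
a dissector program might look like under this RFC. The SEC() name, the
BPF_OK/BPF_DROP return codes, and the availability of bpf_skb_load_bytes()
to this program type are assumptions drawn from the commit message and the
struct above, not confirmed by the posted patches:

/* Hypothetical BPF flow dissector (sketch, not the final API): the
 * kernel passes parameters in skb->cb, laid out as the struct above.
 */
#include <linux/bpf.h>
#include <linux/ip.h>
#include <linux/types.h>
#include "bpf_helpers.h"

struct flow_dissector;			/* opaque to the BPF program */

struct bpf_flow_dissect_cb {
	__u16 nhoff;
	__u16 unused;
	void *target_container;
	struct flow_dissector *flow_dissector;
};

SEC("flow_dissector")
int dissect(struct __sk_buff *skb)
{
	struct bpf_flow_dissect_cb *cb = (struct bpf_flow_dissect_cb *)skb->cb;
	struct iphdr iph;

	/* Pull the IPv4 header that starts at the next-header offset. */
	if (bpf_skb_load_bytes(skb, cb->nhoff, &iph, sizeof(iph)) < 0)
		return BPF_DROP;

	/* ... extract addresses/ports into cb->target_container via
	 * cb->flow_dissector, using the helpers added later in the series ...
	 */

	cb->nhoff += iph.ihl * 4;	/* report the next header offset back */
	return BPF_OK;
}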

Re: [PATCH bpf] xsk: fix return value of xdp_umem_assign_dev()

2018-08-19 Thread Song Liu
On Sun, Aug 19, 2018 at 5:54 PM, Prashant Bhole  wrote:
> s/ENOTSUPP/EOPNOTSUPP/ in function xdp_umem_assign_dev().
> This function's return value is directly returned by xsk_bind().
> EOPNOTSUPP is a valid return value for bind(), while ENOTSUPP is kernel-internal.
>
> Fixes: f734607e819b ("xsk: refactor xdp_umem_assign_dev()")
> Signed-off-by: Prashant Bhole 

Acked-by: Song Liu 


> ---
>  net/xdp/xdp_umem.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c
> index 911ca6d3cb5a..bfe2dbea480b 100644
> --- a/net/xdp/xdp_umem.c
> +++ b/net/xdp/xdp_umem.c
> @@ -74,14 +74,14 @@ int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev,
> return 0;
>
> if (!dev->netdev_ops->ndo_bpf || !dev->netdev_ops->ndo_xsk_async_xmit)
> -   return force_zc ? -ENOTSUPP : 0; /* fail or fallback */
> +   return force_zc ? -EOPNOTSUPP : 0; /* fail or fallback */
>
> bpf.command = XDP_QUERY_XSK_UMEM;
>
> rtnl_lock();
> err = xdp_umem_query(dev, queue_id);
> if (err) {
> -   err = err < 0 ? -ENOTSUPP : -EBUSY;
> +   err = err < 0 ? -EOPNOTSUPP : -EBUSY;
> goto err_rtnl_unlock;
> }
>
> --
> 2.17.1
>
>
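
A quick userspace illustration of why the errno choice matters (my snippet,
not part of the patch): ENOTSUPP is a kernel-internal value (524) with no
libc name, so an application that gets it back from bind() sees an
unhelpful message, while EOPNOTSUPP is a documented errno.

#include <errno.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	/* 524 is the kernel's internal ENOTSUPP; libc cannot name it. */
	printf("ENOTSUPP:   %s\n", strerror(524));
	printf("EOPNOTSUPP: %s\n", strerror(EOPNOTSUPP));
	/* Prints roughly:
	 *   ENOTSUPP:   Unknown error 524
	 *   EOPNOTSUPP: Operation not supported
	 */
	return 0;
}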


Re: Experimental fix for MSI-X issue on r8169

2018-08-19 Thread Jian-Hong Pan
2018-08-20 4:34 GMT+08:00 Heiner Kallweit :
> The three of you reported an MSI-X-related error when the system
> resumes from suspend. This has been fixed for now by disabling MSI-X
> on certain chip versions. However more versions may be affected.
>
> I checked with Realtek and they confirmed that on certain chip
> versions a MSIX-related value in PCI config space is reset when
> resuming from S3.
>
> I would appreciate it if you could test the following experimental patch
> and report whether the warning "MSIX address lost, re-configuring" appears
> in your dmesg output after resume from suspend.
>
> Thanks a lot for your efforts.

Tested with the experimental patch on ASUS X441UAR.

This is the information before suspend:

dev@endless:~$ dmesg | grep r8169
[   10.279565] libphy: r8169: probed
[   10.279947] r8169 0000:02:00.0 eth0: RTL8106e, 0c:9d:92:32:67:b4, XID 4490, IRQ 127
[   10.445952] r8169 0000:02:00.0 enp2s0: renamed from eth0
[   15.676229] Generic PHY r8169-200:00: attached PHY driver [Generic PHY] (mii_bus:phy_addr=r8169-200:00, irq=IGNORE)
[   17.455392] r8169 0000:02:00.0 enp2s0: Link is Up - 100Mbps/Full - flow control off

dev@endless:~$ ip addr show enp2s0
4: enp2s0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP group default qlen 1000
    link/ether 0c:9d:92:32:67:b4 brd ff:ff:ff:ff:ff:ff
    inet 10.100.13.152/24 brd 10.100.13.255 scope global noprefixroute dynamic enp2s0
       valid_lft 86347sec preferred_lft 86347sec
    inet6 fe80::2873:a2a9:6ca1:c79d/64 scope link noprefixroute
       valid_lft forever preferred_lft forever

This is the information after resume:

dev@endless:~$ dmesg | grep r8169
[   10.279565] libphy: r8169: probed
[   10.279947] r8169 0000:02:00.0 eth0: RTL8106e, 0c:9d:92:32:67:b4, XID 4490, IRQ 127
[   10.445952] r8169 0000:02:00.0 enp2s0: renamed from eth0
[   15.676229] Generic PHY r8169-200:00: attached PHY driver [Generic PHY] (mii_bus:phy_addr=r8169-200:00, irq=IGNORE)
[   17.455392] r8169 0000:02:00.0 enp2s0: Link is Up - 100Mbps/Full - flow control off
[   95.594265] r8169 0000:02:00.0 enp2s0: Link is Down
[   96.242074] Generic PHY r8169-200:00: attached PHY driver [Generic PHY] (mii_bus:phy_addr=r8169-200:00, irq=IGNORE)

dev@endless:~$ ip addr show enp2s0
4: enp2s0: <NO-CARRIER,BROADCAST,MULTICAST,UP> mtu 1500 qdisc pfifo_fast state DOWN group default qlen 1000
    link/ether 0c:9d:92:32:67:b4 brd ff:ff:ff:ff:ff:ff

There is no "MSIX address lost, re-configuring" in dmesg.
The ethernet interface is still down after resume.

This is the ethernet controller in detail:

02:00.0 Ethernet controller [0200]: Realtek Semiconductor Co., Ltd. RTL8101/2/6E PCI Express Fast/Gigabit Ethernet controller [10ec:8136] (rev 07)
	Subsystem: ASUSTeK Computer Inc. RTL810xE PCI Express Fast Ethernet controller [1043:200f]
	Flags: bus master, fast devsel, latency 0, IRQ 16
	I/O ports at e000 [size=256]
	Memory at ef100000 (64-bit, non-prefetchable) [size=4K]
	Memory at e0000000 (64-bit, prefetchable) [size=16K]
	Capabilities: [40] Power Management version 3
	Capabilities: [50] MSI: Enable- Count=1/1 Maskable- 64bit+
	Capabilities: [70] Express Endpoint, MSI 01
	Capabilities: [b0] MSI-X: Enable+ Count=4 Masked-
	Capabilities: [d0] Vital Product Data
	Capabilities: [100] Advanced Error Reporting
	Capabilities: [140] Virtual Channel
	Capabilities: [160] Device Serial Number 01-00-00-00-36-4c-e0-00
	Capabilities: [170] Latency Tolerance Reporting
	Kernel driver in use: r8169
	Kernel modules: r8169

Regards,
Jian-Hong Pan

>
> ---
>  drivers/net/ethernet/realtek/r8169.c | 27 ++-
>  1 file changed, 22 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
> index 0d9c38318..56b4bdff9 100644
> --- a/drivers/net/ethernet/realtek/r8169.c
> +++ b/drivers/net/ethernet/realtek/r8169.c
> @@ -690,6 +690,8 @@ struct rtl8169_private {
> struct rtl8169_counters *counters;
> struct rtl8169_tc_offsets tc_offset;
> u32 saved_wolopts;
> +   u32 saved_msix_addr_lo;
> +   u32 saved_msix_addr_hi;
>
> struct rtl_fw {
> const struct firmware *fw;
> @@ -6876,6 +6878,19 @@ static int rtl8169_resume(struct device *device)
>  {
> struct pci_dev *pdev = to_pci_dev(device);
> struct net_device *dev = pci_get_drvdata(pdev);
> +   struct rtl8169_private *tp = netdev_priv(dev);
> +   u32 val;
> +
> +   /* Some chip versions lose these values when resuming */
> +   if (pdev->msix_enabled) {
> +   pci_read_config_dword(pdev, PCI_BASE_ADDRESS_4, &val);
> +   if (!val)
> +   dev_warn(device, "MSIX address lost, re-configuring\n");
> +   pci_write_config_dword(pdev, PCI_BASE_ADDRESS_4,
> +  tp->saved_msix_addr_lo);
> +   pci_write_config_dword(pdev, PCI_BASE_ADDRESS_5,
> +  tp->saved_msix_addr_hi);
> +   }
>
> if (netif_running(dev))
>  

[PATCH bpf] xsk: fix return value of xdp_umem_assign_dev()

2018-08-19 Thread Prashant Bhole
s/ENOTSUPP/EOPNOTSUPP/ in function xdp_umem_assign_dev().
This function's return value is directly returned by xsk_bind().
EOPNOTSUPP is a valid return value for bind(), while ENOTSUPP is kernel-internal.

Fixes: f734607e819b ("xsk: refactor xdp_umem_assign_dev()")
Signed-off-by: Prashant Bhole 
---
 net/xdp/xdp_umem.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c
index 911ca6d3cb5a..bfe2dbea480b 100644
--- a/net/xdp/xdp_umem.c
+++ b/net/xdp/xdp_umem.c
@@ -74,14 +74,14 @@ int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev,
return 0;
 
if (!dev->netdev_ops->ndo_bpf || !dev->netdev_ops->ndo_xsk_async_xmit)
-   return force_zc ? -ENOTSUPP : 0; /* fail or fallback */
+   return force_zc ? -EOPNOTSUPP : 0; /* fail or fallback */
 
bpf.command = XDP_QUERY_XSK_UMEM;
 
rtnl_lock();
err = xdp_umem_query(dev, queue_id);
if (err) {
-   err = err < 0 ? -ENOTSUPP : -EBUSY;
+   err = err < 0 ? -EOPNOTSUPP : -EBUSY;
goto err_rtnl_unlock;
}
 
-- 
2.17.1




Re: bnxt: card intermittently hanging and dropping link

2018-08-19 Thread Daniel Axtens
Hi Michael,

>>> The main issue is the TX timeout.
>>> .
>>>
 [ 2682.911693] bnxt_en 0000:3b:00.0 eth4: TX timeout detected, starting reset task!
 [ 2683.782496] bnxt_en 0000:3b:00.0 eth4: Resp cmpl intr err msg: 0x51
 [ 2683.783061] bnxt_en 0000:3b:00.0 eth4: hwrm_ring_free tx failed. rc:-1
 [ 2684.634557] bnxt_en 0000:3b:00.0 eth4: Resp cmpl intr err msg: 0x51
 [ 2684.635120] bnxt_en 0000:3b:00.0 eth4: hwrm_ring_free tx failed. rc:-1
>>>
>>> and it is not recovering.
>>>
>>> Please provide ethtool -i eth4 which will show the firmware version on
>>> the NIC.  Let's see if the firmware is too old.
>>
>> driver: bnxt_en
>> version: 1.8.0
>> firmware-version: 20.6.151.0/pkg 20.06.05.11
>
> I believe the firmware should be updated.  My colleague will contact
> you on how to proceed.

Thank you very much, I'll follow up with them off-list.

Regards,
Daniel


[PATCH net v2 1/4] qed: Wait for ready indication before rereading the shmem

2018-08-19 Thread Tomer Tayar
The MFW might be reset and then re-update its shared memory.
Upon detecting such a reset, the driver rereads this memory, but it
has to wait until the data is valid.
This patch adds the missing wait for a data-ready indication.
 
Signed-off-by: Tomer Tayar 
Signed-off-by: Ariel Elior 
---
 drivers/net/ethernet/qlogic/qed/qed_mcp.c | 50 +--
 1 file changed, 41 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
index d89a0e2..bdcacb3 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
@@ -183,18 +183,57 @@ int qed_mcp_free(struct qed_hwfn *p_hwfn)
return 0;
 }
 
+/* Maximum of 1 sec to wait for the SHMEM ready indication */
+#define QED_MCP_SHMEM_RDY_MAX_RETRIES  20
+#define QED_MCP_SHMEM_RDY_ITER_MS  50
+
 static int qed_load_mcp_offsets(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 {
struct qed_mcp_info *p_info = p_hwfn->mcp_info;
+   u8 cnt = QED_MCP_SHMEM_RDY_MAX_RETRIES;
+   u8 msec = QED_MCP_SHMEM_RDY_ITER_MS;
u32 drv_mb_offsize, mfw_mb_offsize;
u32 mcp_pf_id = MCP_PF_ID(p_hwfn);
 
p_info->public_base = qed_rd(p_hwfn, p_ptt, MISC_REG_SHARED_MEM_ADDR);
-   if (!p_info->public_base)
-   return 0;
+   if (!p_info->public_base) {
+   DP_NOTICE(p_hwfn,
+ "The address of the MCP scratch-pad is not configured\n");
+   return -EINVAL;
+   }
 
p_info->public_base |= GRCBASE_MCP;
 
+   /* Get the MFW MB address and number of supported messages */
+   mfw_mb_offsize = qed_rd(p_hwfn, p_ptt,
+   SECTION_OFFSIZE_ADDR(p_info->public_base,
+PUBLIC_MFW_MB));
+   p_info->mfw_mb_addr = SECTION_ADDR(mfw_mb_offsize, mcp_pf_id);
+   p_info->mfw_mb_length = (u16)qed_rd(p_hwfn, p_ptt,
+   p_info->mfw_mb_addr +
+   offsetof(struct public_mfw_mb,
+sup_msgs));
+
+   /* The driver can notify that there was an MCP reset, and might read the
+* SHMEM values before the MFW has completed initializing them.
+* To avoid this, the "sup_msgs" field in the MFW mailbox is used as a
+* data ready indication.
+*/
+   while (!p_info->mfw_mb_length && --cnt) {
+   msleep(msec);
+   p_info->mfw_mb_length =
+   (u16)qed_rd(p_hwfn, p_ptt,
+   p_info->mfw_mb_addr +
+   offsetof(struct public_mfw_mb, sup_msgs));
+   }
+
+   if (!cnt) {
+   DP_NOTICE(p_hwfn,
+ "Failed to get the SHMEM ready notification after %d msec\n",
+ QED_MCP_SHMEM_RDY_MAX_RETRIES * msec);
+   return -EBUSY;
+   }
+
/* Calculate the driver and MFW mailbox address */
drv_mb_offsize = qed_rd(p_hwfn, p_ptt,
SECTION_OFFSIZE_ADDR(p_info->public_base,
@@ -204,13 +243,6 @@ static int qed_load_mcp_offsets(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
   "drv_mb_offsiz = 0x%x, drv_mb_addr = 0x%x mcp_pf_id = 0x%x\n",
   drv_mb_offsize, p_info->drv_mb_addr, mcp_pf_id);
 
-   /* Set the MFW MB address */
-   mfw_mb_offsize = qed_rd(p_hwfn, p_ptt,
-   SECTION_OFFSIZE_ADDR(p_info->public_base,
-PUBLIC_MFW_MB));
-   p_info->mfw_mb_addr = SECTION_ADDR(mfw_mb_offsize, mcp_pf_id);
-   p_info->mfw_mb_length = (u16)qed_rd(p_hwfn, p_ptt, p_info->mfw_mb_addr);
-
/* Get the current driver mailbox sequence before sending
 * the first command
 */
-- 
1.8.3.1



[PATCH net v2 4/4] qed: Avoid sending mailbox commands when MFW is not responsive

2018-08-19 Thread Tomer Tayar
Continuing to send mailbox commands to the MFW when it is not responsive
ends up with a redundant amount of timeout expiries.
This patch prints the MCP status on the first command that receives no
response, and blocks the following commands.
Since the (un)load request commands might get no response due to other
PFs, the patch also adds the option to skip the blocking upon a failure.

Signed-off-by: Tomer Tayar 
Signed-off-by: Ariel Elior 
---
 drivers/net/ethernet/qlogic/qed/qed_mcp.c  | 52 +-
 drivers/net/ethernet/qlogic/qed/qed_mcp.h  |  6 ++-
 drivers/net/ethernet/qlogic/qed/qed_reg_addr.h |  1 +
 3 files changed, 56 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
index b7279e6..5d37ec7 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
@@ -320,6 +320,12 @@ int qed_mcp_reset(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
u32 org_mcp_reset_seq, seq, delay = QED_MCP_RESP_ITER_US, cnt = 0;
int rc = 0;
 
+   if (p_hwfn->mcp_info->b_block_cmd) {
+   DP_NOTICE(p_hwfn,
+ "The MFW is not responsive. Avoid sending MCP_RESET mailbox command.\n");
+   return -EBUSY;
+   }
+
/* Ensure that only a single thread is accessing the mailbox */
spin_lock_bh(&p_hwfn->mcp_info->cmd_lock);
 
@@ -445,6 +451,33 @@ static void __qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn,
   (p_mb_params->cmd | seq_num), p_mb_params->param);
 }
 
+static void qed_mcp_cmd_set_blocking(struct qed_hwfn *p_hwfn, bool block_cmd)
+{
+   p_hwfn->mcp_info->b_block_cmd = block_cmd;
+
+   DP_INFO(p_hwfn, "%s sending of mailbox commands to the MFW\n",
+   block_cmd ? "Block" : "Unblock");
+}
+
+static void qed_mcp_print_cpu_info(struct qed_hwfn *p_hwfn,
+  struct qed_ptt *p_ptt)
+{
+   u32 cpu_mode, cpu_state, cpu_pc_0, cpu_pc_1, cpu_pc_2;
+   u32 delay = QED_MCP_RESP_ITER_US;
+
+   cpu_mode = qed_rd(p_hwfn, p_ptt, MCP_REG_CPU_MODE);
+   cpu_state = qed_rd(p_hwfn, p_ptt, MCP_REG_CPU_STATE);
+   cpu_pc_0 = qed_rd(p_hwfn, p_ptt, MCP_REG_CPU_PROGRAM_COUNTER);
+   udelay(delay);
+   cpu_pc_1 = qed_rd(p_hwfn, p_ptt, MCP_REG_CPU_PROGRAM_COUNTER);
+   udelay(delay);
+   cpu_pc_2 = qed_rd(p_hwfn, p_ptt, MCP_REG_CPU_PROGRAM_COUNTER);
+
+   DP_NOTICE(p_hwfn,
+ "MCP CPU info: mode 0x%08x, state 0x%08x, pc {0x%08x, 0x%08x, 0x%08x}\n",
+ cpu_mode, cpu_state, cpu_pc_0, cpu_pc_1, cpu_pc_2);
+}
+
 static int
 _qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn,
   struct qed_ptt *p_ptt,
@@ -531,11 +564,15 @@ static void __qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn,
DP_NOTICE(p_hwfn,
  "The MFW failed to respond to command 0x%08x [param 0x%08x].\n",
  p_mb_params->cmd, p_mb_params->param);
+   qed_mcp_print_cpu_info(p_hwfn, p_ptt);
 
spin_lock_bh(&p_hwfn->mcp_info->cmd_lock);
qed_mcp_cmd_del_elem(p_hwfn, p_cmd_elem);
spin_unlock_bh(&p_hwfn->mcp_info->cmd_lock);
 
+   if (!QED_MB_FLAGS_IS_SET(p_mb_params, AVOID_BLOCK))
+   qed_mcp_cmd_set_blocking(p_hwfn, true);
+
return -EAGAIN;
}
 
@@ -573,6 +610,13 @@ static int qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn,
return -EBUSY;
}
 
+   if (p_hwfn->mcp_info->b_block_cmd) {
+   DP_NOTICE(p_hwfn,
+ "The MFW is not responsive. Avoid sending mailbox command 0x%08x [param 0x%08x].\n",
+ p_mb_params->cmd, p_mb_params->param);
+   return -EBUSY;
+   }
+
if (p_mb_params->data_src_size > union_data_size ||
p_mb_params->data_dst_size > union_data_size) {
DP_ERR(p_hwfn,
@@ -806,7 +850,7 @@ struct qed_load_req_out_params {
mb_params.data_src_size = sizeof(load_req);
mb_params.p_data_dst = &load_rsp;
mb_params.data_dst_size = sizeof(load_rsp);
-   mb_params.flags = QED_MB_FLAG_CAN_SLEEP;
+   mb_params.flags = QED_MB_FLAG_CAN_SLEEP | QED_MB_FLAG_AVOID_BLOCK;
 
DP_VERBOSE(p_hwfn, QED_MSG_SP,
   "Load Request: param 0x%08x [init_hw %d, drv_type %d, hsi_ver %d, pda 0x%04x]\n",
@@ -1050,7 +1094,7 @@ int qed_mcp_unload_req(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
memset(&mb_params, 0, sizeof(mb_params));
mb_params.cmd = DRV_MSG_CODE_UNLOAD_REQ;
mb_params.param = wol_param;
-   mb_params.flags = QED_MB_FLAG_CAN_SLEEP;
+   mb_params.flags = QED_MB_FLAG_CAN_SLEEP | QED_MB_FLAG_AVOID_BLOCK;
 
return qed_mcp_cmd_and_union(p_hwfn, p_ptt, &mb_params);
 }
@@ -2158,6 +2202,8 @@ int qed_mcp_halt(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
  

[PATCH net v2 3/4] qed: Prevent a possible deadlock during driver load and unload

2018-08-19 Thread Tomer Tayar
The MFW manages an internal lock to prevent concurrent hardware
(de)initialization of different PFs.
This, together with the busy-waiting for the MFW's responses to commands,
might lead to a deadlock during concurrent load or unload of PFs.
This patch adds the option to sleep within the busy-waiting, and uses it
for the (un)load requests (which are not sent from an interrupt context) to
prevent the possible deadlock.

Signed-off-by: Tomer Tayar 
Signed-off-by: Ariel Elior 
---
 drivers/net/ethernet/qlogic/qed/qed_mcp.c | 43 ++-
 drivers/net/ethernet/qlogic/qed/qed_mcp.h | 21 +--
 2 files changed, 44 insertions(+), 20 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
index 5f3dbdc..b7279e6 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
@@ -48,7 +48,7 @@
 #include "qed_reg_addr.h"
 #include "qed_sriov.h"
 
-#define CHIP_MCP_RESP_ITER_US 10
+#define QED_MCP_RESP_ITER_US   10
 
 #define QED_DRV_MB_MAX_RETRIES (500 * 1000)/* Account for 5 sec */
 #define QED_MCP_RESET_RETRIES  (50 * 1000) /* Account for 500 msec */
@@ -317,7 +317,7 @@ static void qed_mcp_reread_offsets(struct qed_hwfn *p_hwfn,
 
 int qed_mcp_reset(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 {
-   u32 org_mcp_reset_seq, seq, delay = CHIP_MCP_RESP_ITER_US, cnt = 0;
+   u32 org_mcp_reset_seq, seq, delay = QED_MCP_RESP_ITER_US, cnt = 0;
int rc = 0;
 
/* Ensure that only a single thread is accessing the mailbox */
@@ -449,10 +449,10 @@ static void __qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn,
 _qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn,
   struct qed_ptt *p_ptt,
   struct qed_mcp_mb_params *p_mb_params,
-  u32 max_retries, u32 delay)
+  u32 max_retries, u32 usecs)
 {
+   u32 cnt = 0, msecs = DIV_ROUND_UP(usecs, 1000);
struct qed_mcp_cmd_elem *p_cmd_elem;
-   u32 cnt = 0;
u16 seq_num;
int rc = 0;
 
@@ -475,7 +475,11 @@ static void __qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn,
goto err;
 
spin_unlock_bh(&p_hwfn->mcp_info->cmd_lock);
-   udelay(delay);
+
+   if (QED_MB_FLAGS_IS_SET(p_mb_params, CAN_SLEEP))
+   msleep(msecs);
+   else
+   udelay(usecs);
} while (++cnt < max_retries);
 
if (cnt >= max_retries) {
@@ -504,7 +508,11 @@ static void __qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn,
 * The spinlock stays locked until the list element is removed.
 */
 
-   udelay(delay);
+   if (QED_MB_FLAGS_IS_SET(p_mb_params, CAN_SLEEP))
+   msleep(msecs);
+   else
+   udelay(usecs);
+
spin_lock_bh(&p_hwfn->mcp_info->cmd_lock);
 
if (p_cmd_elem->b_is_completed)
@@ -539,7 +547,7 @@ static void __qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn,
   "MFW mailbox: response 0x%08x param 0x%08x [after %d.%03d ms]\n",
   p_mb_params->mcp_resp,
   p_mb_params->mcp_param,
-  (cnt * delay) / 1000, (cnt * delay) % 1000);
+  (cnt * usecs) / 1000, (cnt * usecs) % 1000);
 
/* Clear the sequence number from the MFW response */
p_mb_params->mcp_resp &= FW_MSG_CODE_MASK;
@@ -557,7 +565,7 @@ static int qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn,
 {
size_t union_data_size = sizeof(union drv_union_data);
u32 max_retries = QED_DRV_MB_MAX_RETRIES;
-   u32 delay = CHIP_MCP_RESP_ITER_US;
+   u32 usecs = QED_MCP_RESP_ITER_US;
 
/* MCP not initialized */
if (!qed_mcp_is_init(p_hwfn)) {
@@ -574,8 +582,13 @@ static int qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn,
return -EINVAL;
}
 
+   if (QED_MB_FLAGS_IS_SET(p_mb_params, CAN_SLEEP)) {
+   max_retries = DIV_ROUND_UP(max_retries, 1000);
+   usecs *= 1000;
+   }
+
return _qed_mcp_cmd_and_union(p_hwfn, p_ptt, p_mb_params, max_retries,
- delay);
+ usecs);
 }
 
 int qed_mcp_cmd(struct qed_hwfn *p_hwfn,
@@ -793,6 +806,7 @@ struct qed_load_req_out_params {
mb_params.data_src_size = sizeof(load_req);
mb_params.p_data_dst = &load_rsp;
mb_params.data_dst_size = sizeof(load_rsp);
+   mb_params.flags = QED_MB_FLAG_CAN_SLEEP;
 
DP_VERBOSE(p_hwfn, QED_MSG_SP,
   "Load Request: param 0x%08x [init_hw %d, drv_type %d, hsi_ver %d, pda 0x%04x]\n",
@@ -1014,7 +1028,8 @@ int qed_mcp_load_req(struct qed_hwfn *p_hwfn,
 
 int qed_mcp_unload_req(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 {
-   u32 wol_param, mcp_resp, mcp_param;
+   struct 

[PATCH net v2 0/4] qed: Misc fixes in the interface with the MFW

2018-08-19 Thread Tomer Tayar
This patch series fixes several issues in the driver's interface with the
management FW (MFW).

v1->v2:
- Fix loop counter decrement to be pre instead of post.

Tomer Tayar (4):
  qed: Wait for ready indication before rereading the shmem
  qed: Wait for MCP halt and resume commands to take place
  qed: Prevent a possible deadlock during driver load and unload
  qed: Avoid sending mailbox commands when MFW is not responsive

 drivers/net/ethernet/qlogic/qed/qed_mcp.c  | 187 +
 drivers/net/ethernet/qlogic/qed/qed_mcp.h  |  27 ++--
 drivers/net/ethernet/qlogic/qed/qed_reg_addr.h |   2 +
 3 files changed, 178 insertions(+), 38 deletions(-)

-- 
1.8.3.1



[PATCH net v2 2/4] qed: Wait for MCP halt and resume commands to take place

2018-08-19 Thread Tomer Tayar
Successive iterations of halting and resuming the management chip (MCP)
might fail, since currently the driver doesn't wait for these operations to
actually take place.
This patch prevents the driver from moving forward before the operations
are reflected in the state register.

Signed-off-by: Tomer Tayar 
Signed-off-by: Ariel Elior 
---
 drivers/net/ethernet/qlogic/qed/qed_mcp.c  | 46 +-
 drivers/net/ethernet/qlogic/qed/qed_reg_addr.h |  1 +
 2 files changed, 39 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
index bdcacb3..5f3dbdc 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
@@ -2109,31 +2109,61 @@ int qed_mcp_config_vf_msix(struct qed_hwfn *p_hwfn,
return rc;
 }
 
+/* A maximal 100 msec waiting time for the MCP to halt */
+#define QED_MCP_HALT_SLEEP_MS  10
+#define QED_MCP_HALT_MAX_RETRIES   10
+
 int qed_mcp_halt(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 {
-   u32 resp = 0, param = 0;
+   u32 resp = 0, param = 0, cpu_state, cnt = 0;
int rc;
 
rc = qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_MCP_HALT, 0, &resp, &param);
-   if (rc)
+   if (rc) {
DP_ERR(p_hwfn, "MCP response failure, aborting\n");
+   return rc;
+   }
 
-   return rc;
+   do {
+   msleep(QED_MCP_HALT_SLEEP_MS);
+   cpu_state = qed_rd(p_hwfn, p_ptt, MCP_REG_CPU_STATE);
+   if (cpu_state & MCP_REG_CPU_STATE_SOFT_HALTED)
+   break;
+   } while (++cnt < QED_MCP_HALT_MAX_RETRIES);
+
+   if (cnt == QED_MCP_HALT_MAX_RETRIES) {
+   DP_NOTICE(p_hwfn,
+ "Failed to halt the MCP [CPU_MODE = 0x%08x, CPU_STATE = 0x%08x]\n",
+ qed_rd(p_hwfn, p_ptt, MCP_REG_CPU_MODE), cpu_state);
+   return -EBUSY;
+   }
+
+   return 0;
 }
 
+#define QED_MCP_RESUME_SLEEP_MS10
+
 int qed_mcp_resume(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 {
-   u32 value, cpu_mode;
+   u32 cpu_mode, cpu_state;
 
qed_wr(p_hwfn, p_ptt, MCP_REG_CPU_STATE, 0xffffffff);
 
-   value = qed_rd(p_hwfn, p_ptt, MCP_REG_CPU_MODE);
-   value &= ~MCP_REG_CPU_MODE_SOFT_HALT;
-   qed_wr(p_hwfn, p_ptt, MCP_REG_CPU_MODE, value);
cpu_mode = qed_rd(p_hwfn, p_ptt, MCP_REG_CPU_MODE);
+   cpu_mode &= ~MCP_REG_CPU_MODE_SOFT_HALT;
+   qed_wr(p_hwfn, p_ptt, MCP_REG_CPU_MODE, cpu_mode);
+   msleep(QED_MCP_RESUME_SLEEP_MS);
+   cpu_state = qed_rd(p_hwfn, p_ptt, MCP_REG_CPU_STATE);
 
-   return (cpu_mode & MCP_REG_CPU_MODE_SOFT_HALT) ? -EAGAIN : 0;
+   if (cpu_state & MCP_REG_CPU_STATE_SOFT_HALTED) {
+   DP_NOTICE(p_hwfn,
+ "Failed to resume the MCP [CPU_MODE = 0x%08x, CPU_STATE = 0x%08x]\n",
+ cpu_mode, cpu_state);
+   return -EBUSY;
+   }
+
+   return 0;
 }
 
 int qed_mcp_ov_update_current_config(struct qed_hwfn *p_hwfn,
diff --git a/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h b/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h
index d8ad2dc..2279965 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h
@@ -562,6 +562,7 @@
0
 #define MCP_REG_CPU_STATE \
0xe05004UL
+#define MCP_REG_CPU_STATE_SOFT_HALTED  (0x1UL << 10)
 #define MCP_REG_CPU_EVENT_MASK \
0xe05008UL
 #define PGLUE_B_REG_PF_BAR0_SIZE \
-- 
1.8.3.1



Experimental fix for MSI-X issue on r8169

2018-08-19 Thread Heiner Kallweit
The three of you reported an MSI-X-related error when the system
resumes from suspend. This has been fixed for now by disabling MSI-X
on certain chip versions. However more versions may be affected.

I checked with Realtek and they confirmed that on certain chip
versions a MSIX-related value in PCI config space is reset when
resuming from S3.

I would appreciate it if you could test the following experimental patch
and report whether the warning "MSIX address lost, re-configuring" appears
in your dmesg output after resume from suspend.

Thanks a lot for your efforts.

Heiner

---
 drivers/net/ethernet/realtek/r8169.c | 27 ++-
 1 file changed, 22 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
index 0d9c38318..56b4bdff9 100644
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169.c
@@ -690,6 +690,8 @@ struct rtl8169_private {
struct rtl8169_counters *counters;
struct rtl8169_tc_offsets tc_offset;
u32 saved_wolopts;
+   u32 saved_msix_addr_lo;
+   u32 saved_msix_addr_hi;
 
struct rtl_fw {
const struct firmware *fw;
@@ -6876,6 +6878,19 @@ static int rtl8169_resume(struct device *device)
 {
struct pci_dev *pdev = to_pci_dev(device);
struct net_device *dev = pci_get_drvdata(pdev);
+   struct rtl8169_private *tp = netdev_priv(dev);
+   u32 val;
+
+   /* Some chip versions lose these values when resuming */
+   if (pdev->msix_enabled) {
+   pci_read_config_dword(pdev, PCI_BASE_ADDRESS_4, &val);
+   if (!val)
+   dev_warn(device, "MSIX address lost, re-configuring\n");
+   pci_write_config_dword(pdev, PCI_BASE_ADDRESS_4,
+  tp->saved_msix_addr_lo);
+   pci_write_config_dword(pdev, PCI_BASE_ADDRESS_5,
+  tp->saved_msix_addr_hi);
+   }
 
if (netif_running(dev))
__rtl8169_resume(dev);
@@ -7076,11 +7091,6 @@ static int rtl_alloc_irq(struct rtl8169_private *tp)
RTL_W8(tp, Config2, RTL_R8(tp, Config2) & ~MSIEnable);
RTL_W8(tp, Cfg9346, Cfg9346_Lock);
flags = PCI_IRQ_LEGACY;
-   } else if (tp->mac_version == RTL_GIGA_MAC_VER_40) {
-   /* This version was reported to have issues with resume
-* from suspend when using MSI-X
-*/
-   flags = PCI_IRQ_LEGACY | PCI_IRQ_MSI;
} else {
flags = PCI_IRQ_ALL_TYPES;
}
@@ -7355,6 +7365,13 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
return rc;
}
 
+   if (pdev->msix_enabled) {
+   pci_read_config_dword(pdev, PCI_BASE_ADDRESS_4,
+ &tp->saved_msix_addr_lo);
+   pci_read_config_dword(pdev, PCI_BASE_ADDRESS_5,
+ &tp->saved_msix_addr_hi);
+   }
+
tp->saved_wolopts = __rtl8169_get_wol(tp);
 
mutex_init(&tp->wk.mutex);
-- 
2.18.0



[Patch net 2/9] net_sched: remove unnecessary ops->delete()

2018-08-19 Thread Cong Wang
All ops->delete() does is fetch tn->idrinfo, but we already
have the tc_action before calling ops->delete(), and tc_action has
a pointer ->idrinfo.

More importantly, each type of action does the same thing, that is,
just calling tcf_idr_delete_index().

So ops->delete() can simply be removed.

Fixes: b409074e6693 ("net: sched: add 'delete' function to action ops")
Cc: Jiri Pirko 
Cc: Vlad Buslov 
Signed-off-by: Cong Wang 
---
 include/net/act_api.h  |  2 --
 net/sched/act_api.c| 15 +++
 net/sched/act_bpf.c|  8 
 net/sched/act_connmark.c   |  8 
 net/sched/act_csum.c   |  8 
 net/sched/act_gact.c   |  8 
 net/sched/act_ife.c|  8 
 net/sched/act_ipt.c| 16 
 net/sched/act_mirred.c |  8 
 net/sched/act_nat.c|  8 
 net/sched/act_pedit.c  |  8 
 net/sched/act_police.c |  8 
 net/sched/act_sample.c |  8 
 net/sched/act_simple.c |  8 
 net/sched/act_skbedit.c|  8 
 net/sched/act_skbmod.c |  8 
 net/sched/act_tunnel_key.c |  8 
 net/sched/act_vlan.c   |  8 
 18 files changed, 7 insertions(+), 146 deletions(-)

diff --git a/include/net/act_api.h b/include/net/act_api.h
index 1ad5b19e83a9..e32708491d83 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -102,7 +102,6 @@ struct tc_action_ops {
size_t  (*get_fill_size)(const struct tc_action *act);
struct net_device *(*get_dev)(const struct tc_action *a);
void(*put_dev)(struct net_device *dev);
-   int (*delete)(struct net *net, u32 index);
 };
 
 struct tc_action_net {
@@ -158,7 +157,6 @@ void tcf_idr_insert(struct tc_action_net *tn, struct 
tc_action *a);
 void tcf_idr_cleanup(struct tc_action_net *tn, u32 index);
 int tcf_idr_check_alloc(struct tc_action_net *tn, u32 *index,
struct tc_action **a, int bind);
-int tcf_idr_delete_index(struct tc_action_net *tn, u32 index);
 int __tcf_idr_release(struct tc_action *a, bool bind, bool strict);
 
 static inline int tcf_idr_release(struct tc_action *a, bool bind)
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index cd69a6afcf88..00bf7d2b0bdd 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -337,9 +337,8 @@ bool tcf_idr_check(struct tc_action_net *tn, u32 index, struct tc_action **a,
 }
 EXPORT_SYMBOL(tcf_idr_check);
 
-int tcf_idr_delete_index(struct tc_action_net *tn, u32 index)
+static int tcf_idr_delete_index(struct tcf_idrinfo *idrinfo, u32 index)
 {
-   struct tcf_idrinfo *idrinfo = tn->idrinfo;
struct tc_action *p;
int ret = 0;
 
@@ -370,7 +369,6 @@ int tcf_idr_delete_index(struct tc_action_net *tn, u32 index)
spin_unlock(&idrinfo->lock);
return ret;
 }
-EXPORT_SYMBOL(tcf_idr_delete_index);
 
 int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
   struct tc_action **a, const struct tc_action_ops *ops,
@@ -1182,24 +1180,25 @@ static int tca_action_flush(struct net *net, struct nlattr *nla,
 static int tcf_action_delete(struct net *net, struct tc_action *actions[],
 struct netlink_ext_ack *extack)
 {
-   u32 act_index;
-   int ret, i;
+   int i;
 
for (i = 0; i < TCA_ACT_MAX_PRIO && actions[i]; i++) {
struct tc_action *a = actions[i];
const struct tc_action_ops *ops = a->ops;
-
/* Actions can be deleted concurrently so we must save their
 * type and id to search again after reference is released.
 */
-   act_index = a->tcfa_index;
+   struct tcf_idrinfo *idrinfo = a->idrinfo;
+   u32 act_index = a->tcfa_index;
 
if (tcf_action_put(a)) {
/* last reference, action was deleted concurrently */
module_put(ops->owner);
} else  {
+   int ret;
+
/* now do the delete */
-   ret = ops->delete(net, act_index);
+   ret = tcf_idr_delete_index(idrinfo, act_index);
if (ret < 0)
return ret;
}
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index d30b23e42436..0c68bc9cf0b4 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -395,13 +395,6 @@ static int tcf_bpf_search(struct net *net, struct tc_action **a, u32 index,
return tcf_idr_search(tn, a, index);
 }
 
-static int tcf_bpf_delete(struct net *net, u32 index)
-{
-   struct tc_action_net *tn = net_generic(net, bpf_net_id);
-
-   return tcf_idr_delete_index(tn, index);
-}
-
 static struct tc_action_ops act_bpf_ops __read_mostly = {
.kind   =   "bpf",
.type   =   TCA_ACT_BPF,
@@ -412,7 +405,6 @@ static struct tc_action_ops act_bpf_ops 

[Patch net 6/9] net_sched: remove unused tcfa_capab

2018-08-19 Thread Cong Wang
Cc: Jamal Hadi Salim 
Signed-off-by: Cong Wang 
---
 include/net/act_api.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/include/net/act_api.h b/include/net/act_api.h
index f9c4b871af88..970303448c90 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -28,7 +28,6 @@ struct tc_action {
u32 tcfa_index;
refcount_t  tcfa_refcnt;
atomic_ttcfa_bindcnt;
-   u32 tcfa_capab;
int tcfa_action;
struct tcf_ttcfa_tm;
struct gnet_stats_basic_packed  tcfa_bstats;
@@ -43,7 +42,6 @@ struct tc_action {
 #define tcf_index  common.tcfa_index
 #define tcf_refcnt common.tcfa_refcnt
 #define tcf_bindcntcommon.tcfa_bindcnt
-#define tcf_capab  common.tcfa_capab
 #define tcf_action common.tcfa_action
 #define tcf_tm common.tcfa_tm
 #define tcf_bstats common.tcfa_bstats
-- 
2.14.4



[Patch net 7/9] Revert "net: sched: act_ife: disable bh when taking ife_mod_lock"

2018-08-19 Thread Cong Wang
This reverts commit 42c625a486f3 ("net: sched: act_ife: disable bh
when taking ife_mod_lock"), because what ife_mod_lock protects
is never touched in the rate-estimator timer BH context, so there
is no race.

A better fix follows in this series.

Cc: Vlad Buslov 
Cc: Jamal Hadi Salim 
Signed-off-by: Cong Wang 
---
 net/sched/act_ife.c | 20 ++--
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index 92fcf8ba5bca..9decbb74b3ac 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -167,16 +167,16 @@ static struct tcf_meta_ops *find_ife_oplist(u16 metaid)
 {
struct tcf_meta_ops *o;
 
-   read_lock_bh(&ife_mod_lock);
+   read_lock(&ife_mod_lock);
list_for_each_entry(o, &ifeoplist, list) {
if (o->metaid == metaid) {
if (!try_module_get(o->owner))
o = NULL;
-   read_unlock_bh(&ife_mod_lock);
+   read_unlock(&ife_mod_lock);
return o;
}
}
-   read_unlock_bh(&ife_mod_lock);
+   read_unlock(&ife_mod_lock);
 
return NULL;
 }
@@ -190,12 +190,12 @@ int register_ife_op(struct tcf_meta_ops *mops)
!mops->get || !mops->alloc)
return -EINVAL;
 
-   write_lock_bh(&ife_mod_lock);
+   write_lock(&ife_mod_lock);

list_for_each_entry(m, &ifeoplist, list) {
if (m->metaid == mops->metaid ||
(strcmp(mops->name, m->name) == 0)) {
-   write_unlock_bh(&ife_mod_lock);
+   write_unlock(&ife_mod_lock);
return -EEXIST;
}
}
@@ -204,7 +204,7 @@ int register_ife_op(struct tcf_meta_ops *mops)
mops->release = ife_release_meta_gen;
 
list_add_tail(&mops->list, &ifeoplist);
-   write_unlock_bh(&ife_mod_lock);
+   write_unlock(&ife_mod_lock);
return 0;
 }
 EXPORT_SYMBOL_GPL(unregister_ife_op);
@@ -214,7 +214,7 @@ int unregister_ife_op(struct tcf_meta_ops *mops)
struct tcf_meta_ops *m;
int err = -ENOENT;
 
-   write_lock_bh(&ife_mod_lock);
+   write_lock(&ife_mod_lock);
list_for_each_entry(m, &ifeoplist, list) {
if (m->metaid == mops->metaid) {
list_del(&m->list);
@@ -222,7 +222,7 @@ int unregister_ife_op(struct tcf_meta_ops *mops)
break;
}
}
-   write_unlock_bh(&ife_mod_lock);
+   write_unlock(&ife_mod_lock);
 
return err;
 }
@@ -343,13 +343,13 @@ static int use_all_metadata(struct tcf_ife_info *ife)
int rc = 0;
int installed = 0;
 
-   read_lock_bh(&ife_mod_lock);
+   read_lock(&ife_mod_lock);
list_for_each_entry(o, &ifeoplist, list) {
rc = add_metainfo(ife, o->metaid, NULL, 0, true);
if (rc == 0)
installed += 1;
}
-   read_unlock_bh(&ife_mod_lock);
+   read_unlock(&ife_mod_lock);
 
if (installed)
return 0;
-- 
2.14.4



[Patch net 4/9] net_sched: remove unused tcf_idr_check()

2018-08-19 Thread Cong Wang
tcf_idr_check() is replaced by tcf_idr_check_alloc(),
and __tcf_idr_check() can now be folded into tcf_idr_search().

Fixes: 0190c1d452a9 ("net: sched: atomically check-allocate action")
Cc: Jiri Pirko 
Cc: Vlad Buslov 
Signed-off-by: Cong Wang 
---
 include/net/act_api.h |  2 --
 net/sched/act_api.c   | 22 +++---
 2 files changed, 3 insertions(+), 21 deletions(-)

diff --git a/include/net/act_api.h b/include/net/act_api.h
index e32708491d83..eaa0e8b93d5b 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -147,8 +147,6 @@ int tcf_generic_walker(struct tc_action_net *tn, struct 
sk_buff *skb,
   const struct tc_action_ops *ops,
   struct netlink_ext_ack *extack);
 int tcf_idr_search(struct tc_action_net *tn, struct tc_action **a, u32 index);
-bool tcf_idr_check(struct tc_action_net *tn, u32 index, struct tc_action **a,
-   int bind);
 int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
   struct tc_action **a, const struct tc_action_ops *ops,
   int bind, bool cpustats);
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index ba55226928a3..d76948f02a02 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -300,21 +300,17 @@ int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb,
 }
 EXPORT_SYMBOL(tcf_generic_walker);
 
-static bool __tcf_idr_check(struct tc_action_net *tn, u32 index,
-   struct tc_action **a, int bind)
+int tcf_idr_search(struct tc_action_net *tn, struct tc_action **a, u32 index)
 {
struct tcf_idrinfo *idrinfo = tn->idrinfo;
struct tc_action *p;
 
spin_lock(&idrinfo->lock);
p = idr_find(&idrinfo->action_idr, index);
-   if (IS_ERR(p)) {
+   if (IS_ERR(p))
p = NULL;
-   } else if (p) {
+   else if (p)
refcount_inc(&p->tcfa_refcnt);
-   if (bind)
-   atomic_inc(&p->tcfa_bindcnt);
-   }
spin_unlock(&idrinfo->lock);
 
if (p) {
@@ -323,20 +319,8 @@ static bool __tcf_idr_check(struct tc_action_net *tn, u32 index,
}
return false;
 }
-
-int tcf_idr_search(struct tc_action_net *tn, struct tc_action **a, u32 index)
-{
-   return __tcf_idr_check(tn, index, a, 0);
-}
 EXPORT_SYMBOL(tcf_idr_search);
 
-bool tcf_idr_check(struct tc_action_net *tn, u32 index, struct tc_action **a,
-  int bind)
-{
-   return __tcf_idr_check(tn, index, a, bind);
-}
-EXPORT_SYMBOL(tcf_idr_check);
-
 static int tcf_idr_delete_index(struct tcf_idrinfo *idrinfo, u32 index)
 {
struct tc_action *p;
-- 
2.14.4



[Patch net 8/9] act_ife: move tcfa_lock down to where necessary

2018-08-19 Thread Cong Wang
The only time we need to take tcfa_lock is when adding
a new metainfo to an existing ife->metalist. We don't need
to take tcfa_lock so early and so broadly in tcf_ife_init().

This means we can always take ife_mod_lock first, avoiding the
reverse lock-ordering warning reported by Vlad.

Reported-by: Vlad Buslov 
Tested-by: Vlad Buslov 
Cc: Vlad Buslov 
Cc: Jamal Hadi Salim 
Signed-off-by: Cong Wang 
---
 net/sched/act_ife.c | 38 +-
 1 file changed, 13 insertions(+), 25 deletions(-)

diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index 9decbb74b3ac..244a8cf48183 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -265,11 +265,8 @@ static const char *ife_meta_id2name(u32 metaid)
 #endif
 
 /* called when adding new meta information
- * under ife->tcf_lock for existing action
 */
-static int load_metaops_and_vet(struct tcf_ife_info *ife, u32 metaid,
-   void *val, int len, bool exists,
-   bool rtnl_held)
+static int load_metaops_and_vet(u32 metaid, void *val, int len, bool rtnl_held)
 {
struct tcf_meta_ops *ops = find_ife_oplist(metaid);
int ret = 0;
@@ -277,15 +274,11 @@ static int load_metaops_and_vet(struct tcf_ife_info *ife, u32 metaid,
if (!ops) {
ret = -ENOENT;
 #ifdef CONFIG_MODULES
-   if (exists)
-   spin_unlock_bh(&ife->tcf_lock);
if (rtnl_held)
rtnl_unlock();
request_module("ife-meta-%s", ife_meta_id2name(metaid));
if (rtnl_held)
rtnl_lock();
-   if (exists)
-   spin_lock_bh(&ife->tcf_lock);
ops = find_ife_oplist(metaid);
 #endif
}
@@ -302,10 +295,9 @@ static int load_metaops_and_vet(struct tcf_ife_info *ife, u32 metaid,
 }
 
 /* called when adding new meta information
- * under ife->tcf_lock for existing action
 */
 static int add_metainfo(struct tcf_ife_info *ife, u32 metaid, void *metaval,
-   int len, bool atomic)
+   int len, bool atomic, bool exists)
 {
struct tcf_meta_info *mi = NULL;
struct tcf_meta_ops *ops = find_ife_oplist(metaid);
@@ -332,12 +324,16 @@ static int add_metainfo(struct tcf_ife_info *ife, u32 metaid, void *metaval,
}
}
 
+   if (exists)
+   spin_lock_bh(&ife->tcf_lock);
list_add_tail(&mi->metalist, &ife->metalist);
+   if (exists)
+   spin_unlock_bh(&ife->tcf_lock);
 
return ret;
 }
 
-static int use_all_metadata(struct tcf_ife_info *ife)
+static int use_all_metadata(struct tcf_ife_info *ife, bool exists)
 {
struct tcf_meta_ops *o;
int rc = 0;
@@ -345,7 +341,7 @@ static int use_all_metadata(struct tcf_ife_info *ife)
 
read_lock(&ife_mod_lock);
list_for_each_entry(o, &ifeoplist, list) {
-   rc = add_metainfo(ife, o->metaid, NULL, 0, true);
+   rc = add_metainfo(ife, o->metaid, NULL, 0, true, exists);
if (rc == 0)
installed += 1;
}
@@ -422,7 +418,6 @@ static void tcf_ife_cleanup(struct tc_action *a)
kfree_rcu(p, rcu);
 }
 
-/* under ife->tcf_lock for existing action */
 static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb,
 bool exists, bool rtnl_held)
 {
@@ -436,12 +431,11 @@ static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb,
val = nla_data(tb[i]);
len = nla_len(tb[i]);
 
-   rc = load_metaops_and_vet(ife, i, val, len, exists,
- rtnl_held);
+   rc = load_metaops_and_vet(i, val, len, rtnl_held);
if (rc != 0)
return rc;
 
-   rc = add_metainfo(ife, i, val, len, exists);
+   rc = add_metainfo(ife, i, val, len, false, exists);
if (rc)
return rc;
}
@@ -540,8 +534,6 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
p->eth_type = ife_type;
}
 
-   if (exists)
-   spin_lock_bh(&ife->tcf_lock);
 
if (ret == ACT_P_CREATED)
INIT_LIST_HEAD(&ife->metalist);
@@ -551,10 +543,7 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
   NULL, NULL);
if (err) {
 metadata_parse_err:
-   if (exists)
-   spin_unlock_bh(&ife->tcf_lock);
tcf_idr_release(*a, bind);
-
kfree(p);
return err;
}
@@ -569,17 +558,16 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
 * as we can. You better have at least one else 

[Patch net 3/9] net_sched: remove unused parameter for tcf_action_delete()

2018-08-19 Thread Cong Wang
Fixes: 16af6067392c ("net: sched: implement reference counted action release")
Cc: Jiri Pirko 
Cc: Vlad Buslov 
Signed-off-by: Cong Wang 
---
 net/sched/act_api.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 00bf7d2b0bdd..ba55226928a3 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -1177,8 +1177,7 @@ static int tca_action_flush(struct net *net, struct nlattr *nla,
return err;
 }
 
-static int tcf_action_delete(struct net *net, struct tc_action *actions[],
-struct netlink_ext_ack *extack)
+static int tcf_action_delete(struct net *net, struct tc_action *actions[])
 {
int i;
 
@@ -1227,7 +1226,7 @@ tcf_del_notify(struct net *net, struct nlmsghdr *n, struct tc_action *actions[],
}
 
/* now do the delete */
-   ret = tcf_action_delete(net, actions, extack);
+   ret = tcf_action_delete(net, actions);
if (ret < 0) {
NL_SET_ERR_MSG(extack, "Failed to delete TC action");
kfree_skb(skb);
-- 
2.14.4



[Patch net 9/9] act_ife: fix a potential deadlock

2018-08-19 Thread Cong Wang
use_all_metadata() acquires read_lock(&ife_mod_lock), then calls
add_metainfo() which calls find_ife_oplist() which acquires the same
lock again. Deadlock!

Introduce __add_metainfo() which accepts struct tcf_meta_ops *ops
as an additional parameter and lets its callers decide how
to find it. use_all_metadata() already has ops, so there is no
need to find it again; it can call __add_metainfo() directly.

And, as ife_mod_lock is only needed for find_ife_oplist(),
this means we can now use a non-atomic allocation in
populate_metalist().

Fixes: 817e9f2c5c26 ("act_ife: acquire ife_mod_lock before reading ifeoplist")
Cc: Jamal Hadi Salim 
Signed-off-by: Cong Wang 
---
 net/sched/act_ife.c | 34 +-
 1 file changed, 21 insertions(+), 13 deletions(-)

diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index 244a8cf48183..196430aefe87 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -296,22 +296,16 @@ static int load_metaops_and_vet(u32 metaid, void *val, int len, bool rtnl_held)
 
 /* called when adding new meta information
 */
-static int add_metainfo(struct tcf_ife_info *ife, u32 metaid, void *metaval,
-   int len, bool atomic, bool exists)
+static int __add_metainfo(const struct tcf_meta_ops *ops,
+ struct tcf_ife_info *ife, u32 metaid, void *metaval,
+ int len, bool atomic, bool exists)
 {
struct tcf_meta_info *mi = NULL;
-   struct tcf_meta_ops *ops = find_ife_oplist(metaid);
int ret = 0;
 
-   if (!ops)
-   return -ENOENT;
-
mi = kzalloc(sizeof(*mi), atomic ? GFP_ATOMIC : GFP_KERNEL);
-   if (!mi) {
-   /*put back what find_ife_oplist took */
-   module_put(ops->owner);
+   if (!mi)
return -ENOMEM;
-   }
 
mi->metaid = metaid;
mi->ops = ops;
@@ -319,7 +313,6 @@ static int add_metainfo(struct tcf_ife_info *ife, u32 metaid, void *metaval,
ret = ops->alloc(mi, metaval, atomic ? GFP_ATOMIC : GFP_KERNEL);
if (ret != 0) {
kfree(mi);
-   module_put(ops->owner);
return ret;
}
}
@@ -333,6 +326,21 @@ static int add_metainfo(struct tcf_ife_info *ife, u32 metaid, void *metaval,
return ret;
 }
 
+static int add_metainfo(struct tcf_ife_info *ife, u32 metaid, void *metaval,
+   int len, bool exists)
+{
+   const struct tcf_meta_ops *ops = find_ife_oplist(metaid);
+   int ret;
+
+   if (!ops)
+   return -ENOENT;
+   ret = __add_metainfo(ops, ife, metaid, metaval, len, false, exists);
+   if (ret)
+   /*put back what find_ife_oplist took */
+   module_put(ops->owner);
+   return ret;
+}
+
 static int use_all_metadata(struct tcf_ife_info *ife, bool exists)
 {
struct tcf_meta_ops *o;
@@ -341,7 +349,7 @@ static int use_all_metadata(struct tcf_ife_info *ife, bool exists)

read_lock(&ife_mod_lock);
list_for_each_entry(o, &ifeoplist, list) {
-   rc = add_metainfo(ife, o->metaid, NULL, 0, true, exists);
+   rc = __add_metainfo(o, ife, o->metaid, NULL, 0, true, exists);
if (rc == 0)
installed += 1;
}
@@ -435,7 +443,7 @@ static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb,
if (rc != 0)
return rc;
 
-   rc = add_metainfo(ife, i, val, len, false, exists);
+   rc = add_metainfo(ife, i, val, len, exists);
if (rc)
return rc;
}
-- 
2.14.4
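
A note on why the recursive read_lock() described in the patch above is
fatal: my understanding (not stated in the patch) is that a writer queuing
between the two read acquisitions blocks the second reader, roughly:

/* CPU 0                              CPU 1
 *
 * read_lock(&ife_mod_lock);
 *                                    write_lock(&ife_mod_lock); // spins,
 *                                    // blocking any new readers
 * read_lock(&ife_mod_lock);          // queued behind the writer:
 *                                    // neither side can make progress
 */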



[Patch net 1/9] net_sched: improve and refactor tcf_action_put_many()

2018-08-19 Thread Cong Wang
tcf_action_put_many() is mostly called to clean up actions on
the failure path, but tcf_action_put_many(&actions[acts_deleted]) is
used in the ugliest way: it passes a slice of the array and
relies on an additional NULL at the end to avoid out-of-bound
access.

acts_deleted is completely unnecessary since we can teach
tcf_action_put_many() to scan the whole array and check against
NULL pointers. This also means tcf_action_delete() should
set deleted action pointers to NULL to avoid double free.

Fixes: 90b73b77d08e ("net: sched: change action API to use array of pointers to 
actions")
Cc: Jiri Pirko 
Cc: Vlad Buslov 
Signed-off-by: Cong Wang 
---
 net/sched/act_api.c | 31 +++
 1 file changed, 15 insertions(+), 16 deletions(-)

diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 229d63c99be2..cd69a6afcf88 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -686,14 +686,18 @@ static int tcf_action_put(struct tc_action *p)
return __tcf_action_put(p, false);
 }
 
+/* Put all actions in this array, skip those NULL's. */
 static void tcf_action_put_many(struct tc_action *actions[])
 {
int i;
 
-   for (i = 0; i < TCA_ACT_MAX_PRIO && actions[i]; i++) {
+   for (i = 0; i < TCA_ACT_MAX_PRIO; i++) {
struct tc_action *a = actions[i];
-   const struct tc_action_ops *ops = a->ops;
+   const struct tc_action_ops *ops;
 
+   if (!a)
+   continue;
+   ops = a->ops;
if (tcf_action_put(a))
module_put(ops->owner);
}
@@ -1176,7 +1180,7 @@ static int tca_action_flush(struct net *net, struct nlattr *nla,
 }
 
 static int tcf_action_delete(struct net *net, struct tc_action *actions[],
-int *acts_deleted, struct netlink_ext_ack *extack)
+struct netlink_ext_ack *extack)
 {
u32 act_index;
int ret, i;
@@ -1196,20 +1200,17 @@ static int tcf_action_delete(struct net *net, struct tc_action *actions[],
} else  {
/* now do the delete */
ret = ops->delete(net, act_index);
-   if (ret < 0) {
-   *acts_deleted = i + 1;
+   if (ret < 0)
return ret;
-   }
}
+   actions[i] = NULL;
}
-   *acts_deleted = i;
return 0;
 }
 
 static int
tcf_del_notify(struct net *net, struct nlmsghdr *n, struct tc_action *actions[],
-  int *acts_deleted, u32 portid, size_t attr_size,
-  struct netlink_ext_ack *extack)
+  u32 portid, size_t attr_size, struct netlink_ext_ack *extack)
 {
int ret;
struct sk_buff *skb;
@@ -1227,7 +1228,7 @@ tcf_del_notify(struct net *net, struct nlmsghdr *n, struct tc_action *actions[],
}
 
/* now do the delete */
-   ret = tcf_action_delete(net, actions, &acts_deleted, extack);
+   ret = tcf_action_delete(net, actions, extack);
if (ret < 0) {
NL_SET_ERR_MSG(extack, "Failed to delete TC action");
kfree_skb(skb);
@@ -1249,8 +1250,7 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
struct tc_action *act;
size_t attr_size = 0;
-   struct tc_action *actions[TCA_ACT_MAX_PRIO + 1] = {};
-   int acts_deleted = 0;
+   struct tc_action *actions[TCA_ACT_MAX_PRIO] = {};
 
ret = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL, extack);
if (ret < 0)
@@ -1280,14 +1280,13 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
if (event == RTM_GETACTION)
ret = tcf_get_notify(net, portid, n, actions, event, extack);
else { /* delete */
-   ret = tcf_del_notify(net, n, actions, &acts_deleted, portid,
-attr_size, extack);
+   ret = tcf_del_notify(net, n, actions, portid, attr_size, extack);
if (ret)
goto err;
-   return ret;
+   return 0;
}
 err:
-   tcf_action_put_many(&actions[acts_deleted]);
+   tcf_action_put_many(actions);
return ret;
 }
 
-- 
2.14.4



[Patch net 0/9] net_sched: pending clean up and bug fixes

2018-08-19 Thread Cong Wang
This patchset aims to clean up and fix some bugs in the current
merge window, which is why it is targeting -net.

Patches 1-5 clean up Vlad's patches merged in the current merge
window; patch 6 is just a trivial cleanup.

Patch 7 reverts a lockdep warning fix and patch 8 provides a better
fix for it.

Patch 9 fixes a potential deadlock found by me during code review.

Please see each patch for details.

Cc: Jamal Hadi Salim 
Signed-off-by: Cong Wang 

Cong Wang (9):
  net_sched: improve and refactor tcf_action_put_many()
  net_sched: remove unnecessary ops->delete()
  net_sched: remove unused parameter for tcf_action_delete()
  net_sched: remove unused tcf_idr_check()
  net_sched: remove list_head from tc_action
  net_sched: remove unused tcfa_capab
  Revert "net: sched: act_ife: disable bh when taking ife_mod_lock"
  act_ife: move tcfa_lock down to where necessary
  act_ife: fix a potential deadlock

 drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c   |  6 +-
 .../net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c   | 10 +--
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c  |  5 +-
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c  |  6 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c| 19 +++--
 drivers/net/ethernet/mellanox/mlxsw/spectrum.c |  3 +-
 .../net/ethernet/mellanox/mlxsw/spectrum_flower.c  |  6 +-
 drivers/net/ethernet/netronome/nfp/flower/action.c |  6 +-
 drivers/net/ethernet/qlogic/qede/qede_filter.c |  6 +-
 drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c|  5 +-
 include/net/act_api.h  |  7 --
 include/net/pkt_cls.h  | 25 +++---
 net/dsa/slave.c|  4 +-
 net/sched/act_api.c| 70 ++--
 net/sched/act_bpf.c|  8 --
 net/sched/act_connmark.c   |  8 --
 net/sched/act_csum.c   |  8 --
 net/sched/act_gact.c   |  8 --
 net/sched/act_ife.c| 92 ++
 net/sched/act_ipt.c| 16 
 net/sched/act_mirred.c |  8 --
 net/sched/act_nat.c|  8 --
 net/sched/act_pedit.c  |  8 --
 net/sched/act_police.c |  8 --
 net/sched/act_sample.c |  8 --
 net/sched/act_simple.c |  8 --
 net/sched/act_skbedit.c|  8 --
 net/sched/act_skbmod.c |  8 --
 net/sched/act_tunnel_key.c |  8 --
 net/sched/act_vlan.c   |  8 --
 30 files changed, 108 insertions(+), 290 deletions(-)

-- 
2.14.4



[Patch net 5/9] net_sched: remove list_head from tc_action

2018-08-19 Thread Cong Wang
After commit 90b73b77d08e, list_head is no longer needed.
Now we just need to convert the list iteration to array
iteration for drivers.

Fixes: 90b73b77d08e ("net: sched: change action API to use array of pointers to 
actions")
Cc: Jiri Pirko 
Cc: Vlad Buslov 
Signed-off-by: Cong Wang 
---
 drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c   |  6 ++
 .../net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c   | 10 -
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c  |  5 ++---
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c  |  6 ++
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c| 19 
 drivers/net/ethernet/mellanox/mlxsw/spectrum.c |  3 +--
 .../net/ethernet/mellanox/mlxsw/spectrum_flower.c  |  6 ++
 drivers/net/ethernet/netronome/nfp/flower/action.c |  6 ++
 drivers/net/ethernet/qlogic/qede/qede_filter.c |  6 ++
 drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c|  5 ++---
 include/net/act_api.h  |  1 -
 include/net/pkt_cls.h  | 25 --
 net/dsa/slave.c|  4 +---
 net/sched/act_api.c|  1 -
 14 files changed, 43 insertions(+), 60 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
index 139d96c5a023..092c817f8f11 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
@@ -110,16 +110,14 @@ static int bnxt_tc_parse_actions(struct bnxt *bp,
 struct tcf_exts *tc_exts)
 {
const struct tc_action *tc_act;
-   LIST_HEAD(tc_actions);
-   int rc;
+   int i, rc;
 
if (!tcf_exts_has_actions(tc_exts)) {
netdev_info(bp->dev, "no actions");
return -EINVAL;
}
 
-   tcf_exts_to_list(tc_exts, &tc_actions);
-   list_for_each_entry(tc_act, &tc_actions, list) {
+   tcf_exts_for_each_action(i, tc_act, tc_exts) {
/* Drop action */
if (is_tcf_gact_shot(tc_act)) {
actions->flags |= BNXT_TC_ACTION_FLAG_DROP;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c
index 623f73dd7738..c116f96956fe 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c
@@ -417,10 +417,9 @@ static void cxgb4_process_flow_actions(struct net_device *in,
   struct ch_filter_specification *fs)
 {
const struct tc_action *a;
-   LIST_HEAD(actions);
+   int i;
 
-   tcf_exts_to_list(cls->exts, &actions);
-   list_for_each_entry(a, &actions, list) {
+   tcf_exts_for_each_action(i, a, cls->exts) {
if (is_tcf_gact_ok(a)) {
fs->action = FILTER_PASS;
} else if (is_tcf_gact_shot(a)) {
@@ -591,10 +590,9 @@ static int cxgb4_validate_flow_actions(struct net_device *dev,
bool act_redir = false;
bool act_pedit = false;
bool act_vlan = false;
-   LIST_HEAD(actions);
+   int i;
 
-   tcf_exts_to_list(cls->exts, &actions);
-   list_for_each_entry(a, &actions, list) {
+   tcf_exts_for_each_action(i, a, cls->exts) {
if (is_tcf_gact_ok(a)) {
/* Do nothing */
} else if (is_tcf_gact_shot(a)) {
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c
index 18eb2aedd4cb..c7d2b4dc7568 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c
@@ -93,14 +93,13 @@ static int fill_action_fields(struct adapter *adap,
unsigned int num_actions = 0;
const struct tc_action *a;
struct tcf_exts *exts;
-   LIST_HEAD(actions);
+   int i;
 
exts = cls->knode.exts;
if (!tcf_exts_has_actions(exts))
return -EINVAL;
 
-   tcf_exts_to_list(exts, &actions);
-   list_for_each_entry(a, &actions, list) {
+   tcf_exts_for_each_action(i, a, exts) {
/* Don't allow more than one action per rule. */
if (num_actions)
return -EINVAL;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 447098005490..af4c9ae7f432 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -9171,14 +9171,12 @@ static int parse_tc_actions(struct ixgbe_adapter *adapter,
struct tcf_exts *exts, u64 *action, u8 *queue)
 {
const struct tc_action *a;
-   LIST_HEAD(actions);
+   int i;
 
if (!tcf_exts_has_actions(exts))
return -EINVAL;
 
-   tcf_exts_to_list(exts, &actions);
-   list_for_each_entry(a, &actions, list) {
-
+   tcf_exts_for_each_action(i, a, exts) {

Re: [PATCH][net-next] vxlan: reduce dirty cache line in vxlan_find_mac

2018-08-19 Thread David Miller
From: Li RongQing 
Date: Sun, 19 Aug 2018 11:36:08 +0800

> vxlan_find_mac() unconditionally sets f->used for every packet,
> which causes a cache miss for every packet, since remote, hlist
> and used of vxlan_fdb share the same cacheline.
> 
> With this change, f->used is set only if it is not equal to jiffies.
> This gives up to 5% speed-up with small packets.
> 
> Signed-off-by: Zhang Yu 
> Signed-off-by: Li RongQing 

Please resubmit this when the net-next tree opens back up.

Thanks.
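
For context, the change under discussion amounts to roughly the
following in vxlan_find_mac() (a sketch based on the description
above, not the exact patch):

	f = __vxlan_find_mac(vxlan, mac, vni);
	if (f && f->used != jiffies)	/* only dirty the cacheline when needed */
		f->used = jiffies;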


Re: [PATCH net 1/4] qed: Wait for ready indication before rereading the shmem

2018-08-19 Thread David Miller
From: Tomer Tayar 
Date: Sun, 19 Aug 2018 20:58:04 +0300

> + while (!p_info->mfw_mb_length && cnt--) {
> + msleep(msec);
> + p_info->mfw_mb_length =
> + (u16)qed_rd(p_hwfn, p_ptt,
> + p_info->mfw_mb_addr +
> + offsetof(struct public_mfw_mb, sup_msgs));
> + }
> +
> + if (!cnt) {

Because you use postdecrement on 'cnt', the loop will time out with
'cnt' equal to '-1', not zero.

You need to fix this.
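
To illustrate the pitfall (a stand-alone sketch with a hypothetical
ready() predicate, not the qed code itself):

	int cnt = 20;

	while (!ready() && cnt--)
		msleep(10);

	/* After a full timeout, cnt is -1 here (it would wrap instead for
	 * an unsigned type such as the u8 used in the patch), so this
	 * check never fires:
	 */
	if (!cnt)
		return -EBUSY;

	/* Re-testing the actual condition avoids the problem: */
	if (!ready())
		return -EBUSY;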


Re: [PATCH 1/1] tap: RCU usage and comment fixes

2018-08-19 Thread David Miller
From: Wang Jian 
Date: Fri, 17 Aug 2018 08:22:53 +

> The tap_queue and the 'tap_dev' are loosely coupled, not 'macvlan_dev'.

There is another reference to macvlan_dev in that comment, which is
similarly inaccurate.  You should add an appropriate Fixes: line for
where this inaccuracy was introduced, which is:

Fixes: 6fe3faf86757 ("tap: Abstract type of virtual interface from tap implementation")

> Taking rcu_read_lock a little later seems like it can slightly reduce
> the rcu read-side critical section.

This is a separate change from fixing up a comment.
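
The second point amounts to a change of this shape (a generic sketch
with hypothetical some_precondition()/use() helpers, not the tap code
itself):

	/* Before: read-side section opened earlier than necessary */
	rcu_read_lock();
	if (!some_precondition()) {
		rcu_read_unlock();
		return;
	}
	p = rcu_dereference(ptr);
	use(p);
	rcu_read_unlock();

	/* After: lock held only around the RCU access */
	if (!some_precondition())
		return;
	rcu_read_lock();
	p = rcu_dereference(ptr);
	use(p);
	rcu_read_unlock();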


[PATCH net 2/4] qed: Wait for MCP halt and resume commands to take place

2018-08-19 Thread Tomer Tayar
Successive iterations of halting and resuming the management chip (MCP)
might fail, since currently the driver doesn't wait for these operations to
actually take place.
This patch prevents the driver from moving forward before the operations
are reflected in the state register.

Signed-off-by: Tomer Tayar 
Signed-off-by: Ariel Elior 
---
 drivers/net/ethernet/qlogic/qed/qed_mcp.c  | 46 +-
 drivers/net/ethernet/qlogic/qed/qed_reg_addr.h |  1 +
 2 files changed, 39 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
index 8861010..e33596a 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
@@ -2109,31 +2109,61 @@ int qed_mcp_config_vf_msix(struct qed_hwfn *p_hwfn,
return rc;
 }
 
+/* A maximal 100 msec waiting time for the MCP to halt */
+#define QED_MCP_HALT_SLEEP_MS  10
+#define QED_MCP_HALT_MAX_RETRIES   10
+
 int qed_mcp_halt(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 {
-   u32 resp = 0, param = 0;
+   u32 resp = 0, param = 0, cpu_state, cnt = 0;
int rc;
 
rc = qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_MCP_HALT, 0, &resp,
 &param);
-   if (rc)
+   if (rc) {
DP_ERR(p_hwfn, "MCP response failure, aborting\n");
+   return rc;
+   }
 
-   return rc;
+   do {
+   msleep(QED_MCP_HALT_SLEEP_MS);
+   cpu_state = qed_rd(p_hwfn, p_ptt, MCP_REG_CPU_STATE);
+   if (cpu_state & MCP_REG_CPU_STATE_SOFT_HALTED)
+   break;
+   } while (++cnt < QED_MCP_HALT_MAX_RETRIES);
+
+   if (cnt == QED_MCP_HALT_MAX_RETRIES) {
+   DP_NOTICE(p_hwfn,
+ "Failed to halt the MCP [CPU_MODE = 0x%08x, CPU_STATE 
= 0x%08x]\n",
+ qed_rd(p_hwfn, p_ptt, MCP_REG_CPU_MODE), cpu_state);
+   return -EBUSY;
+   }
+
+   return 0;
 }
 
+#define QED_MCP_RESUME_SLEEP_MS10
+
 int qed_mcp_resume(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 {
-   u32 value, cpu_mode;
+   u32 cpu_mode, cpu_state;
 
qed_wr(p_hwfn, p_ptt, MCP_REG_CPU_STATE, 0xffffffff);
 
-   value = qed_rd(p_hwfn, p_ptt, MCP_REG_CPU_MODE);
-   value &= ~MCP_REG_CPU_MODE_SOFT_HALT;
-   qed_wr(p_hwfn, p_ptt, MCP_REG_CPU_MODE, value);
cpu_mode = qed_rd(p_hwfn, p_ptt, MCP_REG_CPU_MODE);
+   cpu_mode &= ~MCP_REG_CPU_MODE_SOFT_HALT;
+   qed_wr(p_hwfn, p_ptt, MCP_REG_CPU_MODE, cpu_mode);
+   msleep(QED_MCP_RESUME_SLEEP_MS);
+   cpu_state = qed_rd(p_hwfn, p_ptt, MCP_REG_CPU_STATE);
 
-   return (cpu_mode & MCP_REG_CPU_MODE_SOFT_HALT) ? -EAGAIN : 0;
+   if (cpu_state & MCP_REG_CPU_STATE_SOFT_HALTED) {
+   DP_NOTICE(p_hwfn,
+ "Failed to resume the MCP [CPU_MODE = 0x%08x, 
CPU_STATE = 0x%08x]\n",
+ cpu_mode, cpu_state);
+   return -EBUSY;
+   }
+
+   return 0;
 }
 
 int qed_mcp_ov_update_current_config(struct qed_hwfn *p_hwfn,
diff --git a/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h b/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h
index d8ad2dc..2279965 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h
@@ -562,6 +562,7 @@
0
 #define MCP_REG_CPU_STATE \
0xe05004UL
+#define MCP_REG_CPU_STATE_SOFT_HALTED  (0x1UL << 10)
 #define MCP_REG_CPU_EVENT_MASK \
0xe05008UL
 #define PGLUE_B_REG_PF_BAR0_SIZE \
-- 
1.8.3.1



[PATCH net 1/4] qed: Wait for ready indication before rereading the shmem

2018-08-19 Thread Tomer Tayar
The MFW might be reset and re-update its shared memory.
Upon the detection of such a reset, the driver rereads this memory, but
it has to wait until the data is valid.
This patch adds the missing wait for a data ready indication.
 
Signed-off-by: Tomer Tayar 
Signed-off-by: Ariel Elior 
---
 drivers/net/ethernet/qlogic/qed/qed_mcp.c | 50 +--
 1 file changed, 41 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
index d89a0e2..8861010 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
@@ -183,18 +183,57 @@ int qed_mcp_free(struct qed_hwfn *p_hwfn)
return 0;
 }
 
+/* Maximum of 1 sec to wait for the SHMEM ready indication */
+#define QED_MCP_SHMEM_RDY_MAX_RETRIES  20
+#define QED_MCP_SHMEM_RDY_ITER_MS  50
+
 static int qed_load_mcp_offsets(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 {
struct qed_mcp_info *p_info = p_hwfn->mcp_info;
+   u8 cnt = QED_MCP_SHMEM_RDY_MAX_RETRIES;
+   u8 msec = QED_MCP_SHMEM_RDY_ITER_MS;
u32 drv_mb_offsize, mfw_mb_offsize;
u32 mcp_pf_id = MCP_PF_ID(p_hwfn);
 
p_info->public_base = qed_rd(p_hwfn, p_ptt, MISC_REG_SHARED_MEM_ADDR);
-   if (!p_info->public_base)
-   return 0;
+   if (!p_info->public_base) {
+   DP_NOTICE(p_hwfn,
+ "The address of the MCP scratch-pad is not 
configured\n");
+   return -EINVAL;
+   }
 
p_info->public_base |= GRCBASE_MCP;
 
+   /* Get the MFW MB address and number of supported messages */
+   mfw_mb_offsize = qed_rd(p_hwfn, p_ptt,
+   SECTION_OFFSIZE_ADDR(p_info->public_base,
+PUBLIC_MFW_MB));
+   p_info->mfw_mb_addr = SECTION_ADDR(mfw_mb_offsize, mcp_pf_id);
+   p_info->mfw_mb_length = (u16)qed_rd(p_hwfn, p_ptt,
+   p_info->mfw_mb_addr +
+   offsetof(struct public_mfw_mb,
+sup_msgs));
+
+   /* The driver can notify that there was an MCP reset, and might read the
+* SHMEM values before the MFW has completed initializing them.
+* To avoid this, the "sup_msgs" field in the MFW mailbox is used as a
+* data ready indication.
+*/
+   while (!p_info->mfw_mb_length && cnt--) {
+   msleep(msec);
+   p_info->mfw_mb_length =
+   (u16)qed_rd(p_hwfn, p_ptt,
+   p_info->mfw_mb_addr +
+   offsetof(struct public_mfw_mb, sup_msgs));
+   }
+
+   if (!cnt) {
+   DP_NOTICE(p_hwfn,
+ "Failed to get the SHMEM ready notification after %d 
msec\n",
+ QED_MCP_SHMEM_RDY_MAX_RETRIES * msec);
+   return -EBUSY;
+   }
+
/* Calculate the driver and MFW mailbox address */
drv_mb_offsize = qed_rd(p_hwfn, p_ptt,
SECTION_OFFSIZE_ADDR(p_info->public_base,
@@ -204,13 +243,6 @@ static int qed_load_mcp_offsets(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
   "drv_mb_offsiz = 0x%x, drv_mb_addr = 0x%x mcp_pf_id = 
0x%x\n",
   drv_mb_offsize, p_info->drv_mb_addr, mcp_pf_id);
 
-   /* Set the MFW MB address */
-   mfw_mb_offsize = qed_rd(p_hwfn, p_ptt,
-   SECTION_OFFSIZE_ADDR(p_info->public_base,
-PUBLIC_MFW_MB));
-   p_info->mfw_mb_addr = SECTION_ADDR(mfw_mb_offsize, mcp_pf_id);
-   p_info->mfw_mb_length = (u16)qed_rd(p_hwfn, p_ptt, p_info->mfw_mb_addr);
-
/* Get the current driver mailbox sequence before sending
 * the first command
 */
-- 
1.8.3.1



Re: [PATCH] net: lan743x_ptp: convert to ktime_get_clocktai_ts64

2018-08-19 Thread David Miller
From: Arnd Bergmann 
Date: Wed, 15 Aug 2018 19:49:49 +0200

> timekeeping_clocktai64() has been renamed to ktime_get_clocktai_ts64()
> for consistency with the other ktime_get_* access functions.
> 
> Rename the new caller that has come up as well.
> 
> Question: this is the only ptp driver that sets the hardware time
> to the current system time in TAI. Why does it do that?
> 
> Signed-off-by: Arnd Bergmann 

Deciding whether PTP drivers should set the hardware time at boot
to the current system time is a separate discussion from using
the new name for the timekeeping_clocktai64() interface, so I'm
applying this.

Thanks Arnd.
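
For anyone chasing the rename, the call-site conversion is mechanical;
a sketch:

	struct timespec64 ts;

	ktime_get_clocktai_ts64(&ts);	/* was: timekeeping_clocktai64(&ts); */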


[PATCH net 0/4] qed: Misc fixes in the interface with the MFW

2018-08-19 Thread Tomer Tayar
This patch series fixes several issues in the driver's interface with the
management FW (MFW).

Tomer Tayar (4):
  qed: Wait for ready indication before rereading the shmem
  qed: Wait for MCP halt and resume commands to take place
  qed: Prevent a possible deadlock during driver load and unload
  qed: Avoid sending mailbox commands when MFW is not responsive

 drivers/net/ethernet/qlogic/qed/qed_mcp.c  | 187 +
 drivers/net/ethernet/qlogic/qed/qed_mcp.h  |  27 ++--
 drivers/net/ethernet/qlogic/qed/qed_reg_addr.h |   2 +
 3 files changed, 178 insertions(+), 38 deletions(-)

-- 
1.8.3.1



[PATCH net 3/4] qed: Prevent a possible deadlock during driver load and unload

2018-08-19 Thread Tomer Tayar
The MFW manages an internal lock to prevent concurrent hardware
(de)initialization of different PFs.
This, together with the busy-waiting for the MFW's responses to commands,
might lead to a deadlock during concurrent load or unload of PFs.
This patch adds the option to sleep within the busy-waiting, and uses it
for the (un)load requests (which are not sent from an interrupt context) to
prevent the possible deadlock.

Signed-off-by: Tomer Tayar 
Signed-off-by: Ariel Elior 
---
 drivers/net/ethernet/qlogic/qed/qed_mcp.c | 43 ++-
 drivers/net/ethernet/qlogic/qed/qed_mcp.h | 21 +--
 2 files changed, 44 insertions(+), 20 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
index e33596a..d82c4de 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
@@ -48,7 +48,7 @@
 #include "qed_reg_addr.h"
 #include "qed_sriov.h"
 
-#define CHIP_MCP_RESP_ITER_US 10
+#define QED_MCP_RESP_ITER_US   10
 
 #define QED_DRV_MB_MAX_RETRIES (500 * 1000)/* Account for 5 sec */
 #define QED_MCP_RESET_RETRIES  (50 * 1000) /* Account for 500 msec */
@@ -317,7 +317,7 @@ static void qed_mcp_reread_offsets(struct qed_hwfn *p_hwfn,
 
 int qed_mcp_reset(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 {
-   u32 org_mcp_reset_seq, seq, delay = CHIP_MCP_RESP_ITER_US, cnt = 0;
+   u32 org_mcp_reset_seq, seq, delay = QED_MCP_RESP_ITER_US, cnt = 0;
int rc = 0;
 
/* Ensure that only a single thread is accessing the mailbox */
@@ -449,10 +449,10 @@ static void __qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn,
*p_hwfn,
 _qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn,
   struct qed_ptt *p_ptt,
   struct qed_mcp_mb_params *p_mb_params,
-  u32 max_retries, u32 delay)
+  u32 max_retries, u32 usecs)
 {
+   u32 cnt = 0, msecs = DIV_ROUND_UP(usecs, 1000);
struct qed_mcp_cmd_elem *p_cmd_elem;
-   u32 cnt = 0;
u16 seq_num;
int rc = 0;
 
@@ -475,7 +475,11 @@ static void __qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn,
goto err;
 
spin_unlock_bh(_hwfn->mcp_info->cmd_lock);
-   udelay(delay);
+
+   if (QED_MB_FLAGS_IS_SET(p_mb_params, CAN_SLEEP))
+   msleep(msecs);
+   else
+   udelay(usecs);
} while (++cnt < max_retries);
 
if (cnt >= max_retries) {
@@ -504,7 +508,11 @@ static void __qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn,
 * The spinlock stays locked until the list element is removed.
 */
 
-   udelay(delay);
+   if (QED_MB_FLAGS_IS_SET(p_mb_params, CAN_SLEEP))
+   msleep(msecs);
+   else
+   udelay(usecs);
+
spin_lock_bh(_hwfn->mcp_info->cmd_lock);
 
if (p_cmd_elem->b_is_completed)
@@ -539,7 +547,7 @@ static void __qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn,
   "MFW mailbox: response 0x%08x param 0x%08x [after %d.%03d 
ms]\n",
   p_mb_params->mcp_resp,
   p_mb_params->mcp_param,
-  (cnt * delay) / 1000, (cnt * delay) % 1000);
+  (cnt * usecs) / 1000, (cnt * usecs) % 1000);
 
/* Clear the sequence number from the MFW response */
p_mb_params->mcp_resp &= FW_MSG_CODE_MASK;
@@ -557,7 +565,7 @@ static int qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn,
 {
size_t union_data_size = sizeof(union drv_union_data);
u32 max_retries = QED_DRV_MB_MAX_RETRIES;
-   u32 delay = CHIP_MCP_RESP_ITER_US;
+   u32 usecs = QED_MCP_RESP_ITER_US;
 
/* MCP not initialized */
if (!qed_mcp_is_init(p_hwfn)) {
@@ -574,8 +582,13 @@ static int qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn,
return -EINVAL;
}
 
+   if (QED_MB_FLAGS_IS_SET(p_mb_params, CAN_SLEEP)) {
+   max_retries = DIV_ROUND_UP(max_retries, 1000);
+   usecs *= 1000;
+   }
+
return _qed_mcp_cmd_and_union(p_hwfn, p_ptt, p_mb_params, max_retries,
- delay);
+ usecs);
 }
 
 int qed_mcp_cmd(struct qed_hwfn *p_hwfn,
@@ -793,6 +806,7 @@ struct qed_load_req_out_params {
mb_params.data_src_size = sizeof(load_req);
mb_params.p_data_dst = _rsp;
mb_params.data_dst_size = sizeof(load_rsp);
+   mb_params.flags = QED_MB_FLAG_CAN_SLEEP;
 
DP_VERBOSE(p_hwfn, QED_MSG_SP,
   "Load Request: param 0x%08x [init_hw %d, drv_type %d, 
hsi_ver %d, pda 0x%04x]\n",
@@ -1014,7 +1028,8 @@ int qed_mcp_load_req(struct qed_hwfn *p_hwfn,
 
 int qed_mcp_unload_req(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 {
-   u32 wol_param, mcp_resp, mcp_param;
+   struct qed_mcp_mb_params mb_params;

[PATCH net 4/4] qed: Avoid sending mailbox commands when MFW is not responsive

2018-08-19 Thread Tomer Tayar
Continuing to send mailbox commands to the MFW when it is not responsive
ends up in a redundant series of timeout expiries.
This patch prints the MCP status on the first command that receives no
response, and blocks the following commands.
Since the (un)load request commands might go unanswered due to other
PFs, the patch also adds the option to skip the blocking upon a failure.

Signed-off-by: Tomer Tayar 
Signed-off-by: Ariel Elior 
---
 drivers/net/ethernet/qlogic/qed/qed_mcp.c  | 52 +-
 drivers/net/ethernet/qlogic/qed/qed_mcp.h  |  6 ++-
 drivers/net/ethernet/qlogic/qed/qed_reg_addr.h |  1 +
 3 files changed, 56 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
index d82c4de..a31a4b0 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
@@ -320,6 +320,12 @@ int qed_mcp_reset(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
u32 org_mcp_reset_seq, seq, delay = QED_MCP_RESP_ITER_US, cnt = 0;
int rc = 0;
 
+   if (p_hwfn->mcp_info->b_block_cmd) {
+   DP_NOTICE(p_hwfn,
+ "The MFW is not responsive. Avoid sending MCP_RESET 
mailbox command.\n");
+   return -EBUSY;
+   }
+
/* Ensure that only a single thread is accessing the mailbox */
spin_lock_bh(_hwfn->mcp_info->cmd_lock);
 
@@ -445,6 +451,33 @@ static void __qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn,
   (p_mb_params->cmd | seq_num), p_mb_params->param);
 }
 
+static void qed_mcp_cmd_set_blocking(struct qed_hwfn *p_hwfn, bool block_cmd)
+{
+   p_hwfn->mcp_info->b_block_cmd = block_cmd;
+
+   DP_INFO(p_hwfn, "%s sending of mailbox commands to the MFW\n",
+   block_cmd ? "Block" : "Unblock");
+}
+
+static void qed_mcp_print_cpu_info(struct qed_hwfn *p_hwfn,
+  struct qed_ptt *p_ptt)
+{
+   u32 cpu_mode, cpu_state, cpu_pc_0, cpu_pc_1, cpu_pc_2;
+   u32 delay = QED_MCP_RESP_ITER_US;
+
+   cpu_mode = qed_rd(p_hwfn, p_ptt, MCP_REG_CPU_MODE);
+   cpu_state = qed_rd(p_hwfn, p_ptt, MCP_REG_CPU_STATE);
+   cpu_pc_0 = qed_rd(p_hwfn, p_ptt, MCP_REG_CPU_PROGRAM_COUNTER);
+   udelay(delay);
+   cpu_pc_1 = qed_rd(p_hwfn, p_ptt, MCP_REG_CPU_PROGRAM_COUNTER);
+   udelay(delay);
+   cpu_pc_2 = qed_rd(p_hwfn, p_ptt, MCP_REG_CPU_PROGRAM_COUNTER);
+
+   DP_NOTICE(p_hwfn,
+ "MCP CPU info: mode 0x%08x, state 0x%08x, pc {0x%08x, 0x%08x, 
0x%08x}\n",
+ cpu_mode, cpu_state, cpu_pc_0, cpu_pc_1, cpu_pc_2);
+}
+
 static int
 _qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn,
   struct qed_ptt *p_ptt,
@@ -531,11 +564,15 @@ static void __qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn,
DP_NOTICE(p_hwfn,
  "The MFW failed to respond to command 0x%08x [param 
0x%08x].\n",
  p_mb_params->cmd, p_mb_params->param);
+   qed_mcp_print_cpu_info(p_hwfn, p_ptt);
 
spin_lock_bh(_hwfn->mcp_info->cmd_lock);
qed_mcp_cmd_del_elem(p_hwfn, p_cmd_elem);
spin_unlock_bh(_hwfn->mcp_info->cmd_lock);
 
+   if (!QED_MB_FLAGS_IS_SET(p_mb_params, AVOID_BLOCK))
+   qed_mcp_cmd_set_blocking(p_hwfn, true);
+
return -EAGAIN;
}
 
@@ -573,6 +610,13 @@ static int qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn,
return -EBUSY;
}
 
+   if (p_hwfn->mcp_info->b_block_cmd) {
+   DP_NOTICE(p_hwfn,
+ "The MFW is not responsive. Avoid sending mailbox 
command 0x%08x [param 0x%08x].\n",
+ p_mb_params->cmd, p_mb_params->param);
+   return -EBUSY;
+   }
+
if (p_mb_params->data_src_size > union_data_size ||
p_mb_params->data_dst_size > union_data_size) {
DP_ERR(p_hwfn,
@@ -806,7 +850,7 @@ struct qed_load_req_out_params {
mb_params.data_src_size = sizeof(load_req);
mb_params.p_data_dst = _rsp;
mb_params.data_dst_size = sizeof(load_rsp);
-   mb_params.flags = QED_MB_FLAG_CAN_SLEEP;
+   mb_params.flags = QED_MB_FLAG_CAN_SLEEP | QED_MB_FLAG_AVOID_BLOCK;
 
DP_VERBOSE(p_hwfn, QED_MSG_SP,
   "Load Request: param 0x%08x [init_hw %d, drv_type %d, 
hsi_ver %d, pda 0x%04x]\n",
@@ -1050,7 +1094,7 @@ int qed_mcp_unload_req(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
memset(_params, 0, sizeof(mb_params));
mb_params.cmd = DRV_MSG_CODE_UNLOAD_REQ;
mb_params.param = wol_param;
-   mb_params.flags = QED_MB_FLAG_CAN_SLEEP;
+   mb_params.flags = QED_MB_FLAG_CAN_SLEEP | QED_MB_FLAG_AVOID_BLOCK;
 
return qed_mcp_cmd_and_union(p_hwfn, p_ptt, _params);
 }
@@ -2158,6 +2202,8 @@ int qed_mcp_halt(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
  

Re: [PATCH net-next] net: sched: always disable bh when taking tcf_lock

2018-08-19 Thread David Miller
From: Vlad Buslov 
Date: Tue, 14 Aug 2018 21:46:16 +0300

> Recently, ops->init() and ops->dump() of all actions were modified to
> always obtain tcf_lock when accessing private action state. Actions that
> don't depend on tcf_lock for synchronization with their data path use
> non-bh locking API. However, tcf_lock is also used to protect rate
> estimator stats in softirq context by timer callback.
> 
> Change ops->init() and ops->dump() of all actions to disable bh when using
> tcf_lock, to prevent the deadlock reported by the following lockdep warning:
 ...
> Taking tcf_lock in sample action with bh disabled causes lockdep to issue a
> warning regarding a possible irq lock inversion dependency between tcf_lock
> and psample_groups_lock, which is taken while holding tcf_lock in sample init:
 ...
> In order to prevent potential lock inversion dependency between tcf_lock
> and psample_groups_lock, extract call to psample_group_get() from tcf_lock
> protected section in sample action init function.
> 
> Fixes: 4e232818bd32 ("net: sched: act_mirred: remove dependency on rtnl lock")
> Fixes: 764e9a24480f ("net: sched: act_vlan: remove dependency on rtnl lock")
> Fixes: 729e01260989 ("net: sched: act_tunnel_key: remove dependency on rtnl lock")
> Fixes: d77284956656 ("net: sched: act_sample: remove dependency on rtnl lock")
> Fixes: e8917f437006 ("net: sched: act_gact: remove dependency on rtnl lock")
> Fixes: b6a2b971c0b0 ("net: sched: act_csum: remove dependency on rtnl lock")
> Fixes: 2142236b4584 ("net: sched: act_bpf: remove dependency on rtnl lock")
> Signed-off-by: Vlad Buslov 

Applied, thanks Vlad.
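
The resulting pattern in the actions' init()/dump() paths looks roughly
like this (an illustrative sketch; see the individual commits for the
real per-action code):

	spin_lock_bh(&p->tcf_lock);	/* was: spin_lock(&p->tcf_lock) */
	/* ... read or update private action state ... */
	spin_unlock_bh(&p->tcf_lock);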


Re: how to (cross)connect two (physical) eth ports for ping test?

2018-08-19 Thread Roman Mashak
"Robert P. J. Day"  writes:

>   (i'm sure this has been explained many times before, so a link
> covering this will almost certainly do just fine.)
>
>   i want to loop one physical ethernet port into another, and just
> ping the daylights from one to the other for stress testing. my fedora
> laptop doesn't actually have two unused ethernet ports, so i just want
> to emulate this by slapping a couple startech USB/net adapters into
> two empty USB ports, setting this up, then doing it all over again
> monday morning on the actual target system, which does have multiple
> ethernet ports.

[...]

I used this in the past to test a dual-port NIC over a loopback cable;
you will need to adjust the script:

#!/bin/bash -x

ip="sudo $HOME/bin/ip"
eth1=192.168.2.100
eth2=192.168.2.101

dev1=eth1
dev2=eth2
dev1mac=00:1b:21:9b:24:b4
dev2mac=00:1b:21:9b:24:b5

# fake client interfaces and addresses
dev=dummy0
dev_mac=00:00:00:00:00:11

# max fake clients supported for simulation
maxusers=3

## Create dummy device
## Accepted parameters:
##$1 - devname
##$2 - devmac
##$3 - subnet (e.g. 10.10.10)
##$4 - max number of IP addresses to create on interface
setup_dummy()
{
#   sudo sh -c "echo 1 > /proc/sys/net/ipv4/ip_forward"
   # Enable tc hardware offload
#   ethtool -K $SGW_DEV hw-tc-offload on

   $ip link add $1 address $2 type dummy
   $ip link set $1 up
   for i in `seq 1 $4`;
   do
  $ip addr add $3.$i/32 dev $1
   done
}

## Delete dummy device
## Accepted parameters:
##$1 - devname
delete_dummy()
{
  $ip link del $1 type dummy
}

setup_network()
{
  # Send traffic eth1 <-> eth2 over loopback cable, where both interfaces
  # eth1 and eth2 are in the same subnet.
  #
  # We assume that NetworkManager is not running and eth1/eth2 are configured
  # via /etc/network/interfaces:
  #
  # 192.168.2.100/32 dev eth1
  # 192.168.2.101/32 dev eth2
  #
  # Specify source IP address when sending the traffic:
  # ping -I 192.168.2.100 192.168.2.101
  #
  #
  $ip neigh add $eth2 lladdr $dev2mac nud permanent dev $dev1
  $ip neigh add $eth1 lladdr $dev1mac nud permanent dev $dev2
  $ip route add table main $eth1 dev $dev2
  $ip route add table main $eth2 dev $dev1
  $ip rule add from all lookup local pref 100
  $ip rule del pref 0
  $ip rule add from $eth2 to $eth1 iif $dev1 lookup local pref 1
  $ip rule add from $eth1 to $eth2 iif $dev2 lookup local pref 2
  $ip rule add from $eth2 to $eth1 lookup main pref 3
  $ip rule add from $eth1 to $eth2 lookup main pref 4

#  $ip rule add from 10.10.10.0/24 to $eth1 iif $dev1 lookup local pref 5
#  $ip rule add from 10.10.10.0/24 to $eth2 iif $dev2 lookup local pref 6
#  $ip rule add from $eth1 to 10.10.10.0/24 iif $dev2 lookup local pref 7
#  $ip rule add from $eth2 to 10.10.10.0/24 iif $dev1 lookup local pref 8
}

restore_network()
{
  # FIX: hangs connections
  $ip rule flush
  $ip rule add priority 32767 lookup default
}

#delete_dummy dummy0
#delete_dummy dummy1

#setup_dummy dummy0 00:00:00:00:00:11 10.10.10 3
#setup_dummy dummy1 00:00:00:00:00:22 20.20.20 3
setup_network



Re: [RFC 0/1] Appletalk AARP probe broken by receipt of own broadcasts.

2018-08-19 Thread Andrew Lunn
> I run inside Virtualbox with the Realtek PCIe GBE Family Controller.
> 
> Assuming I'm reading /sys/class/net/enp0s3/driver correctly, it's using the
> e1000 driver.

Hi Craig

Ah. And how do you connect to the network? Please run some tcpdumps
and collect packets at various points. Make sure your network setup is
not duplicating packets; in particular, check any bridges you might
have in order to connect the segments together.

> However, it might not be the ethernet driver's fault. I've been a bit loose
> with terminology. Appletalk AARP probe packets aren't ethernet broadcasts as
> such; they're multicast packets, via the psnap driver, to hardware address
> 09:00:07:ff:ff:ff.

Basically, the same question applies to multicast as to broadcast.
I'm pretty sure the interface should not receive the packet it
transmitted itself. But if something on the network has duplicated the
packet, it will receive the duplicate. So before we add a filter,
let's understand where the packets are coming from.
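
For example, something along these lines at each capture point should
show whether the probe really comes back duplicated (the interface
name here is hypothetical):

  tcpdump -e -n -i enp0s3 'ether dst 09:00:07:ff:ff:ff'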

Andrew


Hi sweetie!!!

2018-08-19 Thread Wesley
How are you feeling today? I hope everything is fine; I am glad to make
your acquaintance. Anyway, I am Wesley from the United States of
America, currently stationed in Syria on a peacekeeping mission. I want
to get to know you better, if I may be so bold. I consider myself an
easy-going man, and I am currently looking for a relationship in which
I feel loved. Please forgive my manners, I am not good when it comes to
the Internet, as it is not my field. In Syria we are not allowed to go
out, which makes it very boring for me, so I think I need a friend to
talk to and keep me company!


Re: how to (cross)connect two (physical) eth ports for ping test?

2018-08-19 Thread Robert P. J. Day
On Sat, 18 Aug 2018, Willy Tarreau wrote:

> On Sat, Aug 18, 2018 at 09:10:25PM +0200, Andrew Lunn wrote:
> > On Sat, Aug 18, 2018 at 01:39:50PM -0400, Robert P. J. Day wrote:
> > >
> > >   (i'm sure this has been explained many times before, so a link
> > > covering this will almost certainly do just fine.)
> > >
> > >   i want to loop one physical ethernet port into another, and just
> > > ping the daylights from one to the other for stress testing. my fedora
> > > laptop doesn't actually have two unused ethernet ports, so i just want
> > > to emulate this by slapping a couple startech USB/net adapters into
> > > two empty USB ports, setting this up, then doing it all over again
> > > monday morning on the actual target system, which does have multiple
> > > ethernet ports.
> > >
> > >   so if someone can point me to the recipe, that would be great and
> > > you can stop reading.
> > >
> > >   as far as my tentative solution goes, i assume i need to put at
> > > least one of the physical ports in a network namespace via "ip netns",
> > > then ping from the netns to the root namespace. or, going one step
> > > further, perhaps putting both interfaces into two new namespaces, and
> > > setting up forwarding.
> >
> > Namespaces is a good solution. Something like this should work:
> >
> > ip netns add namespace1
> > ip netns add namespace2
> >
> > ip link set eth1 netns namespace1
> > ip link set eth2 netns namespace2
> >
> > ip netns exec namespace1 \
> > ip addr add 10.42.42.42/24 dev eth1
> >
> > ip netns exec namespace1 \
> > ip link set eth1 up
> >
> > ip netns exec namespace2 \
> > ip addr add 10.42.42.24/24 dev eth2
> >
> > ip netns exec namespace2 \
> > ip link set eth2 up
> >
> > ip netns exec namespace1 \
> > ping 10.42.42.24
> >
> > You might also want to consider iperf3 for stress testing, depending
> > on the sort of stress you need.
>
> FWIW I have a setup somewhere involving ip rule + ip route which
> achieves the same without involving namespaces. It's a bit hackish
> but sometimes convenient. I can dig it up if someone is interested.

  sure, i'm interested ... always educational to see different
solutions.

rday

-- 


Robert P. J. Day Ottawa, Ontario, CANADA
  http://crashcourse.ca/dokuwiki

Twitter:   http://twitter.com/rpjday
LinkedIn:   http://ca.linkedin.com/in/rpjday
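
Following up on Andrew's iperf3 suggestion, a minimal run with the
namespace setup quoted above would be something like:

  ip netns exec namespace2 iperf3 -s &
  ip netns exec namespace1 iperf3 -c 10.42.42.24 -t 30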